In [98]:
# Import dependencies
import pandas as pd

In [99]:
# Locations of the two raw CSV inputs, relative to the notebook's working directory
schools_path, students_path = (
    'raw_data/schools_complete.csv',
    'raw_data/students_complete.csv',
)

In [100]:
# Load the per-school table (one row per school) and preview the first rows
schools_df = pd.read_csv(filepath_or_buffer=schools_path)
schools_df.head(5)

Unnamed: 0,School ID,name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [101]:
# Load the per-student table (one row per student) and preview the first rows
students_df = pd.read_csv(filepath_or_buffer=students_path)
students_df.head(5)

Unnamed: 0,Student ID,name,gender,grade,school,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


# District Summary

In [102]:
# Minimum score counted as a pass, applied to both math and reading.
# NOTE(review): with this threshold the district reading pass rate comes out
# at exactly 100% -- confirm that 60 is the intended cut-off.
# The per-school passing cells further down hard-code the same literal; keep
# them in sync if this value changes.
PASSING_SCORE = 60

# Number of schools (non-null entries in the 'name' column)
total_schools = schools_df['name'].count()

# District enrollment, taken from the per-school 'size' column
total_students = schools_df['size'].sum()

# Sum of all school budgets
total_budget = schools_df['budget'].sum()

# District-wide mean scores across all student rows
avg_math = students_df['math_score'].mean()
avg_read = students_df['reading_score'].mean()

# Pass rates: students at or above the threshold, as a percentage of the
# enrollment reported by the schools table
pass_math = (students_df['math_score'] >= PASSING_SCORE).sum() / total_students * 100
pass_read = (students_df['reading_score'] >= PASSING_SCORE).sum() / total_students * 100

# Overall passing rate is defined here as the simple average of the two
# subject pass rates (not the share of students passing both)
overall_pass = (pass_math + pass_read) / 2

In [103]:
# Column labels, listed in the order they should appear in the summary table
district_columns = ['Total Schools', 'Total Students', 'Total Budget', 'Average Math Score',
                    'Average Reading Score', '% Passing Math', '% Passing Reading', '% Overall Passing Rate']

# Single-row record of district-wide metrics; the budget is pre-formatted as a currency string
district_values = {
    'Total Schools': [total_schools],
    'Total Students': [total_students],
    'Total Budget': [f'${total_budget:,.2f}'],
    'Average Math Score': [avg_math],
    'Average Reading Score': [avg_read],
    '% Passing Math': [pass_math],
    '% Passing Reading': [pass_read],
    '% Overall Passing Rate': [overall_pass],
}

# Passing columns= fixes the column order at construction time
district_summary = pd.DataFrame(district_values, columns=district_columns)

district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,15,39170,"$24,649,428.00",78.985371,81.87784,92.445749,100.0,96.222875


# School Summary

In [104]:
# Bucket the student rows by the school they attend; reused by the per-school aggregations
grouped_students = students_df.groupby(by=['school'])
grouped_students

<pandas.core.groupby.DataFrameGroupBy object at 0x000001BC479E7A20>

In [105]:
# Per-school mean math and reading scores (Series indexed by the 'school' group key)
avg_math_scores = grouped_students['math_score'].mean()
avg_read_scores = grouped_students['reading_score'].mean()

# Combine both averages into one frame and move the 'school' labels out of the
# index into a regular column, so the later merge can join on='school'.
# reset_index() is called on the frame built right here and WITHOUT drop=True:
# dropping the index would throw away the school names, and there is no
# 'index' column to remove afterwards.
student_scores = pd.DataFrame({'Average Math Score': avg_math_scores,
                               'Average Reading Score': avg_read_scores}).reset_index()
student_scores.head()

Unnamed: 0,school,Average Math Score,Average Reading Score
0,Bailey High School,77.048432,81.033963
1,Cabrera High School,83.061895,83.97578
2,Figueroa High School,76.711767,81.15802
3,Ford High School,77.102592,80.746258
4,Griffin High School,83.351499,83.816757


In [106]:
# Boolean masks marking the student rows that reached the passing score (60) in each subject
math_passed_mask = students_df['math_score'] >= 60
read_passed_mask = students_df['reading_score'] >= 60

# Keep only the passing rows, one subset per subject
only_pass_math = students_df.loc[math_passed_mask, :]
only_pass_read = students_df.loc[read_passed_mask, :]

# Bucket each passing subset by school
grouped_only_pass_math = only_pass_math.groupby(by=['school'])
grouped_only_pass_read = only_pass_read.groupby(by=['school'])

# Number of passing students per school.
# NOTE: this rebinds pass_math / pass_read, which the district summary section
# set to scalar percentages; from here on they are per-school count Series, so
# re-running the district summary cells after this one would pick these up.
pass_math = grouped_only_pass_math['math_score'].count()
pass_read = grouped_only_pass_read['reading_score'].count()

In [107]:
# Put the two per-school passing counts side by side, then turn the 'school'
# index into an ordinary column so the frame can be merged on it
pass_summary = pd.DataFrame({
    'Passing Math Counts': pass_math,
    'Passing Reading Counts': pass_read,
}).reset_index()
pass_summary.head()

Unnamed: 0,school,Passing Math Counts,Passing Reading Counts
0,Bailey High School,4455,4976
1,Cabrera High School,1858,1858
2,Figueroa High School,2608,2949
3,Ford High School,2446,2739
4,Griffin High School,1468,1468


In [108]:
# Join per-school average scores with per-school passing counts (default inner join on 'school')
merge_stu_pass = student_scores.merge(pass_summary, on='school')
merge_stu_pass.head()

Unnamed: 0,school,Average Math Score,Average Reading Score,Passing Math Counts,Passing Reading Counts
0,Bailey High School,77.048432,81.033963,4455,4976
1,Cabrera High School,83.061895,83.97578,1858,1858
2,Figueroa High School,76.711767,81.15802,2608,2949
3,Ford High School,77.102592,80.746258,2446,2739
4,Griffin High School,83.351499,83.816757,1468,1468


In [109]:
# Presentation labels for the raw schools columns; 'name' becomes 'school' so it
# matches the join key used by the student-level summaries
school_column_labels = {
    'name': 'school',
    'type': 'School Type',
    'size': 'Total Students',
    'budget': 'Total School Budget',
}

# Relabel the columns and discard the School ID column in one pass
schools_summary = schools_df.rename(columns=school_column_labels).drop(['School ID'], axis=1)

# Budget divided by enrollment gives the spend per student
schools_summary['Per Student Budget'] = schools_summary['Total School Budget'].div(schools_summary['Total Students'])
schools_summary.head()

Unnamed: 0,school,School Type,Total Students,Total School Budget,Per Student Budget
0,Huang High School,District,2917,1910635,655.0
1,Figueroa High School,District,2949,1884411,639.0
2,Shelton High School,Charter,1761,1056600,600.0
3,Hernandez High School,District,4635,3022020,652.0
4,Griffin High School,Charter,1468,917500,625.0


In [116]:
# Join school metadata (schools_summary) with the per-school score statistics
# (merge_stu_pass) on the shared 'school' column
combined_stu_sch = schools_summary.merge(merge_stu_pass, on='school')

# Turn the passing counts into percentages of each school's enrollment
enrollment = combined_stu_sch['Total Students']
for pct_col, count_col in [('% Passing Math', 'Passing Math Counts'),
                           ('% Passing Reading', 'Passing Reading Counts')]:
    combined_stu_sch[pct_col] = combined_stu_sch[count_col] / enrollment * 100

# Overall passing rate is the simple average of the two subject pass rates
combined_stu_sch['% Overall Passing Rate'] = (
    combined_stu_sch['% Passing Math'] + combined_stu_sch['% Passing Reading']
) / 2

# The raw counts are no longer needed once the percentages exist
combined_stu_sch = combined_stu_sch.drop(['Passing Math Counts', 'Passing Reading Counts'], axis=1)

# Render both budget columns as currency strings (they are text, not numbers, after this)
as_currency = '${:,.2f}'.format
for money_col in ('Total School Budget', 'Per Student Budget'):
    combined_stu_sch[money_col] = combined_stu_sch[money_col].map(as_currency)

# Relabel the key column and use it as the index so each row is identified by school name
combined_stu_sch = combined_stu_sch.rename(columns={'school': 'School Name'}).set_index(['School Name'])
combined_stu_sch

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Huang High School,District,2917,"$1,910,635.00",$655.00,76.629414,81.182722,88.858416,100.0,94.429208
Figueroa High School,District,2949,"$1,884,411.00",$639.00,76.711767,81.15802,88.436758,100.0,94.218379
Shelton High School,Charter,1761,"$1,056,600.00",$600.00,83.359455,83.725724,100.0,100.0,100.0
Hernandez High School,District,4635,"$3,022,020.00",$652.00,77.289752,80.934412,89.083064,100.0,94.541532
Griffin High School,Charter,1468,"$917,500.00",$625.00,83.351499,83.816757,100.0,100.0,100.0
Wilson High School,Charter,2283,"$1,319,574.00",$578.00,83.274201,83.989488,100.0,100.0,100.0
Cabrera High School,Charter,1858,"$1,081,356.00",$582.00,83.061895,83.97578,100.0,100.0,100.0
Bailey High School,District,4976,"$3,124,928.00",$628.00,77.048432,81.033963,89.529743,100.0,94.764871
Holden High School,Charter,427,"$248,087.00",$581.00,83.803279,83.814988,100.0,100.0,100.0
Pena High School,Charter,962,"$585,858.00",$609.00,83.839917,84.044699,100.0,100.0,100.0


# Top Performing Schools (By Passing Rate)