In [69]:
# Import Dependencies
import pandas as pd
import numpy as np

In [70]:
csv_path = "Resources/schools_complete copy.csv"
schools_df = pd.read_csv(csv_path)

schools_df.head()

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [71]:
csv_path = "Resources/students_complete copy.csv"
students_df = pd.read_csv(csv_path)

students_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [72]:
combined_df = pd.merge(schools_df, students_df, how="left", on = "school_name")
combined_df.head()

Unnamed: 0,School ID,school_name,type,size,budget,Student ID,student_name,gender,grade,reading_score,math_score
0,0,Huang High School,District,2917,1910635,0,Paul Bradley,M,9th,66,79
1,0,Huang High School,District,2917,1910635,1,Victor Smith,M,12th,94,61
2,0,Huang High School,District,2917,1910635,2,Kevin Rodriguez,M,12th,90,60
3,0,Huang High School,District,2917,1910635,3,Dr. Richard Scott,M,12th,67,58
4,0,Huang High School,District,2917,1910635,4,Bonnie Ray,F,9th,97,84


In [73]:
district_df = combined_df.loc[combined_df["type"] == "District", :]
district_df.head()

Unnamed: 0,School ID,school_name,type,size,budget,Student ID,student_name,gender,grade,reading_score,math_score
0,0,Huang High School,District,2917,1910635,0,Paul Bradley,M,9th,66,79
1,0,Huang High School,District,2917,1910635,1,Victor Smith,M,12th,94,61
2,0,Huang High School,District,2917,1910635,2,Kevin Rodriguez,M,12th,90,60
3,0,Huang High School,District,2917,1910635,3,Dr. Richard Scott,M,12th,67,58
4,0,Huang High School,District,2917,1910635,4,Bonnie Ray,F,9th,97,84


In [81]:
school_names = schools_df['school_name'].unique()
total_schools = len(school_names)
total_students = schools_df["size"].sum()
total_student_rec = students_df['student_name'].count()
total_budget = schools_df["budget"].sum()
avg_math_score = combined_df["math_score"].mean()
avg_read_score = combined_df["reading_score"].mean()
passing_math = students_df.loc[students_df['math_score'] >= 70]['math_score'].count()
perc_pass_math = (passing_math/total_students)
passing_reading = students_df.loc[students_df['reading_score'] >= 70]['reading_score'].count()
perc_pass_reading = (passing_reading/total_students)
overall_pass = (avg_math_score + avg_read_score)/2

district_summary = pd.DataFrame  ({"Total Schools": [total_schools],
    "Total Students": [total_students],
    "Total Budget": [total_budget],
    "Average Math Score": [avg_math_score],                               
    "Average Reading Score": [avg_read_score],
    "% Passing Math": [perc_pass_math],                              
    "% Passing Reading":[perc_pass_reading],
    "Overall Passing Rate": [overall_pass]  })

district_summary.style.format({"Total Budget": "${:,.2f}", 
                       "Average Reading Score": "{:.1f}", 
                       "Average Math Score": "{:.1f}", 
                       "% Passing Math": "{:.1%}", 
                       "% Passing Reading": "{:.1%}", 
                       "Overall Passing Rate": "{:.1%}"})




district_summary.head()

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
0,15,39170,24649428,78.985371,81.87784,0.749809,0.858055,80.431606


In [98]:
school_group = combined_df.set_index('school_name').groupby(['school_name'])
types = schools_df.set_index('school_name')['type']

# total students by school
tot_stu = school_group['Student ID'].count()

# school budget
sch_budget = schools_df.set_index('school_name')['budget']

#per student budget
stu_budget = schools_df.set_index('school_name')['budget']/schools_df.set_index('school_name')['size']

#avg scores by school
avg_math = school_group['math_score'].mean()
avg_read = school_group['reading_score'].mean()

# % passing scores
pass_math = combined_df[combined_df['math_score'] >= 70].groupby('school_name')['Student ID'].count()/tot_stu
pass_read = combined_df[combined_df['reading_score'] >= 70].groupby('school_name')['Student ID'].count()/tot_stu 
overall = (pass_math + pass_read)/2 

school_summary = pd.DataFrame({
    "School Type": types,
    "Total Students": tot_stu,
    "Per Student Budget": stu_budget,
    "Total School Budget": sch_budget,
    "Average Math Score": avg_math,
    "Average Reading Score": avg_read,
    '% Passing Math': pass_math,
    '% Passing Reading': pass_read,
    "Overall Passing Rate": overall
})

school_summary.head()

Unnamed: 0,School Type,Total Students,Per Student Budget,Total School Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
Bailey High School,District,4976,628.0,3124928,77.048432,81.033963,0.666801,0.819333,0.743067
Cabrera High School,Charter,1858,582.0,1081356,83.061895,83.97578,0.941335,0.970398,0.955867
Figueroa High School,District,2949,639.0,1884411,76.711767,81.15802,0.659885,0.807392,0.733639
Ford High School,District,2739,644.0,1763916,77.102592,80.746258,0.683096,0.79299,0.738043
Griffin High School,Charter,1468,625.0,917500,83.351499,83.816757,0.933924,0.97139,0.952657


In [99]:
# sort values by passing rate and then only print top 5 
top_5 = school_summary.sort_values("Overall Passing Rate", ascending = False)
top_5.head(5)

Unnamed: 0,School Type,Total Students,Per Student Budget,Total School Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
Cabrera High School,Charter,1858,582.0,1081356,83.061895,83.97578,0.941335,0.970398,0.955867
Thomas High School,Charter,1635,638.0,1043130,83.418349,83.84893,0.932722,0.973089,0.952905
Pena High School,Charter,962,609.0,585858,83.839917,84.044699,0.945946,0.959459,0.952703
Griffin High School,Charter,1468,625.0,917500,83.351499,83.816757,0.933924,0.97139,0.952657
Wilson High School,Charter,2283,578.0,1319574,83.274201,83.989488,0.938677,0.965396,0.952037


In [103]:
bottom_5 = top_5.tail()
bottom_5 = school_summary.sort_values("Overall Passing Rate")

bottom_5.head(5)



Unnamed: 0,School Type,Total Students,Per Student Budget,Total School Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
Rodriguez High School,District,3999,637.0,2547363,76.842711,80.744686,0.663666,0.802201,0.732933
Figueroa High School,District,2949,639.0,1884411,76.711767,81.15802,0.659885,0.807392,0.733639
Huang High School,District,2917,655.0,1910635,76.629414,81.182722,0.656839,0.813164,0.735002
Johnson High School,District,4761,650.0,3094650,77.072464,80.966394,0.660576,0.812224,0.7364
Ford High School,District,2739,644.0,1763916,77.102592,80.746258,0.683096,0.79299,0.738043
