In [3]:
import os 
import pandas as pd

In [4]:
schools_data_csvpath = os.path.join('data','schools_complete.csv')
students_data_csvpath = os.path.join('data','students_complete.csv')

In [6]:
schools_df = pd.read_csv(schools_data_csvpath)

schools_df.head()

Unnamed: 0,School ID,name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [8]:
students_df = pd.read_csv(students_data_csvpath)

students_df.head()

Unnamed: 0,Student ID,name,gender,grade,school,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [9]:
# changing names to merge
schools_df = schools_df.rename(columns={'name':'school'})

schools_df.head()

Unnamed: 0,School ID,school,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [20]:
district_df = pd.merge(schools_df,students_df,on='school')

district_df.head(5)

Unnamed: 0,School ID,school,type,size,budget,Student ID,name,gender,grade,reading_score,math_score
0,0,Huang High School,District,2917,1910635,0,Paul Bradley,M,9th,66,79
1,0,Huang High School,District,2917,1910635,1,Victor Smith,M,12th,94,61
2,0,Huang High School,District,2917,1910635,2,Kevin Rodriguez,M,12th,90,60
3,0,Huang High School,District,2917,1910635,3,Dr. Richard Scott,M,12th,67,58
4,0,Huang High School,District,2917,1910635,4,Bonnie Ray,F,9th,97,84


In [83]:
# district summary 
total_schools = len(district_df['school'].unique())
total_students = district_df['Student ID'].count()
total_budget = schools_df['budget'].sum()
avg_math_score = students_df['math_score'].mean()
avg_reading_score= students_df['reading_score'].mean()
percent_passing_reading =(students_df.loc[students_df['reading_score']>70]['reading_score'].count())/students_df['reading_score'].count()*100
percent_passing_math = students_df.loc[students_df['math_score']>70]['math_score'].count()/students_df['math_score'].count()*100
overall_passing_rate = (percent_passing_math + percent_passing_reading)/2


district_summary_df = pd.DataFrame({
    'Total Schools': total_schools,
    'Total Students':total_students,
    'Total budget': total_budget,
    'Average Math Score': avg_math_score,
    'Average Reading Score': avg_reading_score,
    '% Passing Reading': percent_passing_reading,
    '% Passing Math': percent_passing_math,
    'Overall Passing Rate': overall_passing_rate
}, index = [0])

district_summary_df['Total Students'] = district_summary_df['Total Students'].map(" {:,.0f}".format)
district_summary_df['Total budget'] = district_summary_df['Total budget'].map(" ${:,.0f}".format)
district_summary_df['Average Math Score'] = district_summary_df['Average Math Score'].map(" {:,.1f}".format)
district_summary_df['Average Reading Score'] = district_summary_df['Average Reading Score'].map(" {:,.1f}".format)
district_summary_df['% Passing Reading'] = district_summary_df['% Passing Reading'].map(" {:,.1f}%".format)
district_summary_df['% Passing Math'] = district_summary_df['% Passing Math'].map(" {:,.1f}%".format)
district_summary_df['Overall Passing Rate'] = district_summary_df['Overall Passing Rate'].map(" {:,.1f}%".format)

district_summary_df

Unnamed: 0,Total Schools,Total Students,Total budget,Average Math Score,Average Reading Score,% Passing Reading,% Passing Math,Overall Passing Rate
0,15,39170,"$24,649,428",79.0,81.9,83.0%,72.4%,77.7%


In [143]:
total_students = district_df.groupby('school').count()['Student ID']
total_budget = district_df.groupby('school').sum()['budget']
per_student_budget = total_budget/total_students
avg_math_score = district_df.groupby('school').mean()['math_score']
avg_reading_score = district_df.groupby('school').mean()['reading_score']
percent_passing_math = district_df.loc[district_df['math_score'] >70].groupby('school').count()['math_score'] / school_summary_df['Total Students'] * 100
percent_passing_reading = district_df.loc[district_df['reading_score'] >70].groupby('school').count()['reading_score'] / school_summary_df['Total Students'] * 100
percent_total = (percent_passing_math + percent_passing_reading)/2


school_summary_df = pd.DataFrame({
    'Total Students': total_students,
    'Total Budget': total_budget,
    'Per Student Budget': per_student_budget,
    'Avg. Math Score': avg_math_score,
    'Avg. Reading Score': avg_reading_score,
    '% Passing Math': percent_passing_math,
    '% Passing Reading': percent_passing_reading,
    'Overall Passing Rate': percent_total
})

school_summary_df['Total Budget'] = school_summary_df['Total Budget'].map(" ${:,.0f}".format)
school_summary_df['Per Student Budget'] = school_summary_df['Per Student Budget'].map(" ${:,.0f}".format)
school_summary_df['Avg. Math Score'] = school_summary_df['Avg. Math Score'].map(" {:,.1f}".format)
school_summary_df['Avg. Reading Score'] = school_summary_df['Avg. Reading Score'].map(" {:,.1f}".format)
school_summary_df['% Passing Reading'] = school_summary_df['% Passing Reading'].map(" {:,.1f}%".format)
school_summary_df['% Passing Math'] = school_summary_df['% Passing Math'].map(" {:,.1f}%".format)
school_summary_df['Overall Passing Rate'] = school_summary_df['Overall Passing Rate'].map(" {:,.1f}%".format)

school_summary_df

Unnamed: 0_level_0,Total Students,Total Budget,Per Student Budget,Avg. Math Score,Avg. Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
school,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Bailey High School,4976,"$15,549,641,728","$3,124,928",77.0,81.0,64.6%,79.3%,72.0%
Cabrera High School,1858,"$2,009,159,448","$1,081,356",83.1,84.0,89.6%,93.9%,91.7%
Figueroa High School,2949,"$5,557,128,039","$1,884,411",76.7,81.2,63.8%,78.4%,71.1%
Ford High School,2739,"$4,831,365,924","$1,763,916",77.1,80.7,65.8%,77.5%,71.6%
Griffin High School,1468,"$1,346,890,000","$917,500",83.4,83.8,89.7%,93.4%,91.6%
Hernandez High School,4635,"$14,007,062,700","$3,022,020",77.3,80.9,64.7%,78.2%,71.5%
Holden High School,427,"$105,933,149","$248,087",83.8,83.8,90.6%,92.7%,91.7%
Huang High School,2917,"$5,573,322,295","$1,910,635",76.6,81.2,63.3%,78.8%,71.1%
Johnson High School,4761,"$14,733,628,650","$3,094,650",77.1,81.0,63.9%,78.3%,71.1%
Pena High School,962,"$563,595,396","$585,858",83.8,84.0,91.7%,92.2%,91.9%


In [147]:
# Top performing school
school_summary_df = school_summary_df.sort_values('Overall Passing Rate',ascending=False)
school_summary_df.head()

Unnamed: 0_level_0,Total Students,Total Budget,Per Student Budget,Avg. Math Score,Avg. Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
school,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Wilson High School,2283,"$3,012,587,442","$1,319,574",83.3,84.0,90.9%,93.3%,92.1%
Pena High School,962,"$563,595,396","$585,858",83.8,84.0,91.7%,92.2%,91.9%
Wright High School,1800,"$1,888,920,000","$1,049,400",83.7,84.0,90.3%,93.4%,91.9%
Cabrera High School,1858,"$2,009,159,448","$1,081,356",83.1,84.0,89.6%,93.9%,91.7%
Holden High School,427,"$105,933,149","$248,087",83.8,83.8,90.6%,92.7%,91.7%


In [151]:
# Bottom performing school 
school_summary_df = school_summary_df.sort_values('Overall Passing Rate',ascending=True)
school_summary_df.head()

Unnamed: 0_level_0,Total Students,Total Budget,Per Student Budget,Avg. Math Score,Avg. Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
school,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Rodriguez High School,3999,"$10,186,904,637","$2,547,363",76.8,80.7,64.1%,77.7%,70.9%
Figueroa High School,2949,"$5,557,128,039","$1,884,411",76.7,81.2,63.8%,78.4%,71.1%
Huang High School,2917,"$5,573,322,295","$1,910,635",76.6,81.2,63.3%,78.8%,71.1%
Johnson High School,4761,"$14,733,628,650","$3,094,650",77.1,81.0,63.9%,78.3%,71.1%
Hernandez High School,4635,"$14,007,062,700","$3,022,020",77.3,80.9,64.7%,78.2%,71.5%


In [179]:
#math score by grade
math_score_avg = students_df.groupby(['school','grade']).mean()['math_score'].rename('Avg. Math Score')
math_score_by_grade = pd.DataFrame(math_score_avg)
math_score_by_grade['Avg. Math Score'] = math_score_by_grade['Avg. Math Score'].map(" {:,.1f}".format)

math_score_by_grade

Unnamed: 0_level_0,Unnamed: 1_level_0,Avg. Math Score
school,grade,Unnamed: 2_level_1
Bailey High School,10th,77.0
Bailey High School,11th,77.5
Bailey High School,12th,76.5
Bailey High School,9th,77.1
Cabrera High School,10th,83.2
Cabrera High School,11th,82.8
Cabrera High School,12th,83.3
Cabrera High School,9th,83.1
Figueroa High School,10th,76.5
Figueroa High School,11th,76.9


In [180]:
#english score by grade
reading_score_avg = students_df.groupby(['school','grade']).mean()['reading_score'].rename('Avg. Reading Score')
reading_score_by_grade = pd.DataFrame(reading_score_avg)
reading_score_by_grade['Avg. Reading Score'] = reading_score_by_grade['Avg. Math Score'].map(" {:,.1f}".format)

reading_score_by_grade

KeyError: 'Avg. Math Score'