In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np

In [2]:
# File to Load (Remember to Change These)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas DataFrames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

In [3]:
# calculate total number of schools
tot_schools=school_data["School ID"].count()
# calculate total number of students
tot_students=school_data["size"].sum()
# calculate total budget
tot_budget="${:,.2f}".format(school_data["budget"].sum())

In [10]:
# Combine the data into a single dataset.  
clean_school_data = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"]).copy()
clean_school_data.head(2)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635


In [5]:
# calculate average math score
avg_math_score=round(clean_school_data.math_score.mean(),6)
# calculate average reading score
avg_read_score=round(clean_school_data.reading_score.mean(),5)
# calculate percentage of passing math
perc_pass_math=round(len(clean_school_data.loc[clean_school_data['math_score']>=70])/tot_students,8)
# calculate percentage of passing reading 
perc_pass_read=round(len(clean_school_data.loc[clean_school_data['reading_score']>=70])/tot_students,8)
# calculate percentage of passing math and reading
perc_overall_pass=round(len(clean_school_data.loc[(clean_school_data['reading_score']>=70) & 
                                            (clean_school_data['math_score']>=70)])/tot_students,8)

# create a dataframe for District Summary
dist_summary_df=pd.DataFrame({"Total Schools":[tot_schools], "Total Students":[tot_students], "Total Budget":[tot_budget],
                "Average Math Score":[avg_math_score], "Average Reading Score":[avg_read_score],
                "% Passing Math":[perc_pass_math*100], "% Passing Reading":[perc_pass_read*100],
                "% Overall Passing":[perc_overall_pass*100]})
dist_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326


In [6]:
# School Summary
school_names=clean_school_data.groupby(['school_name'])
# extracting school type from the grouped school names
school_type=school_names.type.unique()

In [7]:
# total students each school
tot_studin_school=school_names['Student ID'].count()
# total budget of each school
tot_school_budget=school_names['budget'].mean()
# per student budget of each school
per_stud_budget=tot_school_budget/tot_studin_school
# average math score of each school
avg_school_math_score=school_names['math_score'].mean()
# average reading score of each school
avg_school_read_score=school_names['reading_score'].mean()

In [8]:
# passing math percentage of each school
pass_math=clean_school_data.loc[clean_school_data['math_score']>=70]
math_school=pass_math.groupby(['school_name']).count()
school_students=clean_school_data['school_name'].value_counts()
math_perc_pass=(math_school.math_score/school_students)*100

# passing reading percentage of each school
pass_read=clean_school_data.loc[clean_school_data['reading_score']>=70]
read_school=pass_read.groupby(['school_name']).count()
read_perc_pass=(read_school.reading_score/school_students)*100
# percentage of students that passed math and reading
overall_pass=clean_school_data.loc[(clean_school_data['reading_score']>=70) & 
                                            (clean_school_data['math_score']>=70)]
overall_school=overall_pass.groupby(['school_name']).count()
overall_perc_pass=(overall_school['math_score']&overall_school['reading_score'])/school_students*100

In [9]:
# create the school summary dataframe
school_summary = pd.DataFrame({
    "Student Count":tot_studin_school,
    "School Type":school_type, 
    "School Budget":tot_school_budget, 
    "Per Student Budget":per_stud_budget,
    "Ave Math Score":avg_school_math_score, 
    "Ave Reading Score":avg_school_read_score, 
    "% Passing Math":math_perc_pass, 
    "% Passing Reading":read_perc_pass,
    "Overall Passing Rate":overall_perc_pass})

school_summary

Unnamed: 0_level_0,Student Count,School Type,School Budget,Per Student Budget,Ave Math Score,Ave Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,4976,[District],3124928,628.0,77.048432,81.033963,66.680064,81.93328,54.642283
Cabrera High School,1858,[Charter],1081356,582.0,83.061895,83.97578,94.133477,97.039828,91.334769
Figueroa High School,2949,[District],1884411,639.0,76.711767,81.15802,65.988471,80.739234,53.204476
Ford High School,2739,[District],1763916,644.0,77.102592,80.746258,68.309602,79.299014,54.289887
Griffin High School,1468,[Charter],917500,625.0,83.351499,83.816757,93.392371,97.138965,90.599455
Hernandez High School,4635,[District],3022020,652.0,77.289752,80.934412,66.752967,80.862999,53.527508
Holden High School,427,[Charter],248087,581.0,83.803279,83.814988,92.505855,96.252927,89.227166
Huang High School,2917,[District],1910635,655.0,76.629414,81.182722,65.683922,81.316421,53.513884
Johnson High School,4761,[District],3094650,650.0,77.072464,80.966394,66.057551,81.222432,53.539172
Pena High School,962,[Charter],585858,609.0,83.839917,84.044699,94.594595,95.945946,90.540541
