In [79]:
# Dependencies and Setup
import pandas as pd
import statistics as st

# Input CSV locations, relative to the notebook's working directory.
# Adjust these two paths if the Resources folder lives somewhere else.
school_data_to_load, student_data_to_load = (
    "Resources/schools_complete.csv",
    "Resources/students_complete.csv",
)

# Load each CSV into its own pandas DataFrame.
school_data, student_data = (
    pd.read_csv(csv_path)
    for csv_path in (school_data_to_load, student_data_to_load)
)

# Preview the first rows of the school table.
school_data.head()

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [12]:
student_data.head(2)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61


In [74]:
# Combine the data into a single dataset.
# Left join: every student row is kept and gains the columns of its school,
# matched on the shared "school_name" column. The join key is named once;
# listing the same column twice in `on` adds nothing.
school_data_complete = pd.merge(
    student_data,
    school_data,
    how="left",
    on="school_name",
)
school_data_complete.head(2)


Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635


In [75]:
# Row masks for the passing threshold: a score of 70 or above counts as a pass.
passed_math_mask = student_data["math_score"] >= 70
passed_reading_mask = student_data["reading_score"] >= 70

# Students who passed math (math score column only).
pass_math_df = student_data.loc[passed_math_mask, ["math_score"]]

# Students who passed reading (reading score column only).
pass_reading_df = student_data.loc[passed_reading_mask, ["reading_score"]]

# Students who passed both subjects (both score columns).
pass_all_df = student_data.loc[
    passed_reading_mask & passed_math_mask,
    ["reading_score", "math_score"],
]


In [77]:
# One-row summary across the whole district.
# - Counts and the budget total are stored as strings with thousands separators.
# - Averages and pass rates are rounded to two decimals; pass rates are
#   percentages of all students, based on the pass_*_df frames built earlier.
# Aggregates use the Series methods (.sum(), .mean()) rather than the builtin
# sum / statistics.mean, which iterate element by element in pure Python.
district_summary_df = pd.DataFrame({
    "Total Schools": [len(school_data)],
    "Total Students": ["{:,}".format(len(student_data))],
    # int(...) converts the NumPy scalar to a plain int before formatting.
    "Total Budget": ["{:,}".format(int(school_data["budget"].sum()))],
    "Average Math Score": [round(student_data["math_score"].mean(), 2)],
    "Average Reading Score": [round(student_data["reading_score"].mean(), 2)],
    "% Passing Math": [round(100 * len(pass_math_df) / len(student_data), 2)],
    "% Passing Reading": [round(100 * len(pass_reading_df) / len(student_data), 2)],
    "% Overall Passing": [round(100 * len(pass_all_df) / len(student_data), 2)],
})
# The former `district_summary_df.style.hide_index()` statement was dropped:
# its result was discarded, so it never changed what is displayed, and
# Styler.hide_index no longer exists in pandas 2.x (it raises AttributeError).
district_summary_df.head()

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,24649428,78.99,81.88,74.98,85.81,65.17


In [81]:
# Narrow the merged student/school table down to the columns listed here.
school_columns = [
    "school_name",
    "type",
    "Student ID",
    "budget",
    "math_score",
    "reading_score",
]
schools_total_df = school_data_complete[school_columns]

# Preview the first rows of the narrowed table.
schools_total_df.head()

Unnamed: 0,school_name,type,Student ID,budget,math_score,reading_score
0,Huang High School,District,0,1910635,79,66
1,Huang High School,District,1,1910635,61,94
2,Huang High School,District,2,1910635,60,90
3,Huang High School,District,3,1910635,58,67
4,Huang High School,District,4,1910635,84,97
