In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# File to Load (Remember to Change These)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas Data Frames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset
school_data_complete = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])

In [2]:
school_data_complete.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [3]:
#average reading score
avg_reading_score = school_data_complete["reading_score"].mean()

#Average math score
avg_math_score = school_data_complete["math_score"].mean()

#total number of students
total_students = school_data_complete["Student ID"].count()

#total schools
#schools_students = school_data_complete["school_name"].value_counts()
#total_schools = schools_students.count()

#school count
total_schools = len(school_data_complete["school_name"].unique())

#total budget
budget = school_data_complete["budget"].unique()
total_budget = budget.sum()

#overall pass rate
pass_rate = (avg_math_score + avg_reading_score)/2

#% pass math and reading
school_data_complete["pass_math"] = school_data_complete["math_score"] >= 70
school_data_complete["pass_reading"] = school_data_complete["reading_score"] >= 70

pass_math_count = school_data_complete["pass_math"].value_counts()
per_pass_math = (pass_math_count[True]/len(school_data_complete))*100


pass_reading_count = school_data_complete["pass_reading"].value_counts()
per_pass_reading = (pass_reading_count[True]/len(school_data_complete))*100



In [4]:
#create new district summary data frame and reformat
district_summary = pd.DataFrame({"Total Schools": [total_schools],
                           "Total Students": [total_students],
                           "Total Budget": [total_budget],
                           "Average Math Score": [avg_math_score],
                           "Average Reading Score": [avg_reading_score],
                           "% Passing Math": [per_pass_math],
                                "% Passing Reading": [per_pass_reading],
                                "% Overall Passing Rate": [pass_rate]},)

district_summary["Total Students"] = district_summary["Total Students"].map("{:,}".format)
district_summary["Total Budget"] = district_summary["Total Budget"].map("${:,.2f}".format)
district_summary["Average Math Score"] = district_summary["Average Math Score"].map("{:.2f}%".format)
district_summary["Average Reading Score"] = district_summary["Average Reading Score"].map("{:.2f}%".format)
district_summary["% Passing Math"] = district_summary["% Passing Math"].map("{:.2f}%".format)
district_summary["% Passing Reading"] = district_summary["% Passing Reading"].map("{:.2f}%".format)
district_summary["% Overall Passing Rate"] = district_summary["% Overall Passing Rate"].map("{:.2f}%".format)
district_summary.head()

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,15,39170,"$24,649,428.00",78.99%,81.88%,74.98%,85.81%,80.43%


In [17]:
school_summary = school_data_complete.groupby("school_name")

per_school_counts = school_data_complete["school_name"].value_counts()
per_school_budget = school_data_complete.groupby(["school_name"]).mean()["budget"]
per_school_capita = (per_school_budget / per_school_counts)

avg_math_score = school_summary["math_score"].mean()
avg_reading_score = school_summary["reading_score"].mean()
school_type = school_summary["type"].unique()
total_students = school_summary["size"].unique()
school_budget = school_summary["budget"].unique()
student_budget = school_budget/total_students

math_pass_students = school_data_complete[(school_data_complete["math_score"] >= 70)]
math_pass_percent = (math_pass_students["school_name"].value_counts()/ per_school_counts)*100

reading_pass_students = school_data_complete[(school_data_complete["reading_score"] >= 70)]
reading_pass_percent = (reading_pass_students["school_name"].value_counts()/ per_school_counts)*100

overall_pass_rate = (math_pass_percent + reading_pass_percent)/2


In [18]:
#math_pass_students = school_data_complete[(school_data_complete["math_score"] >= 70)]
#math_pass_students["school_name"].value_counts()/ per_school_counts

In [19]:
school_summary_dataframe = pd.DataFrame({"School Type": school_type,
                                "Total Students": total_students,
                                 "School Budget": school_budget,
                                 "Per Student Budget": student_budget,
                                 "Average Math Score": avg_math_score,
                                 "Average Reading Score": avg_reading_score,
                                 "% Passing Math": math_pass_percent,
                                 "% Passing Reading": reading_pass_percent,
                                 "% Overall Passing Rate": overall_pass_rate})

school_summary_dataframe["School Type"] = school_summary_dataframe["School Type"].map(lambda x: str(x)[2:-2])
school_summary_dataframe["Total Students"] = school_summary_dataframe["Total Students"].map(lambda x: str(x)[1:-1])
school_summary_dataframe["School Budget"] = school_summary_dataframe["School Budget"].map(lambda x: str(x)[1:-1])
#school_summary_dataframe["School Budget"] = school_summary_dataframe["School Budget"].map("${:,.2f}".format(float(u"5.0")))
school_summary_dataframe["Per Student Budget"] = school_summary_dataframe["Per Student Budget"].map(lambda x: str(x)[1:-2])
school_summary_dataframe["% Passing Math"] = school_summary_dataframe["% Passing Math"].map("{:.2f}%".format)
school_summary_dataframe["% Passing Reading"] = school_summary_dataframe["% Passing Reading"].map("{:.2f}%".format)
school_summary_dataframe["% Overall Passing Rate"] = school_summary_dataframe["% Overall Passing Rate"].map("{:.2f}%".format)
school_summary_dataframe

Unnamed: 0_level_0,School Type,Total Students,School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,District,4976,3124928,628,77.048432,81.033963,66.68%,81.93%,74.31%
Cabrera High School,Charter,1858,1081356,582,83.061895,83.97578,94.13%,97.04%,95.59%
Figueroa High School,District,2949,1884411,639,76.711767,81.15802,65.99%,80.74%,73.36%
Ford High School,District,2739,1763916,644,77.102592,80.746258,68.31%,79.30%,73.80%
Griffin High School,Charter,1468,917500,625,83.351499,83.816757,93.39%,97.14%,95.27%
Hernandez High School,District,4635,3022020,652,77.289752,80.934412,66.75%,80.86%,73.81%
Holden High School,Charter,427,248087,581,83.803279,83.814988,92.51%,96.25%,94.38%
Huang High School,District,2917,1910635,655,76.629414,81.182722,65.68%,81.32%,73.50%
Johnson High School,District,4761,3094650,650,77.072464,80.966394,66.06%,81.22%,73.64%
Pena High School,Charter,962,585858,609,83.839917,84.044699,94.59%,95.95%,95.27%


In [22]:
school_summary_dataframe = school_summary_dataframe.sort_values(by=["% Overall Passing Rate"], ascending=False)
school_summary_dataframe.head()

Unnamed: 0_level_0,School Type,Total Students,School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Cabrera High School,Charter,1858,1081356,582,83.061895,83.97578,94.13%,97.04%,95.59%
Thomas High School,Charter,1635,1043130,638,83.418349,83.84893,93.27%,97.31%,95.29%
Griffin High School,Charter,1468,917500,625,83.351499,83.816757,93.39%,97.14%,95.27%
Pena High School,Charter,962,585858,609,83.839917,84.044699,94.59%,95.95%,95.27%
Wilson High School,Charter,2283,1319574,578,83.274201,83.989488,93.87%,96.54%,95.20%


In [23]:
school_summary_dataframe = school_summary_dataframe.sort_values(by=["% Overall Passing Rate"], ascending=True)
school_summary_dataframe.head()

Unnamed: 0_level_0,School Type,Total Students,School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Rodriguez High School,District,3999,2547363,637,76.842711,80.744686,66.37%,80.22%,73.29%
Figueroa High School,District,2949,1884411,639,76.711767,81.15802,65.99%,80.74%,73.36%
Huang High School,District,2917,1910635,655,76.629414,81.182722,65.68%,81.32%,73.50%
Johnson High School,District,4761,3094650,650,77.072464,80.966394,66.06%,81.22%,73.64%
Ford High School,District,2739,1763916,644,77.102592,80.746258,68.31%,79.30%,73.80%


In [31]:
math_scores_sort = school_data_complete.groupby(["school_name", "grade"])
math_scores_sort = pd.DataFrame(math_scores_sort["math_score"].mean())
math_scores_sort

Unnamed: 0_level_0,Unnamed: 1_level_0,math_score
school_name,grade,Unnamed: 2_level_1
Bailey High School,10th,76.996772
Bailey High School,11th,77.515588
Bailey High School,12th,76.492218
Bailey High School,9th,77.083676
Cabrera High School,10th,83.154506
Cabrera High School,11th,82.76556
Cabrera High School,12th,83.277487
Cabrera High School,9th,83.094697
Figueroa High School,10th,76.539974
Figueroa High School,11th,76.884344
