In [None]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# File to Load
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas Data Frames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset
school_data_complete = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])
school_data_complete.head()

In [None]:
#Get the unique values for the schools
total_schools = len(school_data_complete["school_name"].unique())
#Get the unique values for the students
total_students = len(school_data_complete["student_name"])
#Calculate the total budget
total_budget = school_data_complete["budget"].unique().sum(axis=0)
#Calculate the math score average
math_average = school_data_complete["math_score"].mean()
#Calculate the reading score average
reading_average = school_data_complete["reading_score"].mean()

In [None]:
#Find the number of students passing math
total_passing_math = len(school_data_complete.loc[school_data_complete["math_score"] >= 70,["student_name","math_score"]])
#Calculate the percentage of students passing math
pct_passing_math = (total_passing_math/total_students) * 100

In [None]:
#Find the number of students passing reading
total_passing_reading = len(school_data_complete.loc[school_data_complete["reading_score"] >= 70,["student_name","reading_score"]])
#Calculate the percentage of students passing reading
pct_passing_reading = (total_passing_reading/total_students) * 100

In [None]:
#Calculate the overall passing percentage
overall_passing = (math_average + reading_average) / 2

## District Summary

In [77]:
#Create a summary table dataframe with the new column headers using the variables created
district_summary = pd.DataFrame({"Total Schools": [total_schools],
                                 "Total Students": total_students,
                                 "Total Budget": total_budget,
                                 "Average Math Score": math_average,
                                 "Average Reading Score": reading_average,
                                 "% Passing Math": pct_passing_math,
                                 "% Passing Reading": pct_passing_reading,
                                 "% Overall Passing": overall_passing})

#Format the students and budget columns
district_summary["Total Students"] = district_summary["Total Students"].map("{:,}".format) 
district_summary["Total Budget"] = district_summary["Total Budget"].map("${:,.2f}".format) 
#Print out the final summary
district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,80.431606


In [None]:
#Group by the school name and get academic averages
groupby_school = school_data_complete.groupby(["school_name"])
grouped_data = groupby_school.mean()

In [None]:
#Add the School Type to the grouped data
school_type = pd.DataFrame({"school_name": school_data["school_name"],
                               "School Type": school_data["type"]})
type_grouped_data = pd.merge(grouped_data, school_type, on=["school_name","school_name"])

In [None]:
#Caluclate the Per Student Budget and add it to the grouped data
student_budget = pd.DataFrame({"school_name": school_data["school_name"],
                               "Per Student Budget": school_data["budget"] / school_data["size"]})
budget_grouped_data = pd.merge(type_grouped_data, student_budget, on=["school_name","school_name"])

In [None]:
#Get the list of students passing math
passing_math_list = school_data_complete.loc[school_data_complete["math_score"] >= 70,["school_name","math_score"]]
#Group by the passing math list by the school and count the students passing math
passing_math_by_school = passing_math_list.groupby(["school_name"]).count()
#Calculate the percent passing math for each school
school_pct_passing_math = pd.DataFrame({"% Passing Math": (passing_math_by_school["math_score"] / grouped_data["size"]) * 100})

In [None]:
#Get the list of students passing reading
passing_reading_list = school_data_complete.loc[school_data_complete["reading_score"] >= 70,["school_name","reading_score"]]
#Group by the passing reading list by the school and count the students passing reading
passing_reading_by_school = passing_reading_list.groupby(["school_name"]).count()
#Calculate the percent passing reading for each school
school_pct_passing_reading =  pd.DataFrame({"% Passing Reading":(passing_reading_by_school["reading_score"] / grouped_data["size"]) * 100})


In [None]:
#Calculate the oveall passing score for each school
overall_passing_school = pd.DataFrame({"% Overall Passing Rate":(school_pct_passing_math["% Passing Math"] + school_pct_passing_reading["% Passing Reading"]) / 2})
#Combine the percentages in to one data frame
add_academic_scores = pd.merge(school_pct_passing_math, school_pct_passing_reading, on=["school_name"])
all_school_percentages = pd.merge(add_academic_scores, overall_passing_school, on=["school_name"])

In [None]:
#Add the school percentages to the grouped data
final_grouped_data = pd.merge(budget_grouped_data, all_school_percentages, on=["school_name"])

## School Summary

In [101]:
#Create a dataframe with the School Summary
school_summary = pd.DataFrame({"School Name": final_grouped_data["school_name"],
                               "School Type": final_grouped_data["School Type"],
                               "Total Students": final_grouped_data["size"],
                               "Total School Budget": final_grouped_data["budget"],
                               "Per Student Budget":final_grouped_data["Per Student Budget"],
                               "Average Math Score": final_grouped_data["math_score"],
                               "Average Reading Score": final_grouped_data["reading_score"],
                               "Per Student Budget": final_grouped_data["Per Student Budget"],
                               "% Passing Math": final_grouped_data["% Passing Math"],
                               "% Passing Reading": final_grouped_data["% Passing Reading"],
                               "% Overall Passing Rate": final_grouped_data["% Overall Passing Rate"]}).set_index("School Name", drop=True) 

#Format the student and budget columns
school_summary["Total Students"] = school_summary["Total Students"].map("{:,.0f}".format) 
school_summary["Total School Budget"] = school_summary["Total School Budget"].map("${:,.2f}".format) 
school_summary["Per Student Budget"] = school_summary["Per Student Budget"].map("${:,.2f}".format) 
#Remove the index column name and pring out the final summary
school_summary

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,District,4976,"$3,124,928.00",$628.00,77.048432,81.033963,66.680064,81.93328,74.306672
Cabrera High School,Charter,1858,"$1,081,356.00",$582.00,83.061895,83.97578,94.133477,97.039828,95.586652
Figueroa High School,District,2949,"$1,884,411.00",$639.00,76.711767,81.15802,65.988471,80.739234,73.363852
Ford High School,District,2739,"$1,763,916.00",$644.00,77.102592,80.746258,68.309602,79.299014,73.804308
Griffin High School,Charter,1468,"$917,500.00",$625.00,83.351499,83.816757,93.392371,97.138965,95.265668
Hernandez High School,District,4635,"$3,022,020.00",$652.00,77.289752,80.934412,66.752967,80.862999,73.807983
Holden High School,Charter,427,"$248,087.00",$581.00,83.803279,83.814988,92.505855,96.252927,94.379391
Huang High School,District,2917,"$1,910,635.00",$655.00,76.629414,81.182722,65.683922,81.316421,73.500171
Johnson High School,District,4761,"$3,094,650.00",$650.00,77.072464,80.966394,66.057551,81.222432,73.639992
Pena High School,Charter,962,"$585,858.00",$609.00,83.839917,84.044699,94.594595,95.945946,95.27027


## Top Performing Schools (By Passing Rate)

In [104]:
#Sort the School Summary by top performing schools 
top_performing_schools = school_summary.sort_values("% Overall Passing Rate", ascending=False)
top_performing_schools.head()

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Cabrera High School,Charter,1858,"$1,081,356.00",$582.00,83.061895,83.97578,94.133477,97.039828,95.586652
Thomas High School,Charter,1635,"$1,043,130.00",$638.00,83.418349,83.84893,93.272171,97.308869,95.29052
Pena High School,Charter,962,"$585,858.00",$609.00,83.839917,84.044699,94.594595,95.945946,95.27027
Griffin High School,Charter,1468,"$917,500.00",$625.00,83.351499,83.816757,93.392371,97.138965,95.265668
Wilson High School,Charter,2283,"$1,319,574.00",$578.00,83.274201,83.989488,93.867718,96.539641,95.203679


## Bottom Performing Schools (By Passing Rate)

In [105]:
#Sort the School Summary by bottom performing schools 
bottom_performing_schools = school_summary.sort_values("% Overall Passing Rate")
bottom_performing_schools.head()

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Rodriguez High School,District,3999,"$2,547,363.00",$637.00,76.842711,80.744686,66.366592,80.220055,73.293323
Figueroa High School,District,2949,"$1,884,411.00",$639.00,76.711767,81.15802,65.988471,80.739234,73.363852
Huang High School,District,2917,"$1,910,635.00",$655.00,76.629414,81.182722,65.683922,81.316421,73.500171
Johnson High School,District,4761,"$3,094,650.00",$650.00,77.072464,80.966394,66.057551,81.222432,73.639992
Ford High School,District,2739,"$1,763,916.00",$644.00,77.102592,80.746258,68.309602,79.299014,73.804308


## Math Scores by Grade

* Create a table that lists the average Reading Score for students of each grade level (9th, 10th, 11th, 12th) at each school.

  * Create a pandas series for each grade. Hint: use a conditional statement.
  
  * Group each series by school
  
  * Combine the series into a dataframe
  
  * Optional: give the displayed data cleaner formatting

## Reading Score by Grade 

* Perform the same operations as above for reading scores

## Scores by School Spending

* Create a table that breaks down school performances based on average Spending Ranges (Per Student). Use 4 reasonable bins to group school spending. Include in the table each of the following:
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * Overall Passing Rate (Average of the above two)

In [None]:
# Sample bins. Feel free to create your own bins.
spending_bins = [0, 585, 615, 645, 675]
group_names = ["<$585", "$585-615", "$615-645", "$645-675"]

## Scores by School Size

* Perform the same operations as above, based on school size.

In [None]:
# Sample bins. Feel free to create your own bins.
size_bins = [0, 1000, 2000, 5000]
group_names = ["Small (<1000)", "Medium (1000-2000)", "Large (2000-5000)"]

## Scores by School Type

* Perform the same operations as above, based on school type.