In [1]:
# Dependencies and Setup
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'

In [2]:
# File to Load (Remember to Change These)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

In [3]:
# Combine the data into a single dataset. 
school_data = pd.read_csv(school_data_to_load)
student_data= pd.read_csv(student_data_to_load)

#Merge the school and student DataFrames
school_data_complete_raw = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])
school_data_complete = school_data_complete_raw.rename({'Student ID': 'student_id', 'School ID': 'school_id'}, axis=1)

In [4]:
#Display two decimal positions for float values
pd.set_option('display.precision',2)

<h1><strong> District Summary <strong></h1>


In [5]:
# Calculate the values to construct the district summary

total_schools   =school_data["school_name"].count()
total_students  =school_data_complete["student_id"].max()+1
math_average    =school_data_complete["math_score"].mean()
reading_average = school_data_complete["reading_score"].mean()
total_budget    = [school_data_complete["budget"].unique().sum()]
percentage_math_passed   =len(school_data_complete.loc[school_data_complete.math_score >= 70])*100/total_students
percentage_reading_passed=len(school_data_complete.loc[school_data_complete.reading_score >= 70])*100/total_students
overall_passed=len(school_data_complete.loc[(school_data_complete.reading_score >= 70) & (school_data_complete.math_score >= 70)])*100/total_students

#District Summary table
district_summary_df= pd.DataFrame({"Total schools":total_schools,
                                   "Total Students":total_students,
                                "Average Reading score": reading_average,
                                  "Average Math score": math_average,
                                  "Total Budget": total_budget,
                                  "Total students": total_students,
                                   "% of Students Passed math" :percentage_math_passed,
                                   "% of Students Passed reading" :percentage_reading_passed,
                                   "Overall Passed" :overall_passed
                                 })
district_summary_df.head()

Unnamed: 0,Total schools,Total Students,Average Reading score,Average Math score,Total Budget,Total students,% of Students Passed math,% of Students Passed reading,Overall Passed
0,15,39170,81.88,78.99,24649428,39170,74.98,85.81,65.17


<h1><strong> School Summary <strong></h1>

In [6]:
#create school Data Frame
grouped_school_df = school_data_complete.groupby(['school_name'])
school_score_df= grouped_school_df[["math_score","reading_score"]].mean()
school_summary_df= pd.merge(school_data, school_score_df, how="left", on=["school_name", "school_name"])
school_summary_df["budget_per_student"]=(school_summary_df["budget"]/school_summary_df["size"])


In [7]:
# find math passed
math_passed = school_data_complete.loc[school_data_complete.math_score >= 70]
math_passed_df=pd.DataFrame(math_passed)
math_grp_df = math_passed_df.groupby(['school_name'])
total_math=math_grp_df["student_name"].count()
total_math_df=pd.DataFrame({"total_math_passed":total_math})

# find reading passed
read_passed = school_data_complete.loc[school_data_complete.reading_score >= 70]
read_passed_df=pd.DataFrame(read_passed)
read_grp_df = read_passed_df.groupby(['school_name'])
total_read=read_grp_df["student_name"].count()
total_read_df=pd.DataFrame({"total_read_passed":total_read})

#Find over all passed 
math_read_passed = school_data_complete.loc[(school_data_complete.math_score >= 70) & (school_data_complete.reading_score >= 70)]
math_read_passed_df=pd.DataFrame(math_read_passed)
math_read_grp_df = math_read_passed_df.groupby(['school_name'])
total_math_read=math_read_grp_df["student_name"].count()
total_math_read_df=pd.DataFrame({"total_math_read_passed":total_math_read})
 
#merge Data frames 
first_merge_df= pd.merge(school_summary_df, total_math_df, how="left", on=["school_name", "school_name"])
second_merge_df= pd.merge(first_merge_df, total_read_df, how="left", on=["school_name", "school_name"])
third_merge_df =pd.merge(second_merge_df, total_math_read_df, how="left", on=["school_name", "school_name"])

#Find Percentage passed in math, reading and overall
third_merge_df["percent_math"]=(third_merge_df["total_math_passed"]/third_merge_df["size"])*100
third_merge_df["percent_reading"]=(third_merge_df["total_read_passed"]/third_merge_df["size"])*100
third_merge_df["over_all_passed_percent"]=(third_merge_df["total_math_read_passed"]/third_merge_df["size"])*100

final_school_summary=third_merge_df[["school_name","type","size","budget","math_score","reading_score","percent_math","percent_reading","over_all_passed_percent"]]
final_school_budget_summary=third_merge_df[["school_name","type","size","budget","math_score","reading_score","percent_math","percent_reading","over_all_passed_percent"]]
final_school_summary

format_dict={'budget':'${:.2f}' }
final_school_summary.style.format(format_dict)
final_school_summary["budget"]=final_school_summary["budget"].map("${:,}".format)
final_school_summary

Unnamed: 0,school_name,type,size,budget,math_score,reading_score,percent_math,percent_reading,over_all_passed_percent
0,Huang High School,District,2917,"$1,910,635",76.63,81.18,65.68,81.32,53.51
1,Figueroa High School,District,2949,"$1,884,411",76.71,81.16,65.99,80.74,53.2
2,Shelton High School,Charter,1761,"$1,056,600",83.36,83.73,93.87,95.85,89.89
3,Hernandez High School,District,4635,"$3,022,020",77.29,80.93,66.75,80.86,53.53
4,Griffin High School,Charter,1468,"$917,500",83.35,83.82,93.39,97.14,90.6
5,Wilson High School,Charter,2283,"$1,319,574",83.27,83.99,93.87,96.54,90.58
6,Cabrera High School,Charter,1858,"$1,081,356",83.06,83.98,94.13,97.04,91.33
7,Bailey High School,District,4976,"$3,124,928",77.05,81.03,66.68,81.93,54.64
8,Holden High School,Charter,427,"$248,087",83.8,83.81,92.51,96.25,89.23
9,Pena High School,Charter,962,"$585,858",83.84,84.04,94.59,95.95,90.54


<h1><strong> Top Performing Schools (by % Overall Passing<strong)</h1>)

In [8]:
#Top Performing Schools (By % Overall Passing)
#Sort and display the top five performing schools by % overall passing.

final_school_summary.sort_values("over_all_passed_percent",ascending=False).head()

Unnamed: 0,school_name,type,size,budget,math_score,reading_score,percent_math,percent_reading,over_all_passed_percent
6,Cabrera High School,Charter,1858,"$1,081,356",83.06,83.98,94.13,97.04,91.33
14,Thomas High School,Charter,1635,"$1,043,130",83.42,83.85,93.27,97.31,90.95
4,Griffin High School,Charter,1468,"$917,500",83.35,83.82,93.39,97.14,90.6
5,Wilson High School,Charter,2283,"$1,319,574",83.27,83.99,93.87,96.54,90.58
9,Pena High School,Charter,962,"$585,858",83.84,84.04,94.59,95.95,90.54


<h1><strong> Botton Performing schools (by % Overall Passing )<strong></h1>

In [9]:
#Bottom Performing school
final_school_summary.sort_values("over_all_passed_percent",ascending=False).tail()

Unnamed: 0,school_name,type,size,budget,math_score,reading_score,percent_math,percent_reading,over_all_passed_percent
12,Johnson High School,District,4761,"$3,094,650",77.07,80.97,66.06,81.22,53.54
3,Hernandez High School,District,4635,"$3,022,020",77.29,80.93,66.75,80.86,53.53
0,Huang High School,District,2917,"$1,910,635",76.63,81.18,65.68,81.32,53.51
1,Figueroa High School,District,2949,"$1,884,411",76.71,81.16,65.99,80.74,53.2
11,Rodriguez High School,District,3999,"$2,547,363",76.84,80.74,66.37,80.22,52.99


<h1><strong> Math Scores By Grade <strong></h1>

In [10]:
math_9 = school_data_complete.loc[school_data_complete.grade=="9th"]
math_group_9= math_9.groupby('school_name').math_score.mean()
math_9th_df=pd.DataFrame({"9th grade math average":math_group_9})

math_10 = school_data_complete.loc[school_data_complete.grade=="10th"]
math_group_10= math_10.groupby('school_name').math_score.mean()
math_10th_df=pd.DataFrame({"10th grade math average":math_group_10})

math_11 = school_data_complete.loc[school_data_complete.grade=="11th"]
math_group_11= math_11.groupby('school_name').math_score.mean()
math_11th_df=pd.DataFrame({"11th grade math average":math_group_11})

math_12 = school_data_complete.loc[school_data_complete.grade=="12th"]
math_group_12= math_12.groupby('school_name').math_score.mean()
math_12th_df=pd.DataFrame({"12th grade math average":math_group_12})

# x = pd.merge(math_group_9,math_group_10, left_index=True, right_on= 'school_name').head()
grade_by_math_summary =pd.concat([math_9th_df,math_10th_df,math_11th_df,math_12th_df],axis=1)
grade_by_math_summary


Unnamed: 0_level_0,9th grade math average,10th grade math average,11th grade math average,12th grade math average
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,77.08,77.0,77.52,76.49
Cabrera High School,83.09,83.15,82.77,83.28
Figueroa High School,76.4,76.54,76.88,77.15
Ford High School,77.36,77.67,76.92,76.18
Griffin High School,82.04,84.23,83.84,83.36
Hernandez High School,77.44,77.34,77.14,77.19
Holden High School,83.79,83.43,85.0,82.86
Huang High School,77.03,75.91,76.45,77.23
Johnson High School,77.19,76.69,77.49,76.86
Pena High School,83.63,83.37,84.33,84.12


<h1><strong> Reading Score By Grade <strong></h1>

In [11]:
read_9 = school_data_complete.loc[school_data_complete.grade=="9th"]
read_group_9= read_9.groupby('school_name').reading_score.mean()
read_9th_df=pd.DataFrame({"9th grade reading average":read_group_9})

read_10 = school_data_complete.loc[school_data_complete.grade=="10th"]
read_group_10= read_10.groupby('school_name').reading_score.mean()
read_10th_df=pd.DataFrame({"10th grade reading average":read_group_10})

read_11 = school_data_complete.loc[school_data_complete.grade=="11th"]
read_group_11= read_11.groupby('school_name').reading_score.mean()
read_11th_df=pd.DataFrame({"11th grade reading average":read_group_11})

read_12 = school_data_complete.loc[school_data_complete.grade=="12th"]
read_group_12= read_12.groupby('school_name').reading_score.mean()
read_12th_df=pd.DataFrame({"9th grade reading average":read_group_12})

# x = pd.merge(math_group_9,math_group_10, left_index=True, right_on= 'school_name').head()
grade_by_read_summary =pd.concat([read_9th_df,read_10th_df,read_11th_df,read_12th_df],axis=1)
grade_by_read_summary

Unnamed: 0_level_0,9th grade reading average,10th grade reading average,11th grade reading average,9th grade reading average
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,81.3,80.91,80.95,80.91
Cabrera High School,83.68,84.25,83.79,84.29
Figueroa High School,81.2,81.41,80.64,81.38
Ford High School,80.63,81.26,80.4,80.66
Griffin High School,83.37,83.71,84.29,84.01
Hernandez High School,80.87,80.66,81.4,80.86
Holden High School,83.68,83.32,83.82,84.7
Huang High School,81.29,81.51,81.42,80.31
Johnson High School,81.26,80.77,80.62,81.23
Pena High School,83.81,83.61,84.34,84.59


<h1><strong>Scores by School Spending <strong></h1>

In [12]:
#Create Dataframe grouped by school spending

bin=[0,999999,2999999,3999999]
group_names=["one million", "two million","three million"]
final_school_budget_summary["school budget"]= pd.cut(final_school_budget_summary["budget"],bin,labels=group_names,include_lowest=True)
score_by_spending=final_school_budget_summary.groupby("school budget")
score_by_spending[["math_score","reading_score","percent_math","percent_reading","over_all_passed_percent"]].mean().round(decimals=2)

Unnamed: 0_level_0,math_score,reading_score,percent_math,percent_reading,over_all_passed_percent
school budget,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one million,83.66,83.89,93.5,96.45,90.12
two million,80.45,82.59,81.65,89.44,74.12
three million,77.14,80.98,66.5,81.34,53.9


<h1><strong> Scores by School Type <strong></h1>

In [13]:
#Create dataFrame with grouped by school type

type_df = final_school_summary.groupby(['type'])
type_df[["math_score","reading_score","percent_reading","percent_math","over_all_passed_percent"]].mean().round(decimals=2)


Unnamed: 0_level_0,math_score,reading_score,percent_reading,percent_math,over_all_passed_percent
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,83.47,83.9,96.59,93.62,90.43
District,76.96,80.97,80.8,66.55,53.67


<h1><strong> Scores by School Size <strong></h1>

In [None]:
#create dataFrame by school size

bin=[0,999,2999,5000]
group_names=["less than 1000", "bet 1000 and 2000","Over 2000"]
final_school_budget_summary["school size"]= pd.cut(final_school_budget_summary["size"],bin,labels=group_names,include_lowest=True)
score_by_size=final_school_budget_summary.groupby("school size")
score_by_size[["math_score","reading_score","percent_math","percent_reading","over_all_passed_percent"]].mean().round(decimals=2)