City Schools Scores Analysis 
===

In [3]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
school_data_to_load = "../Resources/schools_complete.csv"
student_data_to_load = "../Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas DataFrames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset: every row of student_data is kept
# (left join) and gains the columns of its school via the shared
# "school_name" key.
# validate="many_to_one" makes pandas raise a MergeError if a school name
# appears more than once in school_data; without it such a duplicate would
# silently duplicate student rows and inflate every count computed below.
school_data_complete = pd.merge(
    student_data,
    school_data,
    how="left",
    on="school_name",
    validate="many_to_one",
)

In [4]:
## CSS rules shared by every styled dataframe displayed in this notebook

###table header cells (th)
heading_format = [
    ('font-size', '14px'),
    ('text-align', 'right'),
    ('font-weight', 'bold'),
    ('background-color', '#CFCFCF'),
]

###table body cells (td)
element_format = [('font-size', '13px')]

###table caption
caption_element = [
    ('font-size', '15px'),
    ('text-align', 'center'),
    ('font-weight', 'bold'),
]

###pair each CSS selector with its rules; this list is what the cells below
###pass to Styler.set_table_styles
styles = [
    {"selector": css_selector, "props": css_rules}
    for css_selector, css_rules in (
        ("th", heading_format),
        ("td", element_format),
        ("caption", caption_element),
    )
]

###display-friendly column header names

# Only the two columns that are used as table index labels below are renamed;
# every other column keeps its raw name.
display_column_names = {"school_name": "School Name", "type": "School Type"}

school_data_complete_renamed = school_data_complete.rename(columns=display_column_names)

---
## District Summary

In [5]:
###create dataframes with students achieving passing scores (70 or greater)

# Both masks are built from the same frame they are applied to, so the
# filters never depend on index alignment with a different dataframe.
is_passing_math = school_data_complete_renamed["math_score"] >= 70
is_passing_reading = school_data_complete_renamed["reading_score"] >= 70

passing_math = school_data_complete_renamed.loc[is_passing_math, :]

passing_reading = school_data_complete_renamed.loc[is_passing_reading, :]

passing_overall = school_data_complete_renamed.loc[is_passing_math & is_passing_reading, :]

###create dataframe with findings (inclusive of calculation steps)

total_students = school_data_complete_renamed["Student ID"].count()

# The budget column is repeated on every student row of a school. Keep one
# row per school before summing; summing the *unique budget values* instead
# would drop a school whenever two schools happen to share the same budget.
total_budget = school_data_complete_renamed.drop_duplicates("School Name")["budget"].sum()

summary_df = pd.DataFrame({
    "Total Schools": [school_data_complete_renamed["School Name"].nunique()],
    "Total Students": [total_students],
    "Total Budget": [total_budget],
    "Average Math Score": [school_data_complete_renamed["math_score"].mean()],
    "Average Reading Score": [school_data_complete_renamed["reading_score"].mean()],
    # passing rates are stored as fractions; the "%" format below scales them
    "% Passing Math": [passing_math["Student ID"].count() / total_students],
    "% Passing Reading": [passing_reading["Student ID"].count() / total_students],
    "% Overall Passing": [passing_overall["Student ID"].count() / total_students]})

###format and display findings

summary_styler = summary_df.style.set_caption("District Level Results"
                ).format({
                        "Total Schools": "{:,}",
                        "Total Students" : "{:,}",
                        "Total Budget" : "${:,}",
                        "Average Math Score" : "{:,.3f}",
                        "Average Reading Score" : "{:,.3f}",
                        "% Passing Math": "{:,.2%}",
                        "% Passing Reading": "{:,.2%}",
                        "% Overall Passing" : "{:,.2%}"}
                ).set_table_styles(styles)

# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0 in
# favour of Styler.hide(axis="index"); use whichever this pandas provides so
# the cell runs on both old and new installations.
summary_styler.hide(axis="index") if hasattr(summary_styler, "hide") else summary_styler.hide_index()

Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
15,39170,"$24,649,428",78.985,81.878,74.98%,85.81%,65.17%


---

## School Summary

In [6]:
###stratify data by school name and type

school_keys = ["School Name", "School Type"]

school_grouped_data = school_data_complete_renamed.groupby(school_keys)

school_grouped_data_passing_math = passing_math.groupby(school_keys)

school_grouped_data_passing_reading = passing_reading.groupby(school_keys)

school_grouped_data_passing_overall = passing_overall.groupby(school_keys)

###create dataframe (inclusive of steps to create series)

# Single denominator used for every per-school ratio below.
students_per_school = school_grouped_data["Student ID"].count()

# The budget is repeated on each student row of a school, so first() yields
# it as a plain scalar per school (unique() would yield one-element arrays
# that then have to be coerced back to numbers).
school_budget = school_grouped_data["budget"].first()


def _passing_rate(passing_groups):
    """Fraction of each school's students present in the given passing subset."""
    passing_counts = passing_groups["Student ID"].count()
    # A school with no passing students is absent from the passing groupby;
    # reindex so it is reported as 0% instead of NaN.
    passing_counts = passing_counts.reindex(students_per_school.index, fill_value=0)
    return passing_counts / students_per_school


school_summary_df = pd.DataFrame({

    "Total Students" : students_per_school,
    "Total School Budget": school_budget,
    "Per Student Budget": school_budget / students_per_school,
    "Average Math Score" : school_grouped_data["math_score"].mean(),
    "Average Reading Score" : school_grouped_data["reading_score"].mean(),
    "% Passing Math" : _passing_rate(school_grouped_data_passing_math),
    "% Passing Reading" : _passing_rate(school_grouped_data_passing_reading),
    "% Overall Passing" : _passing_rate(school_grouped_data_passing_overall)})

###store budgets as whole dollars so the "${:,}" format prints no decimals

school_summary_df["Total School Budget"] = school_summary_df["Total School Budget"].astype(int)
school_summary_df["Per Student Budget"] = school_summary_df["Per Student Budget"].astype(int)

###format and display findings

school_summary_df.style.set_caption("School Level Results"
                      ).format({"Total Students" : "{:,}",
                                "Total School Budget" : "${:,}", "Per Student Budget" : "${:,}",
                                "Average Math Score" : "{:,.3f}", "Average Reading Score" : "{:,.3f}",
                                "% Passing Math": "{:,.2%}", "% Passing Reading": "{:,.2%}",
                                "% Overall Passing" : "{:,.2%}"}
                      ).set_table_styles(styles)

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Name,School Type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,District,4976,"$3,124,928",$628,77.048,81.034,66.68%,81.93%,54.64%
Cabrera High School,Charter,1858,"$1,081,356",$582,83.062,83.976,94.13%,97.04%,91.33%
Figueroa High School,District,2949,"$1,884,411",$639,76.712,81.158,65.99%,80.74%,53.20%
Ford High School,District,2739,"$1,763,916",$644,77.103,80.746,68.31%,79.30%,54.29%
Griffin High School,Charter,1468,"$917,500",$625,83.351,83.817,93.39%,97.14%,90.60%
Hernandez High School,District,4635,"$3,022,020",$652,77.29,80.934,66.75%,80.86%,53.53%
Holden High School,Charter,427,"$248,087",$581,83.803,83.815,92.51%,96.25%,89.23%
Huang High School,District,2917,"$1,910,635",$655,76.629,81.183,65.68%,81.32%,53.51%
Johnson High School,District,4761,"$3,094,650",$650,77.072,80.966,66.06%,81.22%,53.54%
Pena High School,Charter,962,"$585,858",$609,83.84,84.045,94.59%,95.95%,90.54%


---

## Top Performing Schools (By % Overall Passing)

In [7]:
###rank schools from highest to lowest overall passing rate

school_summary_df_sorted = school_summary_df.sort_values(by="% Overall Passing", ascending=False)

###format and display the first five rows of the ranking

top_five_formats = {
    "Total Students" : "{:,}",
    "Total School Budget" : "${:,}",
    "Per Student Budget" : "${:,}",
    "Average Math Score" : "{:,.3f}",
    "Average Reading Score" : "{:,.3f}",
    "% Passing Math": "{:,.2%}",
    "% Passing Reading": "{:,.2%}",
    "% Overall Passing" : "{:,.2%}",
}

top_five_schools = school_summary_df_sorted.head(5)

top_five_styler = top_five_schools.style.set_caption("Top Five Performing Schools by Percentage Passing Overall")
top_five_styler.format(top_five_formats).set_table_styles(styles)

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Name,School Type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Cabrera High School,Charter,1858,"$1,081,356",$582,83.062,83.976,94.13%,97.04%,91.33%
Thomas High School,Charter,1635,"$1,043,130",$638,83.418,83.849,93.27%,97.31%,90.95%
Griffin High School,Charter,1468,"$917,500",$625,83.351,83.817,93.39%,97.14%,90.60%
Wilson High School,Charter,2283,"$1,319,574",$578,83.274,83.989,93.87%,96.54%,90.58%
Pena High School,Charter,962,"$585,858",$609,83.84,84.045,94.59%,95.95%,90.54%


---
## Bottom Performing Schools (By % Overall Passing)

In [8]:
###format and display bottom 5 findings

# school_summary_df_sorted is ordered from highest to lowest overall passing
# rate, so tail(5) holds the five lowest, with the very lowest on the last row.
school_summary_df_sorted.tail(5).style.format({"Total Students" : "{:,}",
                                               # "$" prefix added so the budget reads the same as in the other school tables
                                               "Total School Budget" : "${:,}",
                                               "Per Student Budget" : "${:,}",
                                               "Average Math Score" : "{:,.3f}",
                                               "Average Reading Score" : "{:,.3f}",
                                               "% Passing Math": "{:,.2%}", "% Passing Reading": "{:,.2%}",
                                               "% Overall Passing" : "{:,.2%}"}
                                     ).set_caption("Bottom Five Performing Schools by Percentage Passing Overall"
                                     ).set_table_styles(styles)

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Name,School Type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Johnson High School,District,4761,3094650,$650,77.072,80.966,66.06%,81.22%,53.54%
Hernandez High School,District,4635,3022020,$652,77.29,80.934,66.75%,80.86%,53.53%
Huang High School,District,2917,1910635,$655,76.629,81.183,65.68%,81.32%,53.51%
Figueroa High School,District,2949,1884411,$639,76.712,81.158,65.99%,80.74%,53.20%
Rodriguez High School,District,3999,2547363,$637,76.843,80.745,66.37%,80.22%,52.99%


---
## Math Scores by Grade

In [9]:
###split the student rows into one dataframe per grade
###(these per-grade frames are reused by the reading-score cell below)

grade_column = school_data_complete_renamed["grade"]

nineth_grade_data = school_data_complete_renamed[grade_column == "9th"]
tenth_grade_data = school_data_complete_renamed[grade_column == "10th"]
eleventh_grade_data = school_data_complete_renamed[grade_column == "11th"]
twelth_grade_data = school_data_complete_renamed[grade_column == "12th"]


###average math score per school, one column per grade

frames_by_grade = {
    "9th": nineth_grade_data,
    "10th": tenth_grade_data,
    "11th": eleventh_grade_data,
    "12th": twelth_grade_data,
}

math_score_df = pd.DataFrame({
    grade_label: grade_frame.groupby("School Name")["math_score"].mean()
    for grade_label, grade_frame in frames_by_grade.items()
})

###format and display findings

math_grade_formats = {grade_label: "{:,.3f}" for grade_label in frames_by_grade}

math_score_styler = math_score_df.style.set_caption("Average Math Score by Grade")
math_score_styler.format(math_grade_formats).set_table_styles(styles)

Unnamed: 0_level_0,9th,10th,11th,12th
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,77.084,76.997,77.516,76.492
Cabrera High School,83.095,83.155,82.766,83.277
Figueroa High School,76.403,76.54,76.884,77.151
Ford High School,77.361,77.672,76.918,76.18
Griffin High School,82.044,84.229,83.842,83.356
Hernandez High School,77.438,77.337,77.136,77.187
Holden High School,83.787,83.43,85.0,82.855
Huang High School,77.027,75.909,76.447,77.226
Johnson High School,77.188,76.691,77.492,76.863
Pena High School,83.625,83.372,84.328,84.122


---
## Reading Score by Grade 

In [10]:
###average reading score per school for each grade
###(uses the per-grade dataframes built in the math-score cell)

reading_9th = nineth_grade_data.groupby("School Name")["reading_score"].mean()
reading_10th = tenth_grade_data.groupby("School Name")["reading_score"].mean()
reading_11th = eleventh_grade_data.groupby("School Name")["reading_score"].mean()
reading_12th = twelth_grade_data.groupby("School Name")["reading_score"].mean()

reading_score_df = pd.DataFrame({
    "9th": reading_9th,
    "10th": reading_10th,
    "11th": reading_11th,
    "12th": reading_12th,
})

###format and display findings

reading_grade_formats = dict.fromkeys(["9th", "10th", "11th", "12th"], "{:,.3f}")

reading_score_styler = reading_score_df.style.set_caption("Average Reading Score by Grade")
reading_score_styler.format(reading_grade_formats).set_table_styles(styles)

Unnamed: 0_level_0,9th,10th,11th,12th
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,81.303,80.907,80.946,80.912
Cabrera High School,83.676,84.253,83.788,84.288
Figueroa High School,81.199,81.409,80.64,81.385
Ford High School,80.633,81.263,80.404,80.662
Griffin High School,83.369,83.707,84.288,84.014
Hernandez High School,80.867,80.66,81.396,80.857
Holden High School,83.677,83.325,83.816,84.699
Huang High School,81.29,81.512,81.417,80.306
Johnson High School,81.261,80.773,80.616,81.228
Pena High School,83.807,83.612,84.336,84.591


---
## Scores by School Spending

In [30]:
#define bins

spending_labels = ["<$585", "$585-629", "$630-644", "$645-675"]

# Left-closed edges (used with right=False below) so each interval matches its
# label: [0, 585), [585, 630), [630, 645), [645, 676).
# With right-closed bins a school spending exactly $585 would be filed under
# "<$585", and a fractional value such as $629.50 under "$630-644".
spending_bins = [0, 585, 630, 645, 676]

#bin the raw dataframes

# Work on a copy: adding the bin column to school_data_complete_renamed itself
# would change the frame that other cells read.
spending_strat_df = school_data_complete_renamed.copy()

spending_strat_df["Spending Ranges (Per Student)"] = pd.cut(
    spending_strat_df["budget"] / spending_strat_df["size"],
    spending_bins, labels=spending_labels, right=False)

spending_passing_math = spending_strat_df.loc[spending_strat_df["math_score"] >= 70, :]

spending_passing_reading = spending_strat_df.loc[spending_strat_df["reading_score"] >= 70, :]

spending_passing_overall = spending_strat_df.loc[(spending_strat_df["math_score"] >= 70) &
                                                 (spending_strat_df["reading_score"] >= 70), :]


### Stratify data by spending

# observed=False is stated explicitly: the grouping column is categorical and
# the pandas default for this argument is changing, so spelling it out keeps
# every spending range in the result regardless of pandas version.
spending_grouped_data = spending_strat_df.groupby("Spending Ranges (Per Student)", observed=False)

spending_grouped_data_passing_math = spending_passing_math.groupby(
    "Spending Ranges (Per Student)", observed=False)

spending_grouped_data_passing_reading = spending_passing_reading.groupby(
    "Spending Ranges (Per Student)", observed=False)

spending_grouped_data_passing_overall = spending_passing_overall.groupby(
    "Spending Ranges (Per Student)", observed=False)

###create dataframe (inclusive of calculations steps)

students_per_spending_range = spending_grouped_data["Student ID"].count()

spending_strat_summary_df = pd.DataFrame({

    "Average Math Score" : spending_grouped_data["math_score"].mean(),
    "Average Reading Score" : spending_grouped_data["reading_score"].mean(),
    # passing rates are stored as fractions; the "%" format below scales them
    "% Passing Math" : spending_grouped_data_passing_math["Student ID"].count() / students_per_spending_range,
    "% Passing Reading" : spending_grouped_data_passing_reading["Student ID"].count() / students_per_spending_range,
    "% Overall Passing" : spending_grouped_data_passing_overall["Student ID"].count() / students_per_spending_range})


###format and display findings

spending_strat_summary_df.style.set_caption("Scores by Spending per Student"
                               ).format({
                                        "Average Math Score" : "{:,.3f}",
                                        "Average Reading Score" : "{:,.3f}",
                                        "% Passing Math": "{:,.2%}",
                                        "% Passing Reading": "{:,.2%}",
                                        "% Overall Passing" : "{:,.2%}"}
                              ).set_table_styles(styles)

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Spending Ranges (Per Student),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
<$585,83.363,83.964,93.70%,96.69%,90.64%
$585-629,79.983,82.313,79.11%,88.51%,70.94%
$630-644,77.821,81.301,70.62%,82.60%,58.84%
$645-675,77.049,81.006,66.23%,81.11%,53.53%


---
## Scores by School Size

In [31]:
#define bins

size_labels = ["Small (<1000)", "Medium (1000-2000)", "Large (2000-5000)"]

# pd.cut's default right-closed intervals: (0, 999], (999, 2000], (2000, 5000].
# NOTE(review): a school of exactly 2000 students is therefore "Medium", so
# "Large" effectively starts at 2001 although its label reads 2000-5000.
size_bins = [0, 999, 2000, 5000]


#bin the raw dataframes

# Work on a copy: adding the bin column to school_data_complete_renamed itself
# would change the frame that other cells read.
size_strat_df = school_data_complete_renamed.copy()

size_strat_df["School Size (No. of Students)"] = pd.cut(size_strat_df["size"],
                                                        size_bins, labels=size_labels)
size_passing_math = size_strat_df.loc[size_strat_df["math_score"] >= 70, :]

size_passing_reading = size_strat_df.loc[size_strat_df["reading_score"] >= 70, :]

size_passing_overall = size_strat_df.loc[(size_strat_df["math_score"] >= 70) &
                                         (size_strat_df["reading_score"] >= 70), :]

### Stratify data by school size

# observed=False is stated explicitly: the grouping column is categorical and
# the pandas default for this argument is changing, so spelling it out keeps
# every size category in the result regardless of pandas version.
size_grouped_data = size_strat_df.groupby("School Size (No. of Students)", observed=False)

size_grouped_data_passing_math = size_passing_math.groupby(
    "School Size (No. of Students)", observed=False)

size_grouped_data_passing_reading = size_passing_reading.groupby(
    "School Size (No. of Students)", observed=False)

size_grouped_data_passing_overall = size_passing_overall.groupby(
    "School Size (No. of Students)", observed=False)

###create dataframe (inclusive of calculations steps)

students_per_size_range = size_grouped_data["Student ID"].count()

size_strat_summary_df = pd.DataFrame({

    "Average Math Score" : size_grouped_data["math_score"].mean(),
    "Average Reading Score" : size_grouped_data["reading_score"].mean(),
    # passing rates are stored as fractions; the "%" format below scales them
    "% Passing Math" : size_grouped_data_passing_math["Student ID"].count() / students_per_size_range,
    "% Passing Reading" : size_grouped_data_passing_reading["Student ID"].count() / students_per_size_range,
    "% Overall Passing" : size_grouped_data_passing_overall["Student ID"].count() / students_per_size_range})

###format and display findings

size_strat_summary_df.style.set_caption("Scores by School Size"
                          ).format({
                                    "Average Math Score" : "{:,.3f}", "Average Reading Score" : "{:,.3f}",
                                    "% Passing Math": "{:,.2%}", "% Passing Reading": "{:,.2%}",
                                    "% Overall Passing" : "{:,.2%}"}
                         ).set_table_styles(styles)

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Size (No. of Students),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Small (<1000),83.829,83.974,93.95%,96.04%,90.14%
Medium (1000-2000),83.373,83.868,93.62%,96.77%,90.62%
Large (2000-5000),77.478,81.199,68.65%,82.13%,56.57%


---
## Scores by School Type

In [32]:
### Group all student rows, and each passing subset, by school type

type_grouped_data = school_data_complete_renamed.groupby("School Type")
type_grouped_data_passing_math = passing_math.groupby("School Type")
type_grouped_data_passing_reading = passing_reading.groupby("School Type")
type_grouped_data_passing_overall = passing_overall.groupby("School Type")

###build the summary one column at a time

students_per_type = type_grouped_data["Student ID"].count()

math_pass_rate_by_type = type_grouped_data_passing_math["Student ID"].count() / students_per_type
reading_pass_rate_by_type = type_grouped_data_passing_reading["Student ID"].count() / students_per_type
overall_pass_rate_by_type = type_grouped_data_passing_overall["Student ID"].count() / students_per_type

type_summary_df = pd.DataFrame({
    "Average Math Score": type_grouped_data["math_score"].mean(),
    "Average Reading Score": type_grouped_data["reading_score"].mean(),
    "% Passing Math": math_pass_rate_by_type,
    "% Passing Reading": reading_pass_rate_by_type,
    "% Overall Passing": overall_pass_rate_by_type,
})

###format and display findings

type_summary_formats = {
    "Average Math Score": "{:,.3f}",
    "Average Reading Score": "{:,.3f}",
    "% Passing Math": "{:,.2%}",
    "% Passing Reading": "{:,.2%}",
    "% Overall Passing": "{:,.2%}",
}

type_summary_styler = type_summary_df.style.set_caption("Scores by School Type")
type_summary_styler.format(type_summary_formats).set_table_styles(styles)

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,83.406,83.903,93.70%,96.65%,90.56%
District,76.987,80.962,66.52%,80.91%,53.70%
