# District Assessment Python Notebook

In [1]:
# Packages
import locale as loc
import os
import pandas as pd

In [2]:
# Data File Paths
# Both CSV inputs are resolved relative to the current working directory.
school_data_file_path = os.path.join(os.curdir, "schools_complete.csv")
student_data_file_path = os.path.join(os.curdir, "students_complete.csv")

In [234]:
# Read Data Files
# Load the school-level and student-level CSVs into their raw DataFrames.
df_school, df_student = (
    pd.read_csv(csv_path)
    for csv_path in (school_data_file_path, student_data_file_path)
)

## District Summary

In [253]:
# District-wide totals used as numerators/denominators for the pass rates.
# A score of 60 or more counts as passing.
# NOTE(review): the recorded outputs show a 100% reading pass rate for the
# district and for every school at this cutoff -- confirm 60 is the intended
# passing score.
student_count = df_school["size"].sum()
pass_math_count = df_student["math_score"].ge(60).sum()
pass_reading_count = df_student["reading_score"].ge(60).sum()

# Build the one-row District Summary in a single constructor call from scalar
# summaries of the two raw DataFrames.
# "Overall Pass Rate" is the plain average of the math and reading pass rates.
df_district_summary2 = pd.DataFrame({
    "Total Schools": [df_school["School ID"].count()],
    "Total Students": [student_count],
    "Total Budget": [df_school["budget"].sum()],
    "Average Math Score": [df_student["math_score"].mean()],
    "Average Reading Score": [df_student["reading_score"].mean()],
    "Math Pass Rate": [pass_math_count / student_count],
    "Reading Pass Rate": [pass_reading_count / student_count],
    "Overall Pass Rate": [0.5 * (pass_math_count + pass_reading_count) / student_count],
})

# Presentation only: the underlying values stay numeric, the Styler formats them.
count_fmt = "{:,}".format
money_fmt = "${:,}".format
score_fmt = "{:.2f}".format
rate_fmt = "{:.2%}".format
district_formats = {
    "Total Students": count_fmt,
    "Total Budget": money_fmt,
    "Average Math Score": score_fmt,
    "Average Reading Score": score_fmt,
    "Math Pass Rate": rate_fmt,
    "Reading Pass Rate": rate_fmt,
    "Overall Pass Rate": rate_fmt,
}
df_district_summary2.style.format(district_formats).hide_index()

Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,Math Pass Rate,Reading Pass Rate,Overall Pass Rate
15,39170,"$24,649,428",78.99,81.88,92.45%,100.00%,96.22%


## School Summary
**TODO (Norman):** clean up this section.

In [260]:
# Aggregate raw student data by school: mean scores plus the number of
# students at or above the passing cutoff (60) in each subject.
# school_name is kept as a column so the aggregates can be joined to the
# school table by key rather than by row position.
df_students_by_school = (
    df_student[["school_name", "reading_score", "math_score"]]
    .assign(
        is_reading_passed=df_student["reading_score"] >= 60,
        is_math_passed=df_student["math_score"] >= 60,
    )
    .groupby("school_name")
    .agg(
        {"reading_score": "mean",
         "math_score": "mean",
         "is_reading_passed": "sum",
         "is_math_passed": "sum"
        })
    .reset_index()
)

df_sorted_schools = df_school[["school_name", "type", "size", "budget"]].sort_values(
    "school_name", ignore_index=True)

# Join on school_name. A positional concat would silently pair a school with
# another school's statistics if the two frames ever differed in length or order.
# how="left" keeps every school (NaN stats if it has no student rows);
# validate raises if school_name is duplicated on either side.
df_school_stats = df_sorted_schools.merge(
    df_students_by_school, on="school_name", how="left", validate="one_to_one")

# Per-school report columns. Rates use the school's enrolled size as the
# denominator; the overall rate is the average of the two subject pass rates.
enrollment = df_school_stats["size"]
df_school_summary = pd.DataFrame({
    "School Name": df_school_stats["school_name"],
    "School Type": df_school_stats["type"],
    "Total Students": enrollment,
    "Per Student Budget": df_school_stats["budget"] / enrollment,
    "Average Math Score": df_school_stats["math_score"],
    "Average Reading Score": df_school_stats["reading_score"],
    "% Passing Math": df_school_stats["is_math_passed"] / enrollment,
    "% Passing Reading": df_school_stats["is_reading_passed"] / enrollment,
    "Overall Passing Rate": 0.5 * (df_school_stats["is_reading_passed"]
                                   + df_school_stats["is_math_passed"]) / enrollment,
})

# Presentation only: df_school_summary itself stays numeric for the later cells.
df_school_summary.style.format({
    "Per Student Budget" : "${:,.2f}".format,
    "Average Math Score" : "{:.2f}".format,
    "Average Reading Score" : "{:.2f}".format,
    "% Passing Math" : "{:.2%}".format,
    "% Passing Reading" : "{:.2%}".format,
    "Overall Passing Rate" : "{:.2%}".format
}).hide_index()

School Name,School Type,Total Students,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
Bailey High School,District,4976,$628.00,77.05,81.03,89.53%,100.00%,94.76%
Cabrera High School,Charter,1858,$582.00,83.06,83.98,100.00%,100.00%,100.00%
Figueroa High School,District,2949,$639.00,76.71,81.16,88.44%,100.00%,94.22%
Ford High School,District,2739,$644.00,77.1,80.75,89.30%,100.00%,94.65%
Griffin High School,Charter,1468,$625.00,83.35,83.82,100.00%,100.00%,100.00%
Hernandez High School,District,4635,$652.00,77.29,80.93,89.08%,100.00%,94.54%
Holden High School,Charter,427,$581.00,83.8,83.81,100.00%,100.00%,100.00%
Huang High School,District,2917,$655.00,76.63,81.18,88.86%,100.00%,94.43%
Johnson High School,District,4761,$650.00,77.07,80.97,89.18%,100.00%,94.59%
Pena High School,Charter,962,$609.00,83.84,84.04,100.00%,100.00%,100.00%


In [8]:
# Display the installed pandas version for this kernel.
pd.__version__
# Environment note: "conda update --all" was run in the Anaconda prompt.

'1.0.1'

In [95]:
# Top Performing Schools (By Passing Rate)
# Five best schools by Overall Passing Rate; schools with equal rates are
# ordered by Average Math Score, highest first.
ranking_keys = ["Overall Passing Rate", "Average Math Score"]
df_school_summary.sort_values(by=ranking_keys, ascending=[False, False]).head(5)

Unnamed: 0,School Name,School Type,Total Students,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
9,Pena High School,Charter,962,609.0,83.839917,84.044699,1.0,1.0,1.0
6,Holden High School,Charter,427,581.0,83.803279,83.814988,1.0,1.0,1.0
14,Wright High School,Charter,1800,583.0,83.682222,83.955,1.0,1.0,1.0
12,Thomas High School,Charter,1635,638.0,83.418349,83.84893,1.0,1.0,1.0
11,Shelton High School,Charter,1761,600.0,83.359455,83.725724,1.0,1.0,1.0


In [94]:
# Bottom Performing Schools (By Passing Rate)
# Five worst schools by Overall Passing Rate, with the same metrics as the
# top-performers table.
# Average Math Score is a secondary key (lowest first), mirroring the
# top-performers ranking, so schools tied on passing rate come out in a
# defined order rather than whatever order a single-key sort leaves them in.
df_school_summary.sort_values(
    ["Overall Passing Rate", "Average Math Score"], ascending=True
).head(5)

Unnamed: 0,School Name,School Type,Total Students,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
2,Figueroa High School,District,2949,639.0,76.711767,81.15802,0.884368,1.0,0.942184
10,Rodriguez High School,District,3999,637.0,76.842711,80.744686,0.885471,1.0,0.942736
7,Huang High School,District,2917,655.0,76.629414,81.182722,0.888584,1.0,0.944292
5,Hernandez High School,District,4635,652.0,77.289752,80.934412,0.890831,1.0,0.945415
8,Johnson High School,District,4761,650.0,77.072464,80.966394,0.891829,1.0,0.945915


In [139]:
# Math Scores by Grade
# Average Math Score for students of each grade level (9th, 10th, 11th, 12th)
# at each school.

# Explicit column order for the report. Plain string sorting would put "9th"
# after "12th". Grade labels outside this list are not shown.
grade_order = ["9th", "10th", "11th", "12th"]

df_math_by_school_by_grade = df_student[["school_name", "grade", "math_score"]]

# Pivot on the real grade labels and reorder the columns afterwards, instead
# of rewriting the labels so that they happen to sort correctly.
df118 = (
    pd.pivot_table(df_math_by_school_by_grade, index="school_name", columns="grade",
                   values="math_score", aggfunc="mean")
    .reindex(columns=grade_order)
    .rename_axis(columns=None)
    .reset_index()
)

# Presentation only: every grade column gets the same two-decimal format.
df118.style.format({grade: "{:.2f}".format for grade in grade_order}).hide_index()

school_name,9th,10th,11th,12th
Bailey High School,77.08,77.0,77.52,76.49
Cabrera High School,83.09,83.15,82.77,83.28
Figueroa High School,76.4,76.54,76.88,77.15
Ford High School,77.36,77.67,76.92,76.18
Griffin High School,82.04,84.23,83.84,83.36
Hernandez High School,77.44,77.34,77.14,77.19
Holden High School,83.79,83.43,85.0,82.86
Huang High School,77.03,75.91,76.45,77.23
Johnson High School,77.19,76.69,77.49,76.86
Pena High School,83.63,83.37,84.33,84.12


In [142]:
# Reading Scores by Grade
# Average Reading Score for students of each grade level (9th, 10th, 11th, 12th)
# at each school.

# Explicit column order for the report. Plain string sorting would put "9th"
# after "12th". Grade labels outside this list are not shown.
grade_order = ["9th", "10th", "11th", "12th"]

df_reading_by_school_by_grade = df_student[["school_name", "grade", "reading_score"]]

# Pivot on the real grade labels and reorder the columns afterwards, instead
# of rewriting the labels so that they happen to sort correctly.
df141 = (
    pd.pivot_table(df_reading_by_school_by_grade, index="school_name", columns="grade",
                   values="reading_score", aggfunc="mean")
    .reindex(columns=grade_order)
    .rename_axis(columns=None)
    .reset_index()
)

# Presentation only: every grade column gets the same two-decimal format.
df141.style.format({grade: "{:.2f}".format for grade in grade_order}).hide_index()

school_name,9th,10th,11th,12th
Bailey High School,81.3,80.91,80.95,80.91
Cabrera High School,83.68,84.25,83.79,84.29
Figueroa High School,81.2,81.41,80.64,81.38
Ford High School,80.63,81.26,80.4,80.66
Griffin High School,83.37,83.71,84.29,84.01
Hernandez High School,80.87,80.66,81.4,80.86
Holden High School,83.68,83.32,83.82,84.7
Huang High School,81.29,81.51,81.42,80.31
Johnson High School,81.26,80.77,80.62,81.23
Pena High School,83.81,83.61,84.34,84.59


In [225]:
# Scores by School Spending
# Break down school performance by per-student spending, using four
# left-closed bins: [575,600), [600,625), [625,650), [650,675).
# Reported for each bin:
#   Average Math Score
#   Average Reading Score
#   % Passing Math
#   % Passing Reading
#   Overall Passing Rate (average of the above two)
metric_columns = ["Average Math Score",
                  "Average Reading Score",
                  "% Passing Math",
                  "% Passing Reading",
                  "Overall Passing Rate"]
enrollment = df_school_summary["Total Students"]

spending_bins = pd.cut(
    df_school_summary["Per Student Budget"],
    bins=[575, 600, 625, 650, 675],
    right=False,
).rename("Budget Range")

# Scale each school's metrics by its enrollment, so that per-bin sums divided
# by per-bin enrollment give student-weighted averages rather than
# school-weighted ones.
weighted_metrics = df_school_summary[metric_columns].mul(enrollment, axis="index")
df_with_bin_weighted = pd.concat([spending_bins, enrollment, weighted_metrics], axis=1)
df_with_bin_weighted["School Count"] = 1

df_grouped_by_bins = df_with_bin_weighted.groupby("Budget Range").sum().reset_index()

# Undo the enrollment scaling to recover the weighted averages per bin.
bin_averages = df_grouped_by_bins[metric_columns].div(
    df_grouped_by_bins["Total Students"], axis="index")
df_bin_report = pd.concat(
    [df_grouped_by_bins[["Budget Range", "School Count"]], bin_averages], axis=1)

score_fmt = "{:.2f}".format
rate_fmt = "{:.2%}".format
df_bin_report.style.format({
    "Average Math Score": score_fmt,
    "Average Reading Score": score_fmt,
    "% Passing Math": rate_fmt,
    "% Passing Reading": rate_fmt,
    "Overall Passing Rate": rate_fmt,
}).hide_index()



Budget Range,School Count,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
"[575, 600)",4,83.36,83.96,100.00%,100.00%,100.00%
"[600, 625)",2,83.53,83.84,100.00%,100.00%,100.00%
"[625, 650)",6,78.06,81.43,90.92%,100.00%,95.46%
"[650, 675)",3,77.05,81.01,89.07%,100.00%,94.53%


In [224]:
# Scores by School Size
# Same breakdown as the spending table, grouped by school size instead.
# Bin edges [0, 1750, 3500, 5250] give three right-closed ranges:
# Small (0,1750], Medium (1750,3500], Large (3500,5250].
metric_columns = ["Average Math Score",
                  "Average Reading Score",
                  "% Passing Math",
                  "% Passing Reading",
                  "Overall Passing Rate"]
enrollment = df_school_summary["Total Students"]

size_bins = pd.cut(
    enrollment,
    bins=[0, 1750, 3500, 5250],
    right=True,
    labels=["Small", "Medium", "Large"],
).rename("Size Range")

# Scale each school's metrics by its enrollment, so that per-bin sums divided
# by per-bin enrollment give student-weighted averages.
weighted_metrics = df_school_summary[metric_columns].mul(enrollment, axis="index")
df_with_size_bin_weighted = pd.concat([size_bins, enrollment, weighted_metrics], axis=1)
df_with_size_bin_weighted["School Count"] = 1

df_agged_by_size = df_with_size_bin_weighted.groupby("Size Range").sum().reset_index()

# Undo the enrollment scaling to recover the weighted averages per size range.
size_averages = df_agged_by_size[metric_columns].div(
    df_agged_by_size["Total Students"], axis="index")
df_bin_report = pd.concat(
    [df_agged_by_size[["Size Range", "School Count"]], size_averages], axis=1)

score_fmt = "{:.2f}".format
rate_fmt = "{:.2%}".format
df_bin_report.style.format({
    "Average Math Score": score_fmt,
    "Average Reading Score": score_fmt,
    "% Passing Math": rate_fmt,
    "% Passing Reading": rate_fmt,
    "Overall Passing Rate": rate_fmt,
}).hide_index()


Size Range,School Count,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
Small,4,83.52,83.88,100.00%,100.00%,100.00%
Medium,7,79.89,82.4,94.12%,100.00%,97.06%
Large,4,77.07,80.93,89.11%,100.00%,94.56%


In [258]:
# Scores by School Type
# Same breakdown as the spending and size tables, grouped by school type
# (Charter vs. District).
# NOTE(review): the output recorded for this cell is a TypeError about
# concatenating a tuple, but nothing below passes a tuple to pd.concat.
# The output may come from an earlier kernel state; re-run after a kernel
# restart to confirm.
metric_columns = ["Average Math Score",
                  "Average Reading Score",
                  "% Passing Math",
                  "% Passing Reading",
                  "Overall Passing Rate"]
enrollment = df_school_summary["Total Students"]

# Scale each school's metrics by its enrollment, so that per-type sums divided
# by per-type enrollment give student-weighted averages.
weighted_metrics = df_school_summary[metric_columns].mul(enrollment, axis="index")
df_schools_weighted = pd.concat(
    [df_school_summary["School Type"], enrollment, weighted_metrics], axis=1)
df_schools_weighted["School Count"] = 1

df_agged_by_type = df_schools_weighted.groupby("School Type").sum().reset_index()

# Undo the enrollment scaling to recover the weighted averages per school type.
type_averages = df_agged_by_type[metric_columns].div(
    df_agged_by_type["Total Students"], axis="index")
df_bin_report = pd.concat(
    [df_agged_by_type[["School Type", "School Count"]], type_averages], axis=1)

score_fmt = "{:.2f}".format
rate_fmt = "{:.2%}".format
df_bin_report.style.format({
    "Average Math Score": score_fmt,
    "Average Reading Score": score_fmt,
    "% Passing Math": rate_fmt,
    "% Passing Reading": rate_fmt,
    "Overall Passing Rate": rate_fmt,
}).hide_index()

TypeError: cannot concatenate object of type '<class 'tuple'>'; only Series and DataFrame objs are valid