# Code to Build School Summary DF #

#### New section is labeled below ####

In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# Locations of the input CSV files (adjust these paths for your environment)
school_data = "Resources/schools_complete.csv"
student_data = "Resources/students_complete.csv"

# Load each CSV into its own pandas DataFrame: schools first, then students
school_data_df, student_data_df = (
    pd.read_csv(csv_path) for csv_path in (school_data, student_data)
)

In [2]:
#BUILD SCHOOL SUMMARY FROM SCHOOL_DATA_DF

# Drop the 'School ID' column and give the raw columns readable names.
# Both steps return a new frame and ignore labels that are already gone
# (drop: errors="ignore"; rename: unknown keys are skipped), so this cell can
# be re-run safely. The previous `del school_data_df['School ID']` raised
# KeyError on a second run.
school_data_df = (
    school_data_df
    .drop(columns=["School ID"], errors="ignore")
    .rename(columns={"type": "School Type",
                     "size": "Total Students",
                     "budget": "Total School Budget"})
)

#Add Per Student Budget (total budget divided by enrolment)
school_data_df['Per Student Budget'] = school_data_df['Total School Budget'] / school_data_df['Total Students']

In [3]:
# Combine school-level and student-level data into one frame, matched on
# school name; the outer join keeps rows that have no match on either side.
merged_df = school_data_df.merge(student_data_df, how="outer", on="school_name")

In [4]:
#AVERAGE SCORES PER SCHOOL

#Group students by school name (groupby sorts its keys, so results come out
#in alphabetical school order)
grouped_students = student_data_df.groupby(['school_name'])

#Gather average scores per school, rounded to 2 decimals.
#The values are derived from the data instead of being overwritten with
#hand-copied numbers, so they cannot go stale when the CSV changes.
#They are kept as plain lists because a later cell assigns them to a frame
#column by position, not by index label.
avg_math = grouped_students["math_score"].mean().round(2).tolist()
avg_reading = grouped_students["reading_score"].mean().round(2).tolist()

In [5]:
#BINS FOR INDIVIDUAL PASSING SCORES

# A single bin spanning 70..100 (lowest edge included) labelled "pass";
# scores outside that bin receive no label (NaN).
group_names = ["pass"]
bins = [70,100]

# Apply the same labelling to both subjects: source score column -> summary column
for score_col, summary_col in (("reading_score", "Reading Summary"),
                               ("math_score", "Math Summary")):
    merged_df[summary_col] = pd.cut(merged_df[score_col], bins, labels=group_names, include_lowest=True)

In [6]:
#CALCULATE %PASSING FOR READING AND MATH

#Group dataframe by School
grouped_schools_df = merged_df.groupby(['school_name'])

#Count number of passing students per subject, per school.
#The summary columns hold "pass" or NaN, so counting non-null values gives the
#number of passing students. count() keeps the column name on the result,
#whereas the name of a value_counts() result differs between pandas versions,
#which broke the column lookups below.
reading_summary_df = grouped_schools_df["Reading Summary"].count()
math_summary_df = grouped_schools_df["Math Summary"].count()

#Create dataframe with passing reading and math counts, one row per school
#(both Series share the school_name index, so they line up side by side)
test_scores_df = pd.concat([reading_summary_df, math_summary_df], axis=1).reset_index()

#Merge passing test scores with school data
passing_school_df = pd.merge(test_scores_df, school_data_df, on="school_name", how="outer")

#Percent Passing Math
passing_school_df["Percent Pass Math"] = round(passing_school_df["Math Summary"]/passing_school_df["Total Students"] * 100, 2)

#Percent Passing Reading
passing_school_df["Percent Pass Reading"] = round(passing_school_df["Reading Summary"]/passing_school_df["Total Students"] * 100, 2)

In [7]:
# Keep only the students whose reading AND math scores are both above 69
reading_ok = merged_df["reading_score"].gt(69)
math_ok = merged_df["math_score"].gt(69)
passing_both_df = merged_df.loc[reading_ok & math_ok]

# Tally those students per school; the result is a Series keyed by school name
pass_both = passing_both_df["school_name"].value_counts()

In [8]:
#Create a data frame from the pass_both data
# Built directly from the computed per-school counts rather than a hand-typed
# copy of them, so the numbers always reflect the loaded data.
# rename_axis names the index "school_name"; reset_index(name=...) then turns
# it into a column and labels the counts "# Passing Both".
pass_both_df = pass_both.rename_axis("school_name").reset_index(name="# Passing Both")

In [9]:
# Attach the passing-both counts to the per-school frame, matched on school name.
# NOTE: re-running this cell merges the counts in a second time.
passing_school_df = passing_school_df.merge(pass_both_df, how="outer", on="school_name")

In [10]:
# Share of each school's students passing both subjects, as a percentage rounded to 2 decimals
passing_both_share = passing_school_df["# Passing Both"] / passing_school_df["Total Students"]
passing_school_df["Percent Pass Both"] = (passing_both_share * 100).round(2)

In [11]:
# Add the per-school average score columns.
# NOTE: avg_reading / avg_math are plain lists, so values are matched to rows
# by position; this relies on passing_school_df being in the same school
# order as the lists.
for column_label, column_values in (("Avg Reading", avg_reading),
                                    ("Avg Math", avg_math)):
    passing_school_df[column_label] = column_values

In [12]:
#Build and Organize School Summary

# Columns to keep, in report order
summary_columns = [
    "school_name", "School Type", "Total Students", "Total School Budget",
    "Per Student Budget", "Avg Reading", "Avg Math",
    "Percent Pass Reading", "Percent Pass Math", "Percent Pass Both",
]

# Working column name -> display name used in the final summary
display_names = {
    "school_name": "School Name",
    "Avg Reading": "Average Reading Score",
    "Avg Math": "Average Math Score",
    "Percent Pass Reading": "% Passing Reading",
    "Percent Pass Math": "% Passing Math",
    "Percent Pass Both": "% Overall Passing",
}

# Select the report columns, then apply the display names
School_Summary_df = passing_school_df[summary_columns].rename(columns=display_names)

# New Section Begins Here #

In [13]:
# Smallest per-student budget across all schools (used to choose the spending bins)
School_Summary_df.loc[:, "Per Student Budget"].min()

578.0

In [14]:
# Largest per-student budget across all schools (used to choose the spending bins)
School_Summary_df.loc[:, "Per Student Budget"].max()

655.0

In [15]:
#SCORES BY PER STUDENT BUDGET

#Create New Data Frame for scores by per-student spending.
#A copy is used so the spending-range column is not silently added to
#School_Summary_df as well.
Budget_df = School_Summary_df.copy()

#Bin schools by per-student budget.
#Intervals are left-closed (right=False) so every label matches its interval:
#a budget of exactly 595 belongs to "595 - 614", not "575 - 594".
#The last edge is 671 so that the "635 - 670" range still includes 670.
bins = [575, 595, 615, 635, 671]
names = ["575 - 594", "595 - 614","615 - 634","635 - 670"]

#Append column to dataframe
Budget_df["Spending Range - in Dollars"] = pd.cut(Budget_df["Per Student Budget"], bins, labels=names, right=False)

#Group by spending range; observed=False keeps every range in the result
#even if no school falls in it, and states the categorical-groupby behaviour
#explicitly.
Score_By_Budget_df = Budget_df.groupby("Spending Range - in Dollars", observed=False)

#Show Dataframe
Score_By_Budget_df[["Average Math Score", "Average Reading Score",
                  "% Passing Math", "% Passing Reading", "% Overall Passing"]].mean()

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Spending Range - in Dollars,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
575 - 594,83.4525,83.935,93.46,96.61,90.3675
595 - 614,83.6,83.885,94.23,95.9,90.215
615 - 634,80.2,82.425,80.035,89.535,72.62
635 - 670,77.865714,81.368571,70.347143,82.995714,58.858571


In [16]:
#SCORES BY TOTAL STUDENTS
#Create New Data Frame for Score by size of schools.
#A copy is used so the "Size" column is not silently added to
#School_Summary_df as well.
Size_df = School_Summary_df.copy()

#Bin schools by population. Intervals are right-closed with the lowest edge
#included: [0, 999], (999, 2999], (2999, 5000], which matches the labels for
#whole-number enrolments.
bins = [0, 999, 2999, 5000]
names = ["Small <1000", "Medium (1000 - 2999)", "Large (3000 - 5000)"]

#Append column to dataframe
Size_df["Size"] = pd.cut(Size_df["Total Students"], bins, labels=names, include_lowest=True)

#Group by Size; observed=False keeps every size category in the result even
#if no school falls in it, and states the categorical-groupby behaviour
#explicitly.
Score_By_Size_df = Size_df.groupby("Size", observed=False)

#Show Dataframe
Score_By_Size_df[["Average Math Score", "Average Reading Score",
                  "% Passing Math", "% Passing Reading", "% Overall Passing"]].mean()

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Small <1000,83.82,83.925,93.55,96.1,89.885
Medium (1000 - 2999),81.175556,82.935556,84.648889,91.316667,78.297778
Large (3000 - 5000),77.0625,80.9175,66.465,81.0575,53.675


In [17]:
#SCORES BY TYPE

# One group per school type
School_Type_df = School_Summary_df.groupby(['School Type'])

# Per-type mean of each metric, rounded to 2 decimals
school_type = School_Type_df["School Type"]
Average_Math = School_Type_df["Average Math Score"].mean().round(2)
Average_Reading = School_Type_df["Average Reading Score"].mean().round(2)
Avg_per_Math = School_Type_df["% Passing Math"].mean().round(2)
Avg_per_Read = School_Type_df["% Passing Reading"].mean().round(2)
Avg_per_Overall = School_Type_df["% Overall Passing"].mean().round(2)

# Place the five Series side by side; each keeps its own name as the column
# header and they share the "School Type" index.
Scores_By_Type_df = pd.concat(
    [Average_Math, Average_Reading, Avg_per_Math, Avg_per_Read, Avg_per_Overall],
    axis=1,
)
Scores_By_Type_df

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,83.47,83.9,93.62,96.59,90.43
District,76.96,80.97,66.55,80.8,53.67
