# PyCity School Analysis

## Analysis Report


In [1]:
# imports
import pandas as pd
from pathlib import Path

# Locations of the two raw CSV inputs, relative to the notebook's working directory
school_file, student_file = (
    Path("Resources/schools_complete.csv"),
    Path("Resources/students_complete.csv"),
)

# Load each CSV into its own DataFrame with pandas' default parsing options
school_df = pd.read_csv(filepath_or_buffer=school_file)
student_df = pd.read_csv(filepath_or_buffer=student_file)

In [20]:
# Sanity check: preview the first ten rows of the schools table
school_df.head(n=10)

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500
5,5,Wilson High School,Charter,2283,1319574
6,6,Cabrera High School,Charter,1858,1081356
7,7,Bailey High School,District,4976,3124928
8,8,Holden High School,Charter,427,248087
9,9,Pena High School,Charter,962,585858


In [3]:
# Sanity check: preview the first five rows of the students table
student_df.head(n=5)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [5]:
# Attach each school's columns to every student row, joining on the school name.
# how="left" keeps every student row even when its school is absent from school_df.
# validate="many_to_one" makes pandas raise MergeError if a school_name appears more
# than once in school_df; without it, a duplicated school row would silently
# duplicate that school's students and distort any count or percentage derived
# from merged_df.
merged_df = pd.merge(
    student_df,
    school_df,
    how="left",
    on="school_name",
    validate="many_to_one",
)
merged_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


## District Summary

In [50]:
# Number of distinct school names in the merged data
# (dropna=False so a missing school name would still be counted as one distinct value)
total_schools = merged_df["school_name"].nunique(dropna=False)
total_schools

15

In [40]:
# Number of student records: entries in the student_name column, missing names included
total_students = merged_df["student_name"].size
total_students

39170

In [52]:
# Sum of the budget column, taken from school_df rather than the per-student merged_df
total_budget = school_df.loc[:, "budget"].sum()
total_budget

24649428

In [41]:
# Mean of math_score across all rows of merged_df
avg_math = merged_df.loc[:, "math_score"].mean()
avg_math

78.98537145774827

In [42]:
# Mean of reading_score across all rows of merged_df
avg_read = merged_df.loc[:, "reading_score"].mean()
avg_read

81.87784018381414

In [44]:
# Share of students passing math, as a percentage.
# A math_score of 70 or higher counts as a pass; the count is of non-null
# student_name values among the passing rows.
pass_math_count = merged_df.loc[merged_df["math_score"].ge(70)].count()["student_name"]
pass_math_perc = pass_math_count / total_students * 100
pass_math_perc

74.9808526933878

In [45]:
# Share of students passing reading, as a percentage.
# A reading_score of 70 or higher counts as a pass; the count is of non-null
# student_name values among the passing rows.
pass_read_count = merged_df.loc[merged_df["reading_score"].ge(70)].count()["student_name"]
pass_read_perc = pass_read_count / total_students * 100
pass_read_perc

85.80546336482001

In [48]:
# Share of students passing BOTH subjects, as a percentage.
# A row qualifies only when math_score and reading_score are each 70 or higher.
pass_read_math_count = merged_df.loc[
    merged_df["math_score"].ge(70) & merged_df["reading_score"].ge(70)
].count()["student_name"]
overall_pass_rate = pass_read_math_count / total_students * 100
overall_pass_rate

65.17232575950983

In [58]:
# Single-row DataFrame gathering the district-level metrics into one table
district_summary = pd.DataFrame(
    {
        "Total Schools": [total_schools],
        "Total Students": [total_students],
        "Total Budget": [total_budget],
        "Average Math Score": [avg_math],
        "Average Reading Score": [avg_read],
        "% Passing Math": [pass_math_perc],
        "% Passing Reading": [pass_read_perc],
        "% Overall Passing": [overall_pass_rate],
    }
)

# Render the numeric columns as display strings (thousands separators, currency,
# two-decimal scores and percentages). "Total Schools" is left as a plain number.
# Each column maps to the str.format template applied to its values.
district_summary = district_summary.assign(
    **{
        column: district_summary[column].map(template.format)
        for column, template in {
            "Total Students": "{:,}",
            "Total Budget": "${:,.2f}",
            "Average Math Score": "{:.2f}",
            "Average Reading Score": "{:.2f}",
            "% Passing Math": "{:.2f}%",
            "% Passing Reading": "{:.2f}%",
            "% Overall Passing": "{:.2f}%",
        }.items()
    }
)

district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.99,81.88,74.98%,85.81%,65.17%


## School Summary