# City Schools Pandas Challenge 

This analysis of district-wide standardized test results aggregates students' math and reading
scores, along with information about the schools they attend. Its purpose is to highlight clear trends in school
performance.

In [1]:
# Dependencies
import pandas as pd

In [2]:
# Relative path to the CSV holding one row per school.
school_file = "Resources/schools_complete.csv"

In [3]:
# Relative path to the CSV holding one row per student.
student_file = "Resources/students_complete.csv"

In [4]:
# Load the school-level CSV into a DataFrame.
school_df = pd.read_csv(filepath_or_buffer=school_file)

In [5]:
# Load the student-level CSV into a DataFrame.
student_df = pd.read_csv(filepath_or_buffer=student_file)

## District Summary

This is a high-level snapshot of the district's key metrics.

In [6]:
# District summary, step 1: number of schools.
# Counts the non-null entries in the school-name column.
total_schools = school_df.loc[:, "school_name"].count()

In [7]:
# Total enrolment: sum of the per-school "size" column.
# The column is selected by label because `school_df.size` (attribute access)
# would return the DataFrame's element count instead of the column.
total_students = school_df.loc[:, "size"].sum()

In [8]:
# District-wide budget: sum of every school's budget.
total_budget = school_df.loc[:, "budget"].sum()

In [9]:
# Mean math score across all student records.
average_math_score = student_df.loc[:, "math_score"].mean()

In [10]:
# Mean reading score across all student records.
average_reading_score = student_df.loc[:, "reading_score"].mean()

In [11]:
# Percentage of students passing math.
# NOTE(review): the passing cut-off (60) is hard-coded here and in the reading
# and overall cells; confirm it matches the passing definition intended for
# this analysis.

# Denominator: number of non-null math scores.
total_math_score = student_df.loc[:, "math_score"].count()

# Numerator: student rows whose math score is at or above the cut-off.
passing_math = student_df[student_df["math_score"].ge(60)]
total_passing_math = passing_math.shape[0]

percent_math_pass = total_passing_math / total_math_score * 100

In [12]:
# Percentage of students passing reading.
# NOTE(review): the passing cut-off (60) is hard-coded; with the stored outputs
# it yields 100% passing reading, so confirm the intended threshold.

# Denominator: number of non-null reading scores.
total_reading_score = student_df.loc[:, "reading_score"].count()

# Numerator: student rows whose reading score is at or above the cut-off.
passing_reading = student_df[student_df["reading_score"].ge(60)]
total_passing_reading = passing_reading.shape[0]

percent_reading_pass = total_passing_reading / total_reading_score * 100

In [13]:
# Percentage of students passing BOTH math and reading.

# Denominator: number of non-null math scores (used as the student count).
total_score = student_df.loc[:, "math_score"].count()

# Numerator: rows where both subjects are at or above the cut-off of 60.
passing_score = student_df[
    student_df["math_score"].ge(60) & student_df["reading_score"].ge(60)
]
total_passing_score = passing_score.shape[0]

percent_pass = total_passing_score / total_score * 100

92.4457492979321

In [14]:
# Assemble the one-row district summary table.
# Each metric is wrapped in a single-element list so that the resulting
# DataFrame has exactly one row; the pair order fixes the column order.
district_metrics = (
    ("Total Schools", total_schools),
    ("Total Students", total_students),
    ("Total Budget", total_budget),
    ("Average Math Score", average_math_score),
    ("Average Reading Score", average_reading_score),
    ("% Passing Math", percent_math_pass),
    ("% Passing Reading", percent_reading_pass),
    ("% Overall Passing", percent_pass),
)
district_summary_df = pd.DataFrame(
    {label: [value] for label, value in district_metrics}
)

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,24649428,78.985371,81.87784,92.445749,100.0,92.445749


In [15]:
# Apply display formatting to the district summary columns.
#
# The formatted strings overwrite the numeric columns in place, so a second
# run of this cell would otherwise call e.g. "{:,}".format on a str and raise
# ValueError. Formatting is therefore applied only to columns that are still
# numeric, which makes the cell safe to re-run.
district_formats = {
    "Total Students": "{:,}",
    "Total Budget": "${:,}",
    "Average Math Score": "{:.0f}",
    "Average Reading Score": "{:.0f}",
    "% Passing Math": "{:.2f}",
    "% Passing Reading": "{:.2f}",
    "% Overall Passing": "{:.2f}",
}
for column, fmt in district_formats.items():
    # Skip columns already converted to strings by a previous run.
    if pd.api.types.is_numeric_dtype(district_summary_df[column]):
        district_summary_df[column] = district_summary_df[column].map(fmt.format)
district_summary_df


Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428",79,82,92.45,100.0,92.45


## School Summary

This section provides an overview of key metrics for each school.

In [16]:
# Combine the school DataFrame with the student DataFrame: outer join on school name,
# so every student row also carries its school's type, size and budget.
merge_df = school_df.merge(student_df, how="outer", on="school_name")
merge_df.head()

Unnamed: 0,School ID,school_name,type,size,budget,Student ID,student_name,gender,grade,reading_score,math_score
0,0,Huang High School,District,2917,1910635,0,Paul Bradley,M,9th,66,79
1,0,Huang High School,District,2917,1910635,1,Victor Smith,M,12th,94,61
2,0,Huang High School,District,2917,1910635,2,Kevin Rodriguez,M,12th,90,60
3,0,Huang High School,District,2917,1910635,3,Dr. Richard Scott,M,12th,67,58
4,0,Huang High School,District,2917,1910635,4,Bonnie Ray,F,9th,97,84


In [17]:
# Derive the per-row columns needed for the school-level analysis.
# The passing columns are booleans (score >= 60); their names carry a "%"
# because averaging them per school later yields the passing share.
math_ok = merge_df["math_score"].ge(60)
reading_ok = merge_df["reading_score"].ge(60)

merge_df["Per Student Budget"] = merge_df["budget"].div(merge_df["size"])
merge_df["% Passing Math"] = math_ok
merge_df["% Passing Reading"] = reading_ok
merge_df["% Overall Passing"] = math_ok & reading_ok
merge_df.head()

Unnamed: 0,School ID,school_name,type,size,budget,Student ID,student_name,gender,grade,reading_score,math_score,Per Student Budget,% Passing Math,% Passing Reading,% Overall Passing
0,0,Huang High School,District,2917,1910635,0,Paul Bradley,M,9th,66,79,655.0,True,True,True
1,0,Huang High School,District,2917,1910635,1,Victor Smith,M,12th,94,61,655.0,True,True,True
2,0,Huang High School,District,2917,1910635,2,Kevin Rodriguez,M,12th,90,60,655.0,True,True,True
3,0,Huang High School,District,2917,1910635,3,Dr. Richard Scott,M,12th,67,58,655.0,False,True,False
4,0,Huang High School,District,2917,1910635,4,Bonnie Ray,F,9th,97,84,655.0,True,True,True


In [18]:
# Per-school averages of every numeric/boolean column, indexed by
# (school_name, type).
#
# numeric_only=True is required because merge_df also holds string columns
# (student_name, gender, grade); on pandas >= 2.0 a bare .mean() raises
# TypeError for those instead of silently dropping them.
#
# The "% Passing ..." columns are means of booleans, i.e. fractions in [0, 1]
# rather than values on a 0-100 scale.
school_summary_df = merge_df.groupby(["school_name", "type"]).mean(numeric_only=True)
school_summary_df


Unnamed: 0_level_0,Unnamed: 1_level_0,School ID,size,budget,Student ID,reading_score,math_score,Per Student Budget,% Passing Math,% Passing Reading,% Overall Passing
school_name,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Bailey High School,District,7.0,4976.0,3124928.0,20358.5,81.033963,77.048432,628.0,0.895297,1.0,0.895297
Cabrera High School,Charter,6.0,1858.0,1081356.0,16941.5,83.97578,83.061895,582.0,1.0,1.0,1.0
Figueroa High School,District,1.0,2949.0,1884411.0,4391.0,81.15802,76.711767,639.0,0.884368,1.0,0.884368
Ford High School,District,13.0,2739.0,1763916.0,36165.0,80.746258,77.102592,644.0,0.893027,1.0,0.893027
Griffin High School,Charter,4.0,1468.0,917500.0,12995.5,83.816757,83.351499,625.0,1.0,1.0,1.0
Hernandez High School,District,3.0,4635.0,3022020.0,9944.0,80.934412,77.289752,652.0,0.890831,1.0,0.890831
Holden High School,Charter,8.0,427.0,248087.0,23060.0,83.814988,83.803279,581.0,1.0,1.0,1.0
Huang High School,District,0.0,2917.0,1910635.0,1458.0,81.182722,76.629414,655.0,0.888584,1.0,0.888584
Johnson High School,District,12.0,4761.0,3094650.0,32415.0,80.966394,77.072464,650.0,0.891829,1.0,0.891829
Pena High School,Charter,9.0,962.0,585858.0,23754.5,84.044699,83.839917,609.0,1.0,1.0,1.0


In [20]:
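# Planned columns for the school summary table (checklist):
# - School Name
# - School Type
# - Total Students
# - Total School Budget
# - Per Student Budget
# - Average Math Score
# - Average Reading Score
# - Passing Math (the percentage of students that passed math)
# - Passing Reading (the percentage of students that passed reading)
# - Overall Passing (the percentage of students that passed both math and reading)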

## Bottom Performing Schools

This table shows the five lowest-performing schools, ranked by % Overall Passing.

In [22]:
# Rank schools from lowest to highest overall passing share,
# then show the five at the bottom.
bottom_performing_df = school_summary_df.sort_values(
    by="% Overall Passing", ascending=True
)
bottom_performing_df.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,School ID,size,budget,Student ID,reading_score,math_score,Per Student Budget,% Passing Math,% Passing Reading,% Overall Passing
school_name,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Figueroa High School,District,1.0,2949.0,1884411.0,4391.0,81.15802,76.711767,639.0,0.884368,1.0,0.884368
Rodriguez High School,District,11.0,3999.0,2547363.0,28035.0,80.744686,76.842711,637.0,0.885471,1.0,0.885471
Huang High School,District,0.0,2917.0,1910635.0,1458.0,81.182722,76.629414,655.0,0.888584,1.0,0.888584
Hernandez High School,District,3.0,4635.0,3022020.0,9944.0,80.934412,77.289752,652.0,0.890831,1.0,0.890831
Johnson High School,District,12.0,4761.0,3094650.0,32415.0,80.966394,77.072464,650.0,0.891829,1.0,0.891829


## Math Scores by Grade

This table lists the average math score for students in each grade level (9th, 10th, 11th, 12th) at each school.