# City Schools Pandas Challenge 

This analysis of the district-wide standardized test results aggregates data containing student's math and reading 
scores, as well as various information on the schools they attend. The purpose is to showcase obvious trends in school 
performance.

In [1]:
# Dependencies
import pandas as pd

In [2]:
# Store filepath for the school file
school_file = "Resources/schools_complete.csv"

In [3]:
#Store filepath for the student file
student_file = "Resources/students_complete.csv"

In [4]:
# Read the school file with the pandas library
school_df = pd.read_csv(school_file)
school_df.head()

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [5]:
# Read the student file with the pandas library
student_df = pd.read_csv(student_file)
student_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


# District Summary

In [6]:
# This is a high level snapshot of the district's key metrics.

In [7]:
#Find district summary information, like total schools
total_schools = school_df["school_name"].count()
#total_schools

In [8]:
total_students = school_df["size"].sum()
#total_students

In [9]:
total_budget = school_df["budget"].sum()
#total_budget

In [10]:
average_math_score = student_df["math_score"].mean()
#average_math_score

In [11]:
average_reading_score = student_df["reading_score"].mean()
#average_reading_score

In [12]:
# Find percent of students passing math, start with total scores:
#total_math_score = student_df["math_score"].count()
#total_math_score

In [13]:
# Create bins to hold pass/fail data for both math and reading
bins = [0, 59, 100]

bin_names = ["Fail", "Pass"]

In [14]:
# Identify math scores as either pass or fail and add to bins
binned_math = pd.cut(student_df['math_score'], bins, labels=bin_names)
# Create column in student data frame
student_df['math_passfail'] = binned_math
student_df

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,math_passfail
0,0,Paul Bradley,M,9th,Huang High School,66,79,Pass
1,1,Victor Smith,M,12th,Huang High School,94,61,Pass
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,Pass
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,Fail
4,4,Bonnie Ray,F,9th,Huang High School,97,84,Pass
...,...,...,...,...,...,...,...,...
39165,39165,Donna Howard,F,12th,Thomas High School,99,90,Pass
39166,39166,Dawn Bell,F,10th,Thomas High School,95,70,Pass
39167,39167,Rebecca Tanner,F,9th,Thomas High School,73,84,Pass
39168,39168,Desiree Kidd,F,10th,Thomas High School,99,90,Pass


In [15]:
# Find total reading scores for percent passing reading function: 
#total_reading_score = student_df["reading_score"].count()
#total_reading_score

In [16]:
# Identify reading scores as either pass or fail and add to bins
binned_reading = pd.cut(student_df['reading_score'], bins, labels=bin_names)
# Create column in student data frame
student_df['reading_passfail'] = binned_reading
#student_df

In [17]:
# Find total number of students with passing math scores
math_pass = student_df['math_passfail'].value_counts('Pass')
#math_pass

In [18]:
# Find total number of students with passing reading scores
reading_pass = student_df['reading_passfail'].value_counts('Pass')
#reading_pass

In [19]:
#Find % of students overall passing
student_df["overall_passfail"] = student_df["math_passfail"].astype(str) + student_df["reading_passfail"].astype(str)
student_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,math_passfail,reading_passfail,overall_passfail
0,0,Paul Bradley,M,9th,Huang High School,66,79,Pass,Pass,PassPass
1,1,Victor Smith,M,12th,Huang High School,94,61,Pass,Pass,PassPass
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,Pass,Pass,PassPass
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,Fail,Pass,FailPass
4,4,Bonnie Ray,F,9th,Huang High School,97,84,Pass,Pass,PassPass


In [20]:
overall_pass = student_df['overall_passfail'].value_counts('PassPass')
#overall_pass

In [21]:
district_summary_df = pd.DataFrame(
    {"Total Schools": [total_schools], 
     "Total Students": [total_students],
     "Total Budget": [total_budget],
     "Average Math Score": [average_math_score],
     "Average Reading Score": [average_reading_score],
     "% Passing Math": [math_pass],
     "% Passing Reading": [reading_pass],
     "% Overall Passing": [overall_pass]
                       })
district_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,24649428,78.985371,81.87784,Pass 0.924457 Fail 0.075543 Name: math_p...,Pass 1.0 Fail 0.0 Name: reading_passfail...,PassPass 0.924457 FailPass 0.075543 Name...


# School Summary

In [22]:
# Serves as an overview of key metrics on each school.
#School Name
#School Type
#Total Students
#Total School Budget
#Per Student Budget
#Average Math Score
#Average Reading Score
#% Passing Math (The percentage of students that passed math.)
#% Passing Reading (The percentage of students that passed reading.)
#% Overall Passing (The percentage of students that passed math **and** reading.)

In [23]:
# Merge school dateframe with student dataframe using an outer join at school name
merge_df = pd.merge(school_df, student_df, on="school_name", how="outer")
merge_df.head()

Unnamed: 0,School ID,school_name,type,size,budget,Student ID,student_name,gender,grade,reading_score,math_score,math_passfail,reading_passfail,overall_passfail
0,0,Huang High School,District,2917,1910635,0,Paul Bradley,M,9th,66,79,Pass,Pass,PassPass
1,0,Huang High School,District,2917,1910635,1,Victor Smith,M,12th,94,61,Pass,Pass,PassPass
2,0,Huang High School,District,2917,1910635,2,Kevin Rodriguez,M,12th,90,60,Pass,Pass,PassPass
3,0,Huang High School,District,2917,1910635,3,Dr. Richard Scott,M,12th,67,58,Fail,Pass,FailPass
4,0,Huang High School,District,2917,1910635,4,Bonnie Ray,F,9th,97,84,Pass,Pass,PassPass
