# PyCity Schools Analysis

- Your analysis here
  
---

In [75]:
# Import and setups
import pandas as pd
import os

school_data_initial = os.path.join('resources','schools_complete.csv')
student_data_initial = os.path.join('resources','students_complete.csv')
school_raw = pd.read_csv(school_data_initial)
student_raw = pd.read_csv(student_data_initial)

# Initialize an empty dictionary to hold values for district summary dataframe.
district_summary_data = {}

# Combine dataframes
combined_school_data = pd.merge(student_raw, school_raw, how="left", on=["school_name"])
combined_school_data


Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635
...,...,...,...,...,...,...,...,...,...,...,...
39165,39165,Donna Howard,F,12th,Thomas High School,99,90,14,Charter,1635,1043130
39166,39166,Dawn Bell,F,10th,Thomas High School,95,70,14,Charter,1635,1043130
39167,39167,Rebecca Tanner,F,9th,Thomas High School,73,84,14,Charter,1635,1043130
39168,39168,Desiree Kidd,F,10th,Thomas High School,99,90,14,Charter,1635,1043130


## District Summary

In [18]:
# Total Number of unique schools
total_unique_school = combined_school_data['school_name'].value_counts(dropna=False).count()
district_summary_data['Total Schools'] = total_unique_school
total_unique_school


15

In [3]:
# Total number of students
student_count = combined_school_data['student_name'].count()
district_summary_data['Total Students'] = student_count
student_count

39170

In [4]:
# Total Budget
unique_budgets = combined_school_data['budget'].unique()
total_budget = sum(unique_budgets)
district_summary_data['Total Budget'] = total_budget
total_budget
  

24649428

In [5]:
# Avg math score
avg_math_score = combined_school_data['math_score'].mean()
district_summary_data['Average Math Score'] = avg_math_score
avg_math_score

78.98537145774827

In [6]:
# Avg reading score 
avg_reading_score = combined_school_data['reading_score'].mean()
district_summary_data['Average Reading Score'] = avg_reading_score
avg_reading_score

81.87784018381414

In [41]:
# Percent students who passed math
passing_math_count = combined_school_data[(combined_school_data["math_score"] >= 70)].count()["student_name"]
passing_math_percentage = passing_math_count / float(student_count) * 100
district_summary_data['% Passing Math'] = passing_math_percentage
passing_math_percentage
passing_math_percentage

74.9808526933878

In [32]:
# Percent students who passed reading
passing_reading_count = combined_school_data[(combined_school_data["reading_score"] >= 70)].count()['student_name']
passing_reading_percentage = passing_reading_count/float(student_count) * 100
district_summary_data['% Passing Reading'] = passing_reading_percentage
passing_reading_percentage

85.80546336482001

In [33]:
# Percent students who passed math and reading
passing_math_reading_count = combined_school_data[(combined_school_data["reading_score"] >= 70) & (combined_school_data["math_score"] >= 70)].count()['student_name']
overall_passing_rate = passing_math_reading_count/float(student_count)*100
district_summary_data['% Overall Passing'] = overall_passing_rate
overall_passing_rate


65.17232575950983

In [37]:
district_summary = pd.DataFrame([district_summary_data])

district_summary["Total Students"] = district_summary["Total Students"].map("{:,}".format)
district_summary["Total Budget"] = district_summary["Total Budget"].map("${:,.2f}".format)
district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326


## School Summary

In [91]:
# Type of school
school_types = combined_school_data.set_index(["school_name"])["type"]
combined_school_data.head()


Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [93]:
# Total student count per school

per_school_counts = combined_school_data.groupby('school_name')['student_name'].count()

print(per_school_counts)

school_name
Bailey High School       4976
Cabrera High School      1858
Figueroa High School     2949
Ford High School         2739
Griffin High School      1468
Hernandez High School    4635
Holden High School        427
Huang High School        2917
Johnson High School      4761
Pena High School          962
Rodriguez High School    3999
Shelton High School      1761
Thomas High School       1635
Wilson High School       2283
Wright High School       1800
Name: student_name, dtype: int64


39170