# PyCity Schools Data Analysis
- analysis

---

In [128]:
# setting up modules
import pandas as pd
from pathlib import Path

# Locations of the two raw CSV inputs, relative to the notebook's working directory.
school_data_csv = Path('Resources/schools_complete.csv')
student_data_csv = Path('Resources/students_complete.csv')

# Load each file into its own DataFrame.
school_data = pd.read_csv(school_data_csv)
student_data = pd.read_csv(student_data_csv)

# Join the two frames on the shared 'school_name' column (pandas' default
# inner join), so every student row also carries its school's columns.
complete_school_data = school_data.merge(student_data, on='school_name')

# Preview the first rows of the merged data set.
complete_school_data.head()

Unnamed: 0,School ID,school_name,type,size,budget,Student ID,student_name,gender,grade,reading_score,math_score
0,0,Huang High School,District,2917,1910635,0,Paul Bradley,M,9th,66,79
1,0,Huang High School,District,2917,1910635,1,Victor Smith,M,12th,94,61
2,0,Huang High School,District,2917,1910635,2,Kevin Rodriguez,M,12th,90,60
3,0,Huang High School,District,2917,1910635,3,Dr. Richard Scott,M,12th,67,58
4,0,Huang High School,District,2917,1910635,4,Bonnie Ray,F,9th,97,84


# District Summary

In [129]:
# Number of distinct (non-missing) school names in the merged data.
school_count = len(complete_school_data['school_name'].dropna().unique())
school_count

15

In [130]:
# Total students, measured as the number of rows with a non-missing student name.
student_count = complete_school_data['student_name'].notna().sum()
student_count

39170

In [131]:
# Total district budget: the sum of each school's budget, counted once per school.
# The merged frame repeats a school's budget on every one of its student rows,
# so keep a single row per school before summing. De-duplicating on
# 'school_name' (rather than on the budget values themselves) ensures two
# schools that happen to share the same budget are both included in the total.
total_budget = (
    complete_school_data
    .drop_duplicates(subset='school_name')['budget']
    .sum()
)
total_budget

24649428

In [132]:
# District-wide mean of the 'math_score' column across all student rows.
average_math_score = complete_school_data.loc[:, 'math_score'].mean()
average_math_score

78.98537145774827

In [133]:
# District-wide mean of the 'reading_score' column across all student rows.
average_reading_score = complete_school_data.loc[:, 'reading_score'].mean()
average_reading_score

81.87784018381414

In [134]:
# Percentage of students who passed math (math score greater than or equal to 70).
passed_math = complete_school_data['math_score'] >= 70

# Count the passing rows via the non-missing student names, then express the
# count as a share of all students.
passing_math_count = complete_school_data.loc[passed_math, 'student_name'].count()
passing_math_percentage = passing_math_count / student_count * 100
passing_math_percentage

74.9808526933878

In [135]:
# Percentage of students who passed reading (reading score greater than or equal to 70).
passed_reading = complete_school_data['reading_score'] >= 70

# Count the passing rows via the non-missing student names, then express the
# count as a share of all students.
passing_reading_count = complete_school_data.loc[passed_reading, 'student_name'].count()
passing_reading_percentage = passing_reading_count / student_count * 100
passing_reading_percentage

85.80546336482001

In [136]:
# calculating the percentage of students that passed math and reading
passing_math_reading_count = complete_school_data[(complete_school_data['math_score'] >= 70)
    & (complete_school_data['reading_score'] >= 70)].count()['student_name']
overall_passing_rate = passing_math_reading_count /  float(student_count) * 100
overall_passing_rate

65.17232575950983

In [137]:
# High-level snapshot of the district's key metrics as a one-row DataFrame.
# Column order follows the insertion order of this mapping.
summary_values = {
    'Total Schools': school_count,
    'Total Students': student_count,
    'Total Budget': total_budget,
    'Average Math Score': average_math_score,
    'Average Reading Score': average_reading_score,
    '% Passing Math': passing_math_percentage,
    '% Passing Reading': passing_reading_percentage,
    '% Overall Passing': overall_passing_rate,
}
district_summary = pd.DataFrame({label: [value] for label, value in summary_values.items()})

# Display format for each column: thousands separators for counts, currency
# for the budget, two decimals for averages, and a trailing '%' for pass rates.
# Note that after this step every column holds formatted strings, not numbers.
display_formats = {
    'Total Schools': '{:,}',
    'Total Students': '{:,}',
    'Total Budget': '${:,.2f}',
    'Average Math Score': '{:,.2f}',
    'Average Reading Score': '{:,.2f}',
    '% Passing Math': '{:,.2f}%',
    '% Passing Reading': '{:,.2f}%',
    '% Overall Passing': '{:,.2f}%',
}
for label, template in display_formats.items():
    district_summary[label] = district_summary[label].map(template.format)

# Show the formatted summary.
district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.99,81.88,74.98%,85.81%,65.17%


# School Summary