# PyCity Schools Analysis


In [1]:
import os
import pandas as pd
from pathlib import Path

os.listdir("Resources")

['.ipynb_checkpoints',
 'schools_complete.csv',
 'students_complete.csv',
 'Untitled.ipynb']

In [2]:
school_data_to_load = Path("Resources/schools_complete.csv")
student_data_to_load = Path("Resources/students_complete.csv")


school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)


school_data_complete = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])
school_data_complete.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


# District Summary

In [3]:
schools_count = school_data_complete['school_name'].nunique()

print(schools_count)

15


In [4]:
students_count = school_data_complete['student_name'].shape[0]

print(students_count)


39170


In [5]:
total_budget = school_data_complete.drop_duplicates(subset= 'school_name')['budget'].sum()

print(total_budget)

24649428


In [6]:
average_math_score = school_data_complete['math_score'].mean()

print(average_math_score)

78.98537145774827


In [7]:
average_reading_score = school_data_complete['reading_score'].mean()

print(average_reading_score)

81.87784018381414


In [8]:
passing_math_count = school_data_complete[(school_data_complete["math_score"] >= 70)].count()["student_name"]
passing_math_percentage = passing_math_count / float(students_count) * 100

print(passing_math_percentage)

74.9808526933878


In [9]:
passing_reading_count = school_data_complete[(school_data_complete['reading_score'] >=70)].count()['student_name']
passing_reading_percentage = passing_reading_count / float(students_count)* 100

print(passing_reading_percentage)

85.80546336482001


In [10]:
passing_math_reading_count = school_data_complete[
    (school_data_complete["math_score"] >= 70) & (school_data_complete["reading_score"] >= 70)
].count()["student_name"]
overall_passing_rate = passing_math_reading_count / float(students_count)* 100 

print(overall_passing_rate)

65.17232575950983


In [11]:
district_summary = {'Total Schools': [schools_count], 'Total Students': [students_count], 'Total Budget': [total_budget],
                         'Average Math Score': [average_math_score], 'Average Reading Score': [average_reading_score],
                         '% Passing Math': [passing_math_percentage], '% Passing Reading': [passing_reading_percentage],
                         '% Overall Passing': [overall_passing_rate]}

df = pd.DataFrame(district_summary)

df["Total Students"] = df["Total Students"].map("{:,}".format)
df["Total Budget"] = df["Total Budget"].map("${:,.2f}".format)

styled_df = df.style.set_caption("District Summary").set_table_styles([{'selector': 'th', 'props': [('font-weight', 'bold')]}])


df.head()


Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326


# School Summary

In [12]:
school_types = school_data.set_index(['school_name'])['type']

In [13]:
per_school_counts = school_data_complete['school_name'].value_counts()

In [14]:
per_school_budget = school_data_complete.groupby(["school_name"]).mean()["budget"]

per_school_capita = per_school_budget / per_school_counts

In [15]:
per_school_math = school_data_complete.groupby(['school_name']).mean()['math_score']



In [16]:
per_school_reading= school_data_complete.groupby(['school_name']).mean()['reading_score']


In [17]:
school_passing_math = school_data_complete[(school_data_complete["math_score"] >= 70)]





In [18]:
school_passing_reading = school_data_complete[(school_data_complete["reading_score"] >= 70)]





In [19]:
passing_math_and_reading = school_data_complete[
    (school_data_complete['reading_score'] >=70) & (school_data_complete['math_score'] >=70)
]

In [20]:
per_school_passing_math = school_passing_math.groupby(["school_name"]).count()["student_name"] / per_school_counts * 100
per_school_passing_reading = school_passing_reading.groupby(["school_name"]).count()["student_name"] / per_school_counts * 100
overall_passing_rate = passing_math_and_reading.groupby(["school_name"]).count()["student_name"] / per_school_counts * 100

In [21]:
per_school_summary= {
                    'School Type': school_types,
                    'Total Students': per_school_counts,
                    'Total School Budget': per_school_budget,
                    'Per Student Budget': per_school_capita,
                    'Average Math Score': per_school_math,
                    'Average Reading Score': per_school_reading,
                    '% Passing Math': school_passing_math,
                    '% Passing Reading': school_passing_reading,
                    '% Overall Passing': passing_math_and_reading
}
df = pd.DataFrame(per_school_summary)

df["Total School Budget"] = df["Total School Budget"].map("${:,.2f}".format)
df["Per Student Budget"] = df["Per Student Budget"].map("${:,.2f}".format)

df.head()


                   
                   
                   
                 

ValueError: Buffer has wrong number of dimensions (expected 1, got 2)