# PyCity Schools Analysis

- My analysis here

In [3]:
# import modules
import pandas as pd
from pathlib import Path

# path to source files
school_data = Path("Resources/schools_complete.csv")
student_data = Path("Resources/students_complete.csv")

# read data into dataframes
school_data_df = pd.read_csv(school_data)
student_data_df = pd.read_csv(student_data)

# merge into single data set
school_data_complete_df = pd.merge(student_data_df, school_data_df, how="left", on=["school_name", "school_name"])
school_data_complete_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


## District Summary

In [8]:
# Total number of unique schools
school_count = len(school_data_complete_df["school_name"].unique())
school_count

15

In [11]:
# total number of students
student_count = len(school_data_complete_df["student_name"])
student_count

39170

In [14]:
# calculate total budget
total_budget = sum(school_data_complete_df["budget"].unique())
total_budget

24649428

In [15]:
# calculate average (mean) math score
average_math_score = sum(school_data_complete_df["math_score"]) / len(school_data_complete_df["math_score"])
average_math_score

78.98537145774827

In [16]:
# calculate average (mean) reading score
average_reading_score = sum(school_data_complete_df["reading_score"]) / len(school_data_complete_df["reading_score"])
average_reading_score

81.87784018381414

In [17]:
# calculate the percentage of students who passed math (math scores greather than or equal to 70)
passing_math_count = school_data_complete_df[(school_data_complete_df["math_score"] >= 70)].count()["student_name"]
passing_math_percentage = passing_math_count / float(student_count) * 100
passing_math_percentage

74.9808526933878

In [18]:
# calculate the percentage of students who passed reading (reading scores greather than or equal to 70)
passing_reading_count = school_data_complete_df[(school_data_complete_df["reading_score"] >= 70)].count()["student_name"]
passing_reading_percentage = passing_reading_count / float(student_count) * 100
passing_reading_percentage

85.80546336482001

In [19]:
# calculate the percentage of students that passed math and reading
passing_math_reading_count = school_data_complete_df[
    (school_data_complete_df["math_score"] >= 70) & (school_data_complete_df["reading_score"] >= 70)
].count()["student_name"]
overall_passing_rate = passing_math_reading_count /  float(student_count) * 100
overall_passing_rate

65.17232575950983

In [22]:
# Create a high-level snapshot of the district's key metrics in a DataFrame
district_summary = pd.DataFrame({
    "school_count": [school_count],
    "student_count": [student_count],
    "total_budget": [total_budget],
    "average_math_score": [average_math_score],
    "average_reading_score": [average_reading_score],
    "passing_math_percentage": [passing_math_percentage],
    "passing_reading_percentage": [passing_reading_percentage],
    "overall_passing_rate": [overall_passing_rate]
})
# Formatting
district_summary["school_count"] = district_summary["school_count"].map("{:,}".format)
district_summary["total_budget"] = district_summary["total_budget"].map("${:,.2f}".format)

# Display the DataFrame
district_summary

Unnamed: 0,school_count,student_count,total_budget,average_math_score,average_reading_score,passing_math_percentage,passing_reading_percentage,overall_passing_rate
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326


## School Summary

In [23]:
# select all school types
school_types = school_data_complete_df["type"].unique()
school_types

array(['District', 'Charter'], dtype=object)

In [25]:
# calculate student total per school
per_school_counts = school_data_complete_df["school_name"].value_counts()
per_school_counts

school_name
Bailey High School       4976
Johnson High School      4761
Hernandez High School    4635
Rodriguez High School    3999
Figueroa High School     2949
Huang High School        2917
Ford High School         2739
Wilson High School       2283
Cabrera High School      1858
Wright High School       1800
Shelton High School      1761
Thomas High School       1635
Griffin High School      1468
Pena High School          962
Holden High School        427
Name: count, dtype: int64

In [30]:
# total school budget and per capita spending per school
per_school_budget = school_data_complete_df["school_name"].value_counts("budget")
per_school_budget

school_name
Bailey High School       0.127036
Johnson High School      0.121547
Hernandez High School    0.118330
Rodriguez High School    0.102093
Figueroa High School     0.075287
Huang High School        0.074470
Ford High School         0.069926
Wilson High School       0.058284
Cabrera High School      0.047434
Wright High School       0.045954
Shelton High School      0.044958
Thomas High School       0.041741
Griffin High School      0.037478
Pena High School         0.024560
Holden High School       0.010901
Name: proportion, dtype: float64