In [1]:
# Importing pandas library
import pandas as pd

In [2]:
# Read the school and student CSV files into pandas DataFrames
school_df = pd.read_csv("Resources/schools_complete.csv")
student_df = pd.read_csv("Resources/students_complete.csv")

# School data: per-column non-null counts, followed by the full table
print(school_df.count(), school_df, sep="\n")
# Two blank lines separate the school output from the student output
print("\n")
# Student data: per-column non-null counts, followed by the first rows only
print(student_df.count(), student_df.head(), sep="\n")

School ID      15
school_name    15
type           15
size           15
budget         15
dtype: int64
    School ID            school_name      type  size   budget
0           0      Huang High School  District  2917  1910635
1           1   Figueroa High School  District  2949  1884411
2           2    Shelton High School   Charter  1761  1056600
3           3  Hernandez High School  District  4635  3022020
4           4    Griffin High School   Charter  1468   917500
5           5     Wilson High School   Charter  2283  1319574
6           6    Cabrera High School   Charter  1858  1081356
7           7     Bailey High School  District  4976  3124928
8           8     Holden High School   Charter   427   248087
9           9       Pena High School   Charter   962   585858
10         10     Wright High School   Charter  1800  1049400
11         11  Rodriguez High School  District  3999  2547363
12         12    Johnson High School  District  4761  3094650
13         13       Ford High

In [3]:
# District summary: one row of district-wide totals, averages and pass rates

# Boolean masks flagging students who scored at least 70 in each subject
passed_math = student_df["math_score"] >= 70
passed_reading = student_df["reading_score"] >= 70

# Student IDs are counted to get the numerator and denominator of each rate
student_ids = student_df["Student ID"]
total_ids = student_ids.count()

district_summary = pd.DataFrame({
    "Total Schools": [school_df["school_name"].count()],
    "Total Students": [school_df["size"].sum()],
    "Total Budget": [school_df["budget"].sum()],
    "Average Math Score": [student_df["math_score"].mean()],
    "Average Reading Score": [student_df["reading_score"].mean()],
    "% Passing Math": [student_ids[passed_math].count() / total_ids * 100],
    "% Passing Reading": [student_ids[passed_reading].count() / total_ids * 100],
    # "Overall" means passing both reading and math
    "% Overall Passing": [student_ids[passed_reading & passed_math].count() / total_ids * 100],
})
district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,24649428,78.985371,81.87784,74.980853,85.805463,65.172326


In [4]:
# Building a data frame with student information by school

grouped_stu_sch = student_df.groupby("school_name")

# Average reading and math score per school. The score columns are selected
# explicitly: calling .mean() on the whole grouped frame raises a TypeError on
# pandas >= 2.0 when the frame contains non-numeric columns, and selecting the
# columns also makes dropping "Student ID" unnecessary.
student_school_df = grouped_stu_sch[["reading_score", "math_score"]].mean().rename(
    columns={
        "reading_score": "Average Reading Score",
        "math_score": "Average Math Score",
    }
)

# Per-student pass flags (a score of 70 or more counts as passing)
passed_math = student_df["math_score"] >= 70
passed_reading = student_df["reading_score"] >= 70
student_schools = student_df["school_name"]

# The mean of a boolean flag within a school is the fraction of its students
# that passed. Unlike a ratio of two value_counts(), a school where nobody
# passed gets 0 instead of NaN.
student_school_df["% Passing Math"] = passed_math.groupby(student_schools).mean() * 100
student_school_df["% Passing Reading"] = passed_reading.groupby(student_schools).mean() * 100
student_school_df["% Overall Passing"] = (passed_math & passed_reading).groupby(student_schools).mean() * 100

# School summary: school-level attributes taken from the schools file
school_summary = pd.DataFrame({
    "School Name": school_df["school_name"],
    "School Type": school_df["type"],
    "Total Students": school_df["size"],
    "Total School Budget": school_df["budget"],
    "Per Student Budget": school_df["budget"] / school_df["size"]
})

# Attach the per-school student metrics; "school_name" is the index level of
# student_school_df. The outer join keeps schools present in only one source.
school_summary = school_summary.merge(student_school_df, left_on='School Name', right_on='school_name', how="outer")

# Index by school name and list the schools alphabetically
school_summary = school_summary.set_index('School Name').sort_index()
school_summary



Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Reading Score,Average Math Score,% Passing Math,% Passing Reading,% Overall Passing
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,District,4976,3124928,628.0,81.033963,77.048432,66.680064,81.93328,54.642283
Cabrera High School,Charter,1858,1081356,582.0,83.97578,83.061895,94.133477,97.039828,91.334769
Figueroa High School,District,2949,1884411,639.0,81.15802,76.711767,65.988471,80.739234,53.204476
Ford High School,District,2739,1763916,644.0,80.746258,77.102592,68.309602,79.299014,54.289887
Griffin High School,Charter,1468,917500,625.0,83.816757,83.351499,93.392371,97.138965,90.599455
Hernandez High School,District,4635,3022020,652.0,80.934412,77.289752,66.752967,80.862999,53.527508
Holden High School,Charter,427,248087,581.0,83.814988,83.803279,92.505855,96.252927,89.227166
Huang High School,District,2917,1910635,655.0,81.182722,76.629414,65.683922,81.316421,53.513884
Johnson High School,District,4761,3094650,650.0,80.966394,77.072464,66.057551,81.222432,53.539172
Pena High School,Charter,962,585858,609.0,84.044699,83.839917,94.594595,95.945946,90.540541


In [5]:
# Top and Bottom Performing Schools (By % Overall Passing)
# Re-order the summary so the highest overall pass rate comes first
school_summary = school_summary.sort_values(
    by="% Overall Passing",
    ascending=False,
)

In [6]:
# First five rows of the ranked summary
school_summary.iloc[:5]

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Reading Score,Average Math Score,% Passing Math,% Passing Reading,% Overall Passing
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Cabrera High School,Charter,1858,1081356,582.0,83.97578,83.061895,94.133477,97.039828,91.334769
Thomas High School,Charter,1635,1043130,638.0,83.84893,83.418349,93.272171,97.308869,90.948012
Griffin High School,Charter,1468,917500,625.0,83.816757,83.351499,93.392371,97.138965,90.599455
Wilson High School,Charter,2283,1319574,578.0,83.989488,83.274201,93.867718,96.539641,90.582567
Pena High School,Charter,962,585858,609.0,84.044699,83.839917,94.594595,95.945946,90.540541


In [7]:
# Last five rows of the ranked summary
school_summary.iloc[-5:]

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Reading Score,Average Math Score,% Passing Math,% Passing Reading,% Overall Passing
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Johnson High School,District,4761,3094650,650.0,80.966394,77.072464,66.057551,81.222432,53.539172
Hernandez High School,District,4635,3022020,652.0,80.934412,77.289752,66.752967,80.862999,53.527508
Huang High School,District,2917,1910635,655.0,81.182722,76.629414,65.683922,81.316421,53.513884
Figueroa High School,District,2949,1884411,639.0,81.15802,76.711767,65.988471,80.739234,53.204476
Rodriguez High School,District,3999,2547363,637.0,80.744686,76.842711,66.366592,80.220055,52.988247


In [8]:
# Math Scores by Grade
# Average reading and math score for every (grade, school) pair. The score
# columns are selected explicitly: calling .mean() on the whole grouped frame
# raises a TypeError on pandas >= 2.0 when the frame contains non-numeric
# columns, and selecting the columns makes dropping "Student ID" unnecessary.
# NOTE(review): the grade labels appear to be strings such as "9th", so this
# descending sort is lexicographic rather than numeric - confirm that ordering
# is intended.
math_scores_grade = (
    student_df.groupby(["grade", "school_name"])[["reading_score", "math_score"]]
    .mean()
    .sort_values(by=["grade"], ascending=False)
)
math_scores_grade

Unnamed: 0_level_0,Unnamed: 1_level_0,reading_score,math_score
grade,school_name,Unnamed: 2_level_1,Unnamed: 3_level_1
9th,Wright High School,83.833333,83.264706
9th,Huang High School,81.290284,77.027251
9th,Bailey High School,81.303155,77.083676
9th,Figueroa High School,81.198598,76.403037
9th,Ford High School,80.632653,77.361345
9th,Griffin High School,83.369193,82.04401
9th,Hernandez High School,80.86686,77.438495
9th,Holden High School,83.677165,83.787402
9th,Cabrera High School,83.676136,83.094697
9th,Johnson High School,81.260714,77.187857
