In [1]:
# Importing the pandas library
import pandas as pd

In [2]:
# Read the school-level and student-level CSV exports into pandas DataFrames.
school_df = pd.read_csv("Resources/schools_complete.csv")
student_df = pd.read_csv("Resources/students_complete.csv")

# Sanity check: per-column non-null counts followed by the rows themselves
# (the whole school table, but only the first rows of the student table).
print(school_df.count(), school_df, sep="\n")
print("\n")  # emits the same two blank lines as two bare print() calls
print(student_df.count(), student_df.head(), sep="\n")

School ID      15
school_name    15
type           15
size           15
budget         15
dtype: int64
    School ID            school_name      type  size   budget
0           0      Huang High School  District  2917  1910635
1           1   Figueroa High School  District  2949  1884411
2           2    Shelton High School   Charter  1761  1056600
3           3  Hernandez High School  District  4635  3022020
4           4    Griffin High School   Charter  1468   917500
5           5     Wilson High School   Charter  2283  1319574
6           6    Cabrera High School   Charter  1858  1081356
7           7     Bailey High School  District  4976  3124928
8           8     Holden High School   Charter   427   248087
9           9       Pena High School   Charter   962   585858
10         10     Wright High School   Charter  1800  1049400
11         11  Rodriguez High School  District  3999  2547363
12         12    Johnson High School  District  4761  3094650
13         13       Ford High

In [3]:
# District summary
#
# Builds a one-row DataFrame of district-wide totals, average scores, and
# pass rates from school_df (one row per school) and student_df (one row per
# student).

# Minimum score that counts as passing, named once so the three pass rates
# below cannot drift apart.
# NOTE(review): with this cutoff the reading pass rate comes out at 100% for
# this dataset -- confirm that 60 is the intended pass mark.
PASSING_SCORE = 60

# Pass rates are expressed as a share of the non-null student IDs.
student_ids = student_df["Student ID"]
student_count = student_ids.count()

passed_math = student_df["math_score"] >= PASSING_SCORE
passed_reading = student_df["reading_score"] >= PASSING_SCORE

district_summary = pd.DataFrame({
    "Total Schools": [school_df["school_name"].count()],
    "Total Students": [school_df["size"].sum()],
    "Total Budget": [school_df["budget"].sum()],
    "Average Math Score": [student_df["math_score"].mean()],
    "Average Reading Score": [student_df["reading_score"].mean()],
    "% Passing Math": [student_ids[passed_math].count() / student_count * 100],
    "% Passing Reading": [student_ids[passed_reading].count() / student_count * 100],
    # Overall passing requires clearing the cutoff in both subjects.
    "% Overall Passing": [student_ids[passed_reading & passed_math].count() / student_count * 100],
})
district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,24649428,78.985371,81.87784,92.445749,100.0,92.445749


In [4]:
# Building a data frame with student information by school
#
# Produces one row per school (indexed by school_name) holding the mean
# reading and math scores of that school's students.

grouped_stu_sch = student_df.groupby("school_name")

# Select the score columns explicitly before averaging. Since pandas 2.0,
# GroupBy.mean() no longer drops non-numeric columns silently and raises
# TypeError on them, so averaging the whole frame is fragile. Selecting the
# two scores also makes dropping "Student ID" afterwards unnecessary.
student_school_df = (
    grouped_stu_sch[["reading_score", "math_score"]]
    .mean()
    .rename(columns={
        "reading_score": "Average Reading Score",
        "math_score": "Average Math Score",
    })
)

student_school_df

Unnamed: 0_level_0,Average Reading Score,Average Math Score
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Bailey High School,81.033963,77.048432
Cabrera High School,83.97578,83.061895
Figueroa High School,81.15802,76.711767
Ford High School,80.746258,77.102592
Griffin High School,83.816757,83.351499
Hernandez High School,80.934412,77.289752
Holden High School,83.814988,83.803279
Huang High School,81.182722,76.629414
Johnson High School,80.966394,77.072464
Pena High School,84.044699,83.839917


In [5]:
# School summary
#
# One row per school: descriptive columns from school_df under presentation
# labels, a derived per-student budget, and the per-school average scores
# from student_school_df.

school_summary = (
    school_df[["school_name", "type", "size", "budget"]]
    .rename(columns={
        "school_name": "School Name",
        "type": "School Type",
        "size": "Total Students",
        "budget": "Total School Budget",
    })
    # Budget divided by enrolment, computed from the relabelled columns.
    .assign(**{
        "Per Student Budget": lambda frame: frame["Total School Budget"] / frame["Total Students"],
    })
    # student_school_df is indexed by school_name, so right_on names that
    # index level; the outer join keeps schools present on either side.
    .merge(student_school_df, left_on="School Name", right_on="school_name", how="outer")
)

school_summary

Unnamed: 0,School Name,School Type,Total Students,Total School Budget,Per Student Budget,Average Reading Score,Average Math Score
0,Huang High School,District,2917,1910635,655.0,81.182722,76.629414
1,Figueroa High School,District,2949,1884411,639.0,81.15802,76.711767
2,Shelton High School,Charter,1761,1056600,600.0,83.725724,83.359455
3,Hernandez High School,District,4635,3022020,652.0,80.934412,77.289752
4,Griffin High School,Charter,1468,917500,625.0,83.816757,83.351499
5,Wilson High School,Charter,2283,1319574,578.0,83.989488,83.274201
6,Cabrera High School,Charter,1858,1081356,582.0,83.97578,83.061895
7,Bailey High School,District,4976,3124928,628.0,81.033963,77.048432
8,Holden High School,Charter,427,248087,581.0,83.814988,83.803279
9,Pena High School,Charter,962,585858,609.0,84.044699,83.839917


In [6]:

# Merging both data frames
# complete_df = pd.merge(school_df, student_df)

In [7]:
# Index the summary by school name. The guard makes this cell safe to re-run:
# once "School Name" has become the index it is no longer a column, and an
# unguarded set_index would raise KeyError on the second execution.
if "School Name" in school_summary.columns:
    school_summary = school_summary.set_index("School Name")
school_summary

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Reading Score,Average Math Score
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Huang High School,District,2917,1910635,655.0,81.182722,76.629414
Figueroa High School,District,2949,1884411,639.0,81.15802,76.711767
Shelton High School,Charter,1761,1056600,600.0,83.725724,83.359455
Hernandez High School,District,4635,3022020,652.0,80.934412,77.289752
Griffin High School,Charter,1468,917500,625.0,83.816757,83.351499
Wilson High School,Charter,2283,1319574,578.0,83.989488,83.274201
Cabrera High School,Charter,1858,1081356,582.0,83.97578,83.061895
Bailey High School,District,4976,3124928,628.0,81.033963,77.048432
Holden High School,Charter,427,248087,581.0,83.814988,83.803279
Pena High School,Charter,962,585858,609.0,84.044699,83.839917
