In [1]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas DataFrames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset.  
school_data_complete = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])

In [55]:
#create dataframe
a_school_df = pd.DataFrame(school_data_complete)

school_df = a_school_df.sort_values("school_name")
school_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
19584,19584,Tammie Fox,F,11th,Bailey High School,82,92,7,District,4976,3124928
21193,21193,Jennifer Murray,F,9th,Bailey High School,88,89,7,District,4976,3124928
21192,21192,Lisa Pineda,F,9th,Bailey High School,86,67,7,District,4976,3124928
21191,21191,Cameron Miller,M,11th,Bailey High School,70,75,7,District,4976,3124928
21190,21190,Thomas Rasmussen,M,12th,Bailey High School,77,82,7,District,4976,3124928


# District Summary

In [5]:
#total number of schools
total_schools = len(school_df["school_name"].unique())
#total number of students
total_students = len(school_df["student_name"])
#total budget for all schools
total_budget = sum(school_df["budget"].unique())
#average math scores
avg_math = f'{school_df["math_score"].mean():.6f}'
#average reading scores
avg_read = f'{school_df["reading_score"].mean():.6f}'
#Percent of students passing math
school_df['math_score'] = school_df.loc[:, "math_score"].astype(float)
perc_pass_math = len(school_df[school_df['math_score'] >= 70]) / total_students * 100
f'{perc_pass_math:.6f}'
#percent of students passing reading
school_df['reading_score'] = school_df.loc[:, "reading_score"].astype(float)
perc_pass_read = len(school_df[school_df['reading_score'] >= 70]) / total_students * 100
f'{perc_pass_read:.6f}'

In [17]:
perc_pass_both = len(school_df[school_df['reading_score', "math_score"] >= 70]) / total_students * 100
f'{perc_pass_both:.6f}'

KeyError: ('reading_score', 'math_score')

# School Summary

In [49]:
school_list = school_df["school_name"].unique()
school_list

array(['Bailey High School', 'Cabrera High School',
       'Figueroa High School', 'Ford High School', 'Griffin High School',
       'Hernandez High School', 'Holden High School', 'Huang High School',
       'Johnson High School', 'Pena High School', 'Rodriguez High School',
       'Shelton High School', 'Thomas High School', 'Wilson High School',
       'Wright High School'], dtype=object)

In [50]:
grouped_school_data_df = school_df.groupby(["school_name"])
grouped_school_data_df.count().head(10)

Unnamed: 0_level_0,Student ID,student_name,gender,grade,reading_score,math_score,School ID,type,size,budget
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Bailey High School,4976,4976,4976,4976,4976,4976,4976,4976,4976,4976
Cabrera High School,1858,1858,1858,1858,1858,1858,1858,1858,1858,1858
Figueroa High School,2949,2949,2949,2949,2949,2949,2949,2949,2949,2949
Ford High School,2739,2739,2739,2739,2739,2739,2739,2739,2739,2739
Griffin High School,1468,1468,1468,1468,1468,1468,1468,1468,1468,1468
Hernandez High School,4635,4635,4635,4635,4635,4635,4635,4635,4635,4635
Holden High School,427,427,427,427,427,427,427,427,427,427
Huang High School,2917,2917,2917,2917,2917,2917,2917,2917,2917,2917
Johnson High School,4761,4761,4761,4761,4761,4761,4761,4761,4761,4761
Pena High School,962,962,962,962,962,962,962,962,962,962


In [54]:
school_students = grouped_school_data_df["Student ID"].count()
school_budget = school_df["budget"].unique()
avg_math_score = grouped_school_data_df["math_score"].mean()
avg_read_score = grouped_school_data_df["reading_score"].mean()


In [38]:
school_summary_df = pd.DataFrame({
    "Number of students":school_students,
    "School Budget": school_budget,
    "Per Student Budget": school_budget/school_students,
    "Average Math Score" : avg_math_score, 
    "Average Reading Score" : avg_read_score
})
school_summary_df

Unnamed: 0_level_0,Number of students,School Budget,Per Student Budget,Average Math Score,Average Reading Score
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bailey High School,4976,3124928,628.0,77.048432,81.033963
Cabrera High School,1858,1081356,582.0,83.061895,83.97578
Figueroa High School,2949,1884411,639.0,76.711767,81.15802
Ford High School,2739,1763916,644.0,77.102592,80.746258
Griffin High School,1468,917500,625.0,83.351499,83.816757
Hernandez High School,4635,3022020,652.0,77.289752,80.934412
Holden High School,427,248087,581.0,83.803279,83.814988
Huang High School,2917,1910635,655.0,76.629414,81.182722
Johnson High School,4761,3094650,650.0,77.072464,80.966394
Pena High School,962,585858,609.0,83.839917,84.044699
