### Initial Setup

In [23]:
# import pandas
import pandas as pd

In [24]:
# load files
school_file = "Resources/schools_complete.csv"
student_file = "Resources/students_complete.csv"

# create initial dataframes
school_df = pd.read_csv(school_file)
student_df = pd.read_csv(student_file)

# merge on school name to get the complete dataframe
merged_df = pd.merge(student_df, school_df, how="left", on="school_name")

In [25]:
# Whether or not sutdents pass math and/or reading comes up repeatedly in this exercise, 
# so we will add those as series.
# The passing score is not specified in the readme.
# The example uses 70 as the passing score, so I will follow suit.
# Bin the test scores based on a passing score of 70.

bins = [0, 69.9, 100]
bin_names = ["Fail", "Pass"]
merged_df["math_group"]= pd.cut(merged_df["math_score"], bins, labels=bin_names, include_lowest=True)
merged_df["reading_group"]= pd.cut(merged_df["reading_score"], bins, labels=bin_names, include_lowest=True)

### District Summary
  * Total Schools
  * Total Students
  * Total Budget
  * Average Math Score
  * Average Reading Score
  * % Passing Math (The percentage of students that passed math.)
  * % Passing Reading (The percentage of students that passed reading.)
  * % Overall Passing (The percentage of students that passed math **and** reading.)

In [47]:
district_summary_df = pd.DataFrame([{
    "Total Number of Schools": merged_df["school_name"].nunique(),
    "Total Number of Students": merged_df["student_name"].count(),
    "Total Budget": school_df["budget"].sum(),
    "Average Math Score": merged_df["math_score"].mean(),
    "Average Reading Score": merged_df["reading_score"].mean(),
    "% Passing Math": 100*(merged_df[merged_df["math_group"] == "Pass"].count()["math_group"])/len(merged_df),
    "% Passing Reading": 100*(merged_df[merged_df["reading_group"] == "Pass"].count()["reading_group"])/len(merged_df),
    "% Passing": 100*(merged_df[merged_df["math_group"] == "Pass"].count()["math_group"])
}])
district_summary_df

Unnamed: 0,Total Number of Schools,Total Number of Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Passing
0,15,39170,24649428,78.985371,81.87784,74.980853,85.805463,2937000


### School Summary
  * School Name
  * School Type
  * Total Students
  * Total School Budget
  * Per Student Budget
  * Average Math Score
  * Average Reading Score
  * % Passing Math (The percentage of students that passed math.)
  * % Passing Reading (The percentage of students that passed reading.)
  * % Overall Passing (The percentage of students that passed math **and** reading.)

In [54]:
school_group_df = merged_df.groupby(['school_name'])
school_group_df["math_score"].mean()

school_name
Bailey High School       77.048432
Cabrera High School      83.061895
Figueroa High School     76.711767
Ford High School         77.102592
Griffin High School      83.351499
Hernandez High School    77.289752
Holden High School       83.803279
Huang High School        76.629414
Johnson High School      77.072464
Pena High School         83.839917
Rodriguez High School    76.842711
Shelton High School      83.359455
Thomas High School       83.418349
Wilson High School       83.274201
Wright High School       83.682222
Name: math_score, dtype: float64