### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas DataFrames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Rename "size" to clarify that it refers to number of students, not area or other measure of size
school_data = school_data.rename(columns={"size":"enrollment"})
school_data["per_student_budget"] = school_data["budget"] / school_data["enrollment"]

# Initial evaluation of student performance: add count of 1 to indicate that student is passing
student_data.loc[student_data["math_score"] >= 70, "passing_math"] = 1
student_data.loc[student_data["math_score"] < 70, "passing_math"] = 0
student_data.loc[student_data["reading_score"] >= 70, "passing_reading"] = 1
student_data.loc[student_data["reading_score"] < 70, "passing_reading"] = 0
student_data.loc[student_data["passing_math"] + student_data["passing_reading"] == 2, "passing_overall"] = 1
student_data.loc[student_data["passing_math"] + student_data["passing_reading"] < 2, "passing_overall"] = 0

# Combine the data into a single dataset.  
school_data_complete = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])
school_data_complete.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,passing_math,passing_reading,passing_overall,School ID,type,enrollment,budget,per_student_budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,1.0,0.0,0.0,0,District,2917,1910635,655.0
1,1,Victor Smith,M,12th,Huang High School,94,61,0.0,1.0,0.0,0,District,2917,1910635,655.0
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0.0,1.0,0.0,0,District,2917,1910635,655.0
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0.0,0.0,0.0,0,District,2917,1910635,655.0
4,4,Bonnie Ray,F,9th,Huang High School,97,84,1.0,1.0,1.0,0,District,2917,1910635,655.0


In [2]:
# Evaluate data sets
school_data.columns
# Index(['School ID', 'school_name', 'type', 'size', 'budget'], dtype='object')
# school_data["school_name"].count()
# 15 records
# size = count of students attending school_name

##student_data["Student ID"].nunique()
# Index(['Student ID', 'student_name', 'gender', 'grade', 'school_name','reading_score', 'math_score'],
#      dtype='object')
# student_data.count()
# 39170 records, 39170 unique student IDs

Index(['School ID', 'school_name', 'type', 'enrollment', 'budget',
       'per_student_budget'],
      dtype='object')

## District Summary

* Calculate the total number of schools

* Calculate the total number of students

* Calculate the total budget

* Calculate the average math score 

* Calculate the average reading score

* Calculate the percentage of students with a passing math score (70 or greater)

* Calculate the percentage of students with a passing reading score (70 or greater)

* Calculate the percentage of students who passed math **and** reading (% Overall Passing)

* Create a dataframe to hold the above results

* Optional: give the displayed data cleaner formatting

## Assumptions

* All schools in the input file are part of the same district:

    * because District identifier is not part of the input file

    * in spite of type values in file = "District", "Charter"


* All students in the input file are part of the same district:

    * all schools in the student file are found in the school file

    * and all schools in the school file are assumed part of same district (see above)


In [9]:
# Calculate the total number of schools
##total_schools = school_data_complete["school_name"].nunique()
##total_schools = len(school_data)
##total_schools = school_data["school_name"].count()
total_schools = school_data["school_name"].nunique()

# Calculate the total number of students
total_students = school_data["enrollment"].sum()

# Calculate the total budget
total_budget = school_data["budget"].sum()

# Calculate average math score
avg_math_score = student_data["math_score"].mean()

# Calculate average reading score
avg_reading_score = student_data["reading_score"].mean()

# Calculate the percentage of students with a passing math score (70 or greater)
passing_math_pct = (student_data["passing_math"].sum()/total_students)*100

# Calculate the percentage of students with a passing reading score (70 or greater)
passing_reading_pct = (student_data["passing_reading"].sum()/total_students)*100

# Calculate the percentage of students who passed math and reading (% Overall Passing)
passing_overall_pct = (student_data["passing_overall"].sum()/total_students)*100

# Create dataframe of results 
district_summary_df = pd.DataFrame({"Total Schools" : [total_schools],
                                    "Total Students" : total_students,
                                    "Total Budget" : total_budget,
                                    "Average Math Score" : avg_math_score,
                                    "Average Reading Score" : avg_reading_score,
                                    "Percent Passing Math" : passing_math_pct,
                                    "Percent Passing Reading" : passing_reading_pct,
                                    "Percent Passing Overall" : passing_overall_pct                                       
                                   })

# Format numbers for display
district_summary_df["Total Students"] = district_summary_df["Total Students"].astype(float).map("{:,.0f}".format)
district_summary_df["Total Budget"] = district_summary_df["Total Budget"].astype(float).map("${:,.0f}".format)
district_summary_df["Average Math Score"] = district_summary_df["Average Math Score"].astype(float).map("{:,.1f}".format)
district_summary_df["Average Reading Score"] = district_summary_df["Average Reading Score"].astype(float).map("{:,.1f}".format)
district_summary_df["Percent Passing Math"] = district_summary_df["Percent Passing Math"].astype(float).map("{:,.1f}%".format)
district_summary_df["Percent Passing Reading"] = district_summary_df["Percent Passing Reading"].astype(float).map("{:,.1f}%".format)
district_summary_df["Percent Passing Overall"] = district_summary_df["Percent Passing Overall"].astype(float).map("{:,.1f}%".format)

district_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,Percent Passing Math,Percent Passing Reading,Percent Passing Overall
0,15,39170,"$24,649,428",79.0,81.9,75.0%,85.8%,65.2%


## School Summary

* Create an overview table that summarizes key metrics about each school, including:
  * School Name
  * School Type
  * Total Students
  * Total School Budget
  * Per Student Budget
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * % Overall Passing (The percentage of students that passed math **and** reading.)
  
* Create a dataframe to hold the above results

In [4]:
# set up school summary dataframe with static values
school_summary_df = school_data[["school_name", "type", "enrollment", "budget", "per_student_budget"]]

# calc average scores and total counts of students passing by school
school_avg_math_scores_df = school_group["math_score"].mean()
school_avg_reading_scores_df = school_group["reading_score"].mean()
school_passing_math_df = school_group["passing_math"].sum()
school_passing_reading_df = school_group["passing_reading"].sum()
school_passing_overall_df = school_group["passing_overall"].sum()

# add calculated values to school summary dataframe
school_group = school_data_complete.groupby(["school_name"])
school_summary_df = pd.merge(school_summary_df, school_avg_math_scores_df, on="school_name")
school_summary_df = pd.merge(school_summary_df, school_avg_reading_scores_df, on="school_name")
school_summary_df = pd.merge(school_summary_df, school_passing_math_df, on="school_name")
school_summary_df = pd.merge(school_summary_df, school_passing_reading_df, on="school_name")
school_summary_df = pd.merge(school_summary_df, school_passing_overall_df, on="school_name")

# calc percent of students passing fields
school_summary_df["pct_passing_math"] = (school_summary_df["passing_math"] / school_summary_df["enrollment"])*100
school_summary_df["pct_passing_reading"] = (school_summary_df["passing_reading"] / school_summary_df["enrollment"])*100
school_summary_df["pct_passing_overall"] = (school_summary_df["passing_overall"] / school_summary_df["enrollment"])*100

# remove total students passing fields
school_summary_df = school_summary_df[["school_name", "type", "enrollment", "budget", "per_student_budget",
                                   "math_score", "reading_score", 
                                    "pct_passing_math", "pct_passing_reading", "pct_passing_overall"
                                    ]]

# rename fields for display
school_summary_df = school_summary_df.rename(columns={"school_name" : "School",
                                                      "type" : "Type",
                                                      "enrollment" : "Enrollment",
                                                      "budget" : "Budget",
                                                      "per_student_budget" : "Budget / Student",
                                                      "math_score" : "Average Math Score", 
                                                      "reading_score" : "Average Reading Score",
                                                      "pct_passing_math" : "Percent Passing Math",
                                                      "pct_passing_reading" : "Percent Passing Reading",
                                                      "pct_passing_overall" : "Percent Passing Overall"
                                                        })

# format numeric fields in dataframe for display
school_summary_df["Enrollment"] = school_summary_df["Enrollment"].astype(float).map("{:,.0f}".format)
school_summary_df["Budget"] = school_summary_df["Budget"].astype(float).map("${:,.0f}".format)
school_summary_df["Average Math Score"] = school_summary_df["Average Math Score"].astype(float).map("{:,.1f}".format)
school_summary_df["Average Reading Score"] = school_summary_df["Average Reading Score"].astype(float).map("{:,.1f}".format)
school_summary_df["Percent Passing Math"] = school_summary_df["Percent Passing Math"].astype(float).map("{:,.1f}%".format)
school_summary_df["Percent Passing Reading"] = school_summary_df["Percent Passing Reading"].astype(float).map("{:,.1f}%".format)
school_summary_df["Percent Passing Overall"] = school_summary_df["Percent Passing Overall"].astype(float).map("{:,.1f}%".format)

school_summary_df


NameError: name 'school_group' is not defined

## Top Performing Schools (By % Overall Passing)

* Sort and display the top five performing schools by % overall passing.

## Bottom Performing Schools (By % Overall Passing)

* Sort and display the five worst-performing schools by % overall passing.

## Math Scores by Grade

* Create a table that lists the average Reading Score for students of each grade level (9th, 10th, 11th, 12th) at each school.

  * Create a pandas series for each grade. Hint: use a conditional statement.
  
  * Group each series by school
  
  * Combine the series into a dataframe
  
  * Optional: give the displayed data cleaner formatting

## Reading Score by Grade 

* Perform the same operations as above for reading scores

## Scores by School Spending

* Create a table that breaks down school performances based on average Spending Ranges (Per Student). Use 4 reasonable bins to group school spending. Include in the table each of the following:
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * Overall Passing Rate (Average of the above two)

## Scores by School Size

* Perform the same operations as above, based on school size.

## Scores by School Type

* Perform the same operations as above, based on school type