### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [214]:
# Dependencies and Setup
import pandas as pd

# Relative paths to the raw CSV inputs (adjust if the Resources folder moves)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Load each CSV into its own DataFrame
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Attach each school's attributes to its students' rows.
# A left join keeps every student row; the join key is named once, since
# repeating "school_name" in the key list added nothing.
school_data_complete = pd.merge(student_data, school_data, how="left", on="school_name")


## District Summary

* Calculate the total number of schools

* Calculate the total number of students

* Calculate the total budget

* Calculate the average math score 

* Calculate the average reading score

* Calculate the percentage of students with a passing math score (70 or greater)

* Calculate the percentage of students with a passing reading score (70 or greater)

* Calculate the percentage of students who passed math **and** reading (% Overall Passing)

* Create a dataframe to hold the above results

* Optional: give the displayed data cleaner formatting

In [215]:
# Preview the first five rows of the merged student/school table
school_data_complete.head(n=5)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [216]:
# Map the raw snake_case column names to display-friendly titles
display_column_names = {
    "student_name": "Student Name",
    "gender": "Gender",
    "grade": "Grade",
    "school_name": "School Name",
    "reading_score": "Reading Score",
    "math_score": "Math Score",
    "type": "Type",
    "size": "Size",
    "budget": "Budget",
}
school_data_complete = school_data_complete.rename(columns=display_column_names)
school_data_complete.head()

Unnamed: 0,Student ID,Student Name,Gender,Grade,School Name,Reading Score,Math Score,School ID,Type,Size,Budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [217]:
# District-wide headline figures: school and student counts, total budget, mean scores

# Number of distinct school names present in the merged data
school_count = len(school_data_complete["School Name"].unique())

# Number of rows with a non-null student name
student_total = school_data_complete["Student Name"].count()

# Sum the budgets from the per-school table: the merged table repeats each
# school's budget once per student, which would inflate the total
Total_Budget = school_data["budget"].sum()

# Mean scores over every student row
Avg_Math = school_data_complete["Math Score"].mean()
Avg_Reading = school_data_complete["Reading Score"].mean()

In [229]:
# Percentage of all students whose math score is 70 or higher
math_pass_mask = school_data_complete["Math Score"] >= 70
Math_Pass = school_data_complete.loc[math_pass_mask]
count_Math_Pass = Math_Pass["Student ID"].count()
perc_Math_Pass = (count_Math_Pass / student_total) * 100

In [235]:
# Percentage of all students whose reading score is 70 or higher
reading_pass_mask = school_data_complete["Reading Score"] >= 70
Read_Pass = school_data_complete.loc[reading_pass_mask]
count_Read_Pass = Read_Pass["Student ID"].count()
perc_Read_Pass = (count_Read_Pass / student_total) * 100

In [236]:
# Percentage of all students who scored 70 or higher in BOTH math and reading
both_pass_mask = (school_data_complete["Math Score"] >= 70) & (school_data_complete["Reading Score"] >= 70)
Pass_Overall = school_data_complete.loc[both_pass_mask]

count_Pass_Overall = Pass_Overall["Student ID"].count()
perc_Pass_Overall = (count_Pass_Overall / student_total) * 100


In [237]:
# Collect the district-level metrics as raw numbers so the frame can still be
# used in later calculations; presentation formatting is applied only at display time.
Dist_Sum = {
    "Total Schools": school_count,
    "Total Students": student_total,
    "Total Budget": Total_Budget,
    "Average Math Score": Avg_Math,
    "Average Reading Score": Avg_Reading,
    "% Passing Math": perc_Math_Pass,
    "% Passing Reading": perc_Read_Pass,
    "% Overall Passing": perc_Pass_Overall,
}

# Single-row DataFrame, kept so the figures can be referenced later
Dist_Sum_df = pd.DataFrame([Dist_Sum])

# Thousands separators and currency are added by the Styler only, leaving the
# stored values numeric
Dist_Sum_df.style.format({"Total Students": "{:,}", "Total Budget": "${:,.2f}"})

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326


## School Summary

* Create an overview table that summarizes key metrics about each school, including:
  * School Name
  * School Type
  * Total Students
  * Total School Budget
  * Per Student Budget
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * % Overall Passing (The percentage of students that passed math **and** reading.)
  
* Create a dataframe to hold the above results

In [113]:
# Group the merged student rows by school
School_Name = school_data_complete.groupby(["School Name"])

# Students per school: non-null Student ID values in each group
Stu_Total = School_Name["Student ID"].count()

# School type keyed by school name, read from the per-school table
school_type = school_data.set_index("school_name").loc[:, "type"]

In [114]:
# Index the per-school table by name once so both lookups below share it
schools_indexed = school_data.set_index("school_name")

# Total budget of each school
School_budget = schools_indexed["budget"]

# Budget per student: total budget divided by the school's size
budg_per_student = schools_indexed["budget"] / schools_indexed["size"]


In [115]:
# Mean math and reading score for every school, taken from the per-school groups
avg_math_school, avg_read_school = (
    School_Name[score_column].mean()
    for score_column in ("Math Score", "Reading Score")
)

In [116]:
# Per-school math pass rate: students scoring 70 or higher divided by the school's student count.
# Filtering before grouping drops any school with no passing students, so the
# counts are reindexed onto the full school list with 0 filled in; without this
# such a school would show NaN instead of 0%.
pass_math = (
    school_data_complete[school_data_complete["Math Score"] >= 70]
    .groupby("School Name")["Math Score"]
    .count()
    .reindex(Stu_Total.index, fill_value=0)
)

perc_school_math_pass = pass_math/Stu_Total*100


In [117]:
# Per-school reading pass rate: students scoring 70 or higher divided by the school's student count.
# Schools with no passing students vanish from the filtered groupby, so the
# counts are reindexed onto every school with 0 filled in (0% rather than NaN).
pass_read = (
    school_data_complete[school_data_complete["Reading Score"] >= 70]
    .groupby("School Name")["Student ID"]
    .count()
    .reindex(Stu_Total.index, fill_value=0)
)

perc_school_read_pass = pass_read/Stu_Total*100

In [118]:
# Per-school overall pass rate: students scoring 70 or higher in BOTH subjects,
# divided by the school's student count.
# Schools with no such students vanish from the filtered groupby, so the counts
# are reindexed onto every school with 0 filled in (0% rather than NaN).
overall_pass_school = (
    school_data_complete[(school_data_complete["Reading Score"] >= 70) &
                         (school_data_complete["Math Score"] >= 70)]
    .groupby("School Name")["Student ID"]
    .count()
    .reindex(Stu_Total.index, fill_value=0)
)

perc_overall_pass_school = overall_pass_school/Stu_Total*100

In [119]:
# Assemble the per-school metrics into one table; pandas aligns the input
# Series on their index labels (the school names)
School_Summary = pd.DataFrame({"School Type": school_type, "Total Students": Stu_Total,"Total Budget": School_budget,
                  "Per Student Budget": budg_per_student,"Average Math Score": avg_math_school, 
                  "Average Reading Score": avg_read_school,"% Passing Math": perc_school_math_pass, 
                  "% Passing Reading": perc_school_read_pass, "% Overall Passing": perc_overall_pass_school})

# Display-only formatting; School_Summary itself keeps its numeric values.
# Score and percentage columns use an explicit six-decimal precision ("{:.6f}");
# the earlier "{:6f}" only set a minimum field width.
School_Summary.style.format({"School Type": "{:}", "Total Students": "{:,}","Total Budget": "${:,.2f}",
                  "Per Student Budget": "${:,.2f}","Average Math Score": "{:.6f}", 
                  "Average Reading Score": "{:.6f}","% Passing Math": "{:.6f}", 
                  "% Passing Reading": "{:.6f}", "% Overall Passing": "{:.6f}"})

Unnamed: 0,School Type,Total Students,Total Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Bailey High School,District,4976,"$3,124,928.00",$628.00,77.048432,81.033963,66.680064,81.93328,54.642283
Cabrera High School,Charter,1858,"$1,081,356.00",$582.00,83.061895,83.97578,94.133477,97.039828,91.334769
Figueroa High School,District,2949,"$1,884,411.00",$639.00,76.711767,81.15802,65.988471,80.739234,53.204476
Ford High School,District,2739,"$1,763,916.00",$644.00,77.102592,80.746258,68.309602,79.299014,54.289887
Griffin High School,Charter,1468,"$917,500.00",$625.00,83.351499,83.816757,93.392371,97.138965,90.599455
Hernandez High School,District,4635,"$3,022,020.00",$652.00,77.289752,80.934412,66.752967,80.862999,53.527508
Holden High School,Charter,427,"$248,087.00",$581.00,83.803279,83.814988,92.505855,96.252927,89.227166
Huang High School,District,2917,"$1,910,635.00",$655.00,76.629414,81.182722,65.683922,81.316421,53.513884
Johnson High School,District,4761,"$3,094,650.00",$650.00,77.072464,80.966394,66.057551,81.222432,53.539172
Pena High School,Charter,962,"$585,858.00",$609.00,83.839917,84.044699,94.594595,95.945946,90.540541


## Top Performing Schools (By % Overall Passing)

* Sort and display the top five performing schools by % overall passing.

In [120]:
# Rank schools from highest to lowest % Overall Passing and show the top five
Top_School = School_Summary.sort_values(by=["% Overall Passing"], ascending=False)
Top_School.head()

## Bottom Performing Schools (By % Overall Passing)

* Sort and display the five worst-performing schools by % overall passing.

In [121]:
# Rank schools from lowest to highest % Overall Passing and show the bottom five
Bottom_School = School_Summary.sort_values(by=["% Overall Passing"], ascending=True)
Bottom_School.head()

## Math Scores by Grade

* Create a table that lists the average Math Score for students of each grade level (9th, 10th, 11th, 12th) at each school.

  * Create a pandas series for each grade. Hint: use a conditional statement.
  
  * Group each series by school
  
  * Combine the series into a dataframe
  
  * Optional: give the displayed data cleaner formatting

In [239]:
# Mean math score per school, computed separately for each grade level
math_by_grade = {
    grade: student_data[student_data["grade"] == grade].groupby("school_name")["math_score"].mean()
    for grade in ("9th", "10th", "11th", "12th")
}

# Keep the individual per-grade Series available under their existing names
nineth_math = math_by_grade["9th"]
tenth_math = math_by_grade["10th"]
eleventh_math = math_by_grade["11th"]
twelfth_math = math_by_grade["12th"]

# One row per school, one column per grade
Grade_math_scores = pd.DataFrame(math_by_grade)

## Reading Score by Grade 

* Perform the same operations as above for reading scores

In [123]:
# Mean reading score per school, computed separately for each grade level
reading_by_grade = {
    grade: student_data[student_data["grade"] == grade].groupby("school_name")["reading_score"].mean()
    for grade in ("9th", "10th", "11th", "12th")
}

# Keep the individual per-grade Series available under their existing names
nineth_read = reading_by_grade["9th"]
tenth_read = reading_by_grade["10th"]
eleventh_read = reading_by_grade["11th"]
twelfth_read = reading_by_grade["12th"]

# One row per school, one column per grade
Grade_read_scores = pd.DataFrame(reading_by_grade)

## Scores by School Spending

* Create a table that breaks down school performances based on average Spending Ranges (Per Student). Use 4 reasonable bins to group school spending. Include in the table each of the following:
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * % Overall Passing (The percentage of students that passed math **and** reading.)

In [146]:
# Each spending bracket as (upper bin edge, display label)
spend_brackets = [
    (584, "<$585"),
    (629, "$585-630"),
    (644, "$630-645"),
    (680, "$645-680"),
]

# Bin edges: start at 0, then the upper edge of each of the four brackets
bins_spend = [0] + [upper_edge for upper_edge, _ in spend_brackets]

# Names for the four bins
bins_spend_names = [label for _, label in spend_brackets]

In [147]:
# Label every student row with the spending bracket of its school
per_student_spend = school_data_complete["Budget"] / school_data_complete["Size"]
school_data_complete["spend"] = pd.cut(per_student_spend, bins=bins_spend, labels=bins_spend_names)

In [148]:

# Group student rows by their school's spending bracket.
# "spend" is a categorical column produced by pd.cut; observed=False keeps every
# bracket in the result even when no rows fall into it, and states explicitly
# the grouping behavior that newer pandas versions warn about leaving implicit.
by_spending = school_data_complete.groupby(["spend"], observed=False)

# Mean scores within each spending bracket
spend_avg_math = by_spending["Math Score"].mean()
spend_avg_read = by_spending["Reading Score"].mean()



In [149]:
# Students per spending bracket with a math score of 70 or higher.
# observed=False keeps a bracket that has students but no passers in the result
# with a count of 0, so its pass rate is 0% rather than missing.
spend_math_pass = (
    school_data_complete[school_data_complete["Math Score"] >= 70]
    .groupby("spend", observed=False)["Student ID"]
    .count()
)

# Passing students as a percentage of all students in the bracket
perc_spend_math_pass = spend_math_pass/by_spending["Student ID"].count()*100

In [150]:
# Students per spending bracket with a reading score of 70 or higher.
# observed=False keeps a bracket that has students but no passers in the result
# with a count of 0, so its pass rate is 0% rather than missing.
spend_read_pass = (
    school_data_complete[school_data_complete["Reading Score"] >= 70]
    .groupby("spend", observed=False)["Student ID"]
    .count()
)

# Passing students as a percentage of all students in the bracket
perc_spend_read_pass = spend_read_pass/by_spending["Student ID"].count()*100

In [151]:
# Students per spending bracket who scored 70 or higher in BOTH reading and math.
# observed=False keeps a bracket that has students but no such passers in the
# result with a count of 0, so its pass rate is 0% rather than missing.
spend_all_pass = (
    school_data_complete[(school_data_complete["Reading Score"] >= 70) &
                         (school_data_complete["Math Score"] >= 70)]
    .groupby("spend", observed=False)["Student ID"]
    .count()
)

# Passing students as a percentage of all students in the bracket
perc_spend_all_pass = spend_all_pass/by_spending["Student ID"].count()*100

In [163]:
# Combine the per-bracket metrics into one summary table
spending_metrics = {
    "Average Math Score": spend_avg_math,
    "Average Reading Score": spend_avg_read,
    "% Passing Math": perc_spend_math_pass,
    "% Passing Reading": perc_spend_read_pass,
    "% Overall Passing": perc_spend_all_pass,
}
by_spending_df = pd.DataFrame(spending_metrics)
by_spending_df.index.name = "Spending Ranges (Per Student)"

# Render every metric column with two decimals (display only)
by_spending_df.style.format({column: "{:.2f}" for column in by_spending_df.columns})



Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Spending Ranges (Per Student),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
<$585,83.36,83.96,93.7,96.69,90.64
$585-630,79.98,82.31,79.11,88.51,70.94
$630-645,77.82,81.3,70.62,82.6,58.84
$645-680,77.05,81.01,66.23,81.11,53.53


Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Spending Ranges (Per Student),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
<$585,83.46,83.93,93.46,96.61,90.37
$585-630,81.9,83.16,87.13,92.72,81.42
$630-645,78.52,81.62,73.48,84.39,62.86
$645-680,77.0,81.03,66.16,81.13,53.53


## Scores by School Size

* Perform the same operations as above, based on school size.

In [196]:
# Bin edges for school size. pd.cut's default intervals include their right
# edge, so each edge is the largest size that belongs to the bin:
# (0, 999], (999, 1999], (1999, 5000].
# The first edge is 999 (not 1000) so that a school of exactly 1,000 students
# lands in "Medium (1000-2000)", matching the labels below.
bins_size = [0, 999, 1999, 5000]

# Names for the three bins
bins_size_names = ["Small (<1000)", "Medium (1000-2000)","Large (2000-5000)"]

In [197]:
# Label every student row with the size category of its school
school_data_complete["sizes"] = pd.cut(
    school_data_complete["Size"],
    bins=bins_size,
    labels=bins_size_names,
)



In [198]:
# Group student rows by their school's size category.
# "sizes" is a categorical column produced by pd.cut; observed=False keeps every
# category in the result even when no rows fall into it, and states explicitly
# the grouping behavior that newer pandas versions warn about leaving implicit.
by_size = school_data_complete.groupby(["sizes"], observed=False)

# Mean scores within each size category
size_avg_math = by_size["Math Score"].mean()
size_avg_read = by_size["Reading Score"].mean()

In [200]:
# Students per size category with a math score of 70 or higher.
# observed=False keeps a category that has students but no passers in the
# result with a count of 0, so its pass rate is 0% rather than missing.
size_math_pass = (
    school_data_complete[school_data_complete["Math Score"] >= 70]
    .groupby("sizes", observed=False)["Student ID"]
    .count()
)

# Passing students as a percentage of all students in the category
perc_size_math_pass = size_math_pass/by_size["Student ID"].count()*100

perc_size_math_pass

sizes
Small (<1000)         93.952484
Medium (1000-2000)    93.616522
Large (2000-5000)     68.652380
Name: Student ID, dtype: float64

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Small (<1000),83.821598,83.929843,93.550225,96.099437,89.883853
Medium (1000-2000),83.374684,83.864438,93.599695,96.79068,90.621535
Large (2000-5000),77.746417,81.344493,69.963361,82.766634,58.286003


## Scores by School Type

* Perform the same operations as above, based on school type

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,83.473852,83.896421,93.62083,96.586489,90.432244
District,76.956733,80.966636,66.548453,80.799062,53.672208
