In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# File to Load (Remember to Change These)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas Data Frames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset
school_data_complete = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])
school_data_complete['reading_score'] = school_data_complete['reading_score'].astype(float)
school_data_complete['math_score'] = school_data_complete['math_score'].astype(float)
school_data_complete['size'] = school_data_complete['size'].astype(int)
school_data_complete.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66.0,79.0,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94.0,61.0,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90.0,60.0,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67.0,58.0,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97.0,84.0,0,District,2917,1910635


In [2]:
# Calculate values for District Summary

In [3]:
total_schools = school_data["School ID"].count()

In [4]:
total_students = float(school_data["size"].sum())

In [5]:
total_budget =  float(school_data["budget"].sum())

In [6]:
average_math = float(school_data_complete["math_score"].mean())

In [7]:
average_reading = float(school_data_complete["reading_score"].mean())

In [8]:
passing_math_scores = school_data_complete.loc[school_data_complete["math_score"] >= 70, [
    "student_name", "Student ID", "math_score"]]
percent_passing_math = (passing_math_scores["Student ID"].nunique() / total_students) * 100

In [9]:
passing_reading_scores = school_data_complete.loc[school_data_complete["reading_score"] >= 70, [
    "student_name", "Student ID", "reading_score"]]
percent_passing_reading = (passing_reading_scores["Student ID"].nunique() / total_students) * 100

In [10]:
overall_passing_rate = (percent_passing_math + percent_passing_reading)/2

In [11]:
# Create and format column values District Summary
district_prelim_summary_df = pd.DataFrame(
    {"Total Schools": [total_schools],
     "Total Unformatted Students": [total_students],
     "Total Unformatted Budget": [total_budget],
     "Average Math Score": [average_math],
     "Average Reading Score": [average_reading],
     "% Passing Math": [percent_passing_math],
     "% Passing Reading": [percent_passing_reading],
     "% Overall Passing Rate": [overall_passing_rate]
     })
district_prelim_summary_df["Total Budget"] = district_prelim_summary_df["Total Unformatted Budget"].map("${:,.2f}".format)
district_prelim_summary_df["Total Students"] = district_prelim_summary_df["Total Unformatted Students"].map("{:,.0f}".format)

In [12]:
# District Summary
district_summary_df = district_prelim_summary_df.loc[:,['Total Schools',
 'Total Students',
 'Total Budget',
 'Average Math Score',
 'Average Reading Score',
 '% Passing Math',
 '% Passing Reading',
 '% Overall Passing Rate']]
district_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,80.393158


In [13]:
# Group by School Name
grouped_by_school_df = school_data_complete.groupby(['school_name','type','size','budget'])["reading_score","math_score"].mean().reset_index()

In [14]:
# Calculate Per Student Budget
grouped_by_school_df["Per Student Budget Unformatted"] = (grouped_by_school_df["budget"] / grouped_by_school_df["size"])

In [15]:
# Build dataframe for groupby school stats - math scores
school_passing_math_scores = school_data_complete.loc[school_data_complete["math_score"] >= 70, [
    "school_name", "size", "Student ID", "math_score"]]
passing_math_by_school = school_passing_math_scores.groupby(["school_name","size"])["math_score"].count().reset_index()
passing_math_by_school["% Passing Math"] = (passing_math_by_school["math_score"]/passing_math_by_school["size"]) * 100

In [16]:
# Build dataframe for groupby school stats - reading scores
school_passing_reading_scores = school_data_complete.loc[school_data_complete["reading_score"] >= 70, [
    "school_name", "size", "Student ID", "reading_score"]]
passing_reading_by_school = school_passing_reading_scores.groupby(["school_name","size"])["reading_score"].count().reset_index()
passing_reading_by_school["% Passing Reading"] = (passing_reading_by_school["reading_score"]/passing_reading_by_school["size"]) * 100

In [17]:
# Merge dataframes for % Passing Rates by Schools
#merged_passing_rates = pd.merge(passing_math_by_school, passing_reading_by_school, on=['school_name'])
#merged_passing_rates
merged_passing_rates = passing_math_by_school[['school_name', '% Passing Math']].merge(passing_reading_by_school[['school_name', '% Passing Reading']], on='school_name', how='inner')
merged_passing_rates["% Overall Passing Rate"] = (merged_passing_rates["% Passing Math"] + merged_passing_rates["% Passing Reading"])/2

In [18]:
# Merge school stats with test score stats
school_prelim_summary_table = grouped_by_school_df[['school_name','type','size','budget','Per Student Budget Unformatted','math_score','reading_score']].merge(merged_passing_rates[['school_name','% Passing Math','% Passing Reading','% Overall Passing Rate']], on='school_name', how='inner')
school_prelim_summary_table["Total School Budget"] = school_prelim_summary_table["budget"].map("${:,.2f}".format)
school_prelim_summary_table["Per Student Budget"] = school_prelim_summary_table["Per Student Budget Unformatted"].map("${:,.2f}".format)

In [19]:
#Rename and format the table to match direction sample
school_prelim_summary_table.rename(columns={"school_name" : "School Name","type" : "School Type", "size" : "Total Students", "math_score": "Average Math Score", "reading_score":"Average Reading Score"}, inplace=True)
school_summary_table = school_prelim_summary_table.loc[:,['School Name',
 'School Type',
 'Total Students',
 'Total School Budget',
 'Per Student Budget',
 'Average Math Score',
 'Average Reading Score',
 '% Passing Math',
 '% Passing Reading',
 '% Overall Passing Rate']]
school_summary_table

Unnamed: 0,School Name,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,Bailey High School,District,4976,"$3,124,928.00",$628.00,77.048432,81.033963,66.680064,81.93328,74.306672
1,Cabrera High School,Charter,1858,"$1,081,356.00",$582.00,83.061895,83.97578,94.133477,97.039828,95.586652
2,Figueroa High School,District,2949,"$1,884,411.00",$639.00,76.711767,81.15802,65.988471,80.739234,73.363852
3,Ford High School,District,2739,"$1,763,916.00",$644.00,77.102592,80.746258,68.309602,79.299014,73.804308
4,Griffin High School,Charter,1468,"$917,500.00",$625.00,83.351499,83.816757,93.392371,97.138965,95.265668
5,Hernandez High School,District,4635,"$3,022,020.00",$652.00,77.289752,80.934412,66.752967,80.862999,73.807983
6,Holden High School,Charter,427,"$248,087.00",$581.00,83.803279,83.814988,92.505855,96.252927,94.379391
7,Huang High School,District,2917,"$1,910,635.00",$655.00,76.629414,81.182722,65.683922,81.316421,73.500171
8,Johnson High School,District,4761,"$3,094,650.00",$650.00,77.072464,80.966394,66.057551,81.222432,73.639992
9,Pena High School,Charter,962,"$585,858.00",$609.00,83.839917,84.044699,94.594595,95.945946,95.27027


In [20]:
# School Summary (sorted by % Overall Passing Rate)
school_summary_by_passing_rates = school_summary_table.sort_values("% Overall Passing Rate", ascending = False).reset_index(drop=True)
school_summary_by_passing_rates

Unnamed: 0,School Name,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,Cabrera High School,Charter,1858,"$1,081,356.00",$582.00,83.061895,83.97578,94.133477,97.039828,95.586652
1,Thomas High School,Charter,1635,"$1,043,130.00",$638.00,83.418349,83.84893,93.272171,97.308869,95.29052
2,Pena High School,Charter,962,"$585,858.00",$609.00,83.839917,84.044699,94.594595,95.945946,95.27027
3,Griffin High School,Charter,1468,"$917,500.00",$625.00,83.351499,83.816757,93.392371,97.138965,95.265668
4,Wilson High School,Charter,2283,"$1,319,574.00",$578.00,83.274201,83.989488,93.867718,96.539641,95.203679
5,Wright High School,Charter,1800,"$1,049,400.00",$583.00,83.682222,83.955,93.333333,96.611111,94.972222
6,Shelton High School,Charter,1761,"$1,056,600.00",$600.00,83.359455,83.725724,93.867121,95.854628,94.860875
7,Holden High School,Charter,427,"$248,087.00",$581.00,83.803279,83.814988,92.505855,96.252927,94.379391
8,Bailey High School,District,4976,"$3,124,928.00",$628.00,77.048432,81.033963,66.680064,81.93328,74.306672
9,Hernandez High School,District,4635,"$3,022,020.00",$652.00,77.289752,80.934412,66.752967,80.862999,73.807983


In [21]:
# Display top five performing schools
school_summary_by_passing_rates.head(5)

Unnamed: 0,School Name,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,Cabrera High School,Charter,1858,"$1,081,356.00",$582.00,83.061895,83.97578,94.133477,97.039828,95.586652
1,Thomas High School,Charter,1635,"$1,043,130.00",$638.00,83.418349,83.84893,93.272171,97.308869,95.29052
2,Pena High School,Charter,962,"$585,858.00",$609.00,83.839917,84.044699,94.594595,95.945946,95.27027
3,Griffin High School,Charter,1468,"$917,500.00",$625.00,83.351499,83.816757,93.392371,97.138965,95.265668
4,Wilson High School,Charter,2283,"$1,319,574.00",$578.00,83.274201,83.989488,93.867718,96.539641,95.203679


In [22]:
#Display bottom five performing schools
school_summary_by_passing_rates.tail(5)

Unnamed: 0,School Name,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
10,Ford High School,District,2739,"$1,763,916.00",$644.00,77.102592,80.746258,68.309602,79.299014,73.804308
11,Johnson High School,District,4761,"$3,094,650.00",$650.00,77.072464,80.966394,66.057551,81.222432,73.639992
12,Huang High School,District,2917,"$1,910,635.00",$655.00,76.629414,81.182722,65.683922,81.316421,73.500171
13,Figueroa High School,District,2949,"$1,884,411.00",$639.00,76.711767,81.15802,65.988471,80.739234,73.363852
14,Rodriguez High School,District,3999,"$2,547,363.00",$637.00,76.842711,80.744686,66.366592,80.220055,73.293323


In [23]:
# Create data sets by grade
ninth_grade = school_data_complete[school_data_complete.grade == "9th"]
tenth_grade = school_data_complete[school_data_complete.grade == "10th"]
eleventh_grade = school_data_complete[school_data_complete.grade == "11th"]
twelth_grade = school_data_complete[school_data_complete.grade == "12th"]

In [24]:
#Create series of average math scores by grade
ninth_math_grouped_by_school_df = ninth_grade.groupby(['school_name'])["math_score"].mean().reset_index()
tenth_math_grouped_by_school_df = tenth_grade.groupby(['school_name'])["math_score"].mean().reset_index()
eleventh_math_grouped_by_school_df = eleventh_grade.groupby(['school_name'])["math_score"].mean().reset_index()
twelth_math_grouped_by_school_df = twelth_grade.groupby(['school_name'])["math_score"].mean().reset_index()

In [25]:
# Merge maths scores by grade dataframes and rename columns
merged_math_scores = ninth_math_grouped_by_school_df[['school_name', 'math_score']].merge(tenth_math_grouped_by_school_df[['school_name', 'math_score']], on='school_name', how='inner').reset_index()
merged_math_scores.rename(columns={"math_score_x" : "9th","math_score_y" : "10th"}, inplace=True)
merged_math_scores = merged_math_scores[['school_name', '9th',"10th"]].merge(eleventh_math_grouped_by_school_df[['school_name', 'math_score']], on='school_name', how='inner')
merged_math_scores = merged_math_scores[['school_name', '9th',"10th","math_score"]].merge(twelth_math_grouped_by_school_df[['school_name', 'math_score']], on='school_name', how='inner')
merged_math_scores.rename(columns={"school_name":"School Name","math_score_x" : "11th","math_score_y" : "12th"}, inplace=True)

In [26]:
# Display Average Math Scores by Grade
merged_math_scores

Unnamed: 0,School Name,9th,10th,11th,12th
0,Bailey High School,77.083676,76.996772,77.515588,76.492218
1,Cabrera High School,83.094697,83.154506,82.76556,83.277487
2,Figueroa High School,76.403037,76.539974,76.884344,77.151369
3,Ford High School,77.361345,77.672316,76.918058,76.179963
4,Griffin High School,82.04401,84.229064,83.842105,83.356164
5,Hernandez High School,77.438495,77.337408,77.136029,77.186567
6,Holden High School,83.787402,83.429825,85.0,82.855422
7,Huang High School,77.027251,75.908735,76.446602,77.225641
8,Johnson High School,77.187857,76.691117,77.491653,76.863248
9,Pena High School,83.625455,83.372,84.328125,84.121547


In [27]:
# Create series of average reading scores by grade
ninth_reading_grouped_by_school_df = ninth_grade.groupby(['school_name'])["reading_score"].mean().reset_index()
tenth_reading_grouped_by_school_df = tenth_grade.groupby(['school_name'])["reading_score"].mean().reset_index()
eleventh_reading_grouped_by_school_df = eleventh_grade.groupby(['school_name'])["reading_score"].mean().reset_index()
twelth_reading_grouped_by_school_df = twelth_grade.groupby(['school_name'])["reading_score"].mean().reset_index()

In [28]:
# Merge reading scores by grade dataframes and rename columns
merged_reading_scores = ninth_reading_grouped_by_school_df[['school_name', 'reading_score']].merge(tenth_reading_grouped_by_school_df[['school_name', 'reading_score']], on='school_name', how='inner').reset_index()
merged_reading_scores.rename(columns={"reading_score_x" : "9th","reading_score_y" : "10th"}, inplace=True)
merged_reading_scores = merged_reading_scores[['school_name', '9th',"10th"]].merge(eleventh_reading_grouped_by_school_df[['school_name', 'reading_score']], on='school_name', how='inner')
merged_reading_scores = merged_reading_scores[['school_name', '9th',"10th","reading_score"]].merge(twelth_reading_grouped_by_school_df[['school_name', 'reading_score']], on='school_name', how='inner')
merged_reading_scores.rename(columns={"school_name":"School Name","reading_score_x" : "11th","reading_score_y" : "12th"}, inplace=True)

In [29]:
# Display Average Reading Scores by Grade
merged_reading_scores

Unnamed: 0,School Name,9th,10th,11th,12th
0,Bailey High School,81.303155,80.907183,80.945643,80.912451
1,Cabrera High School,83.676136,84.253219,83.788382,84.287958
2,Figueroa High School,81.198598,81.408912,80.640339,81.384863
3,Ford High School,80.632653,81.262712,80.403642,80.662338
4,Griffin High School,83.369193,83.706897,84.288089,84.013699
5,Hernandez High School,80.86686,80.660147,81.39614,80.857143
6,Holden High School,83.677165,83.324561,83.815534,84.698795
7,Huang High School,81.290284,81.512386,81.417476,80.305983
8,Johnson High School,81.260714,80.773431,80.616027,81.227564
9,Pena High School,83.807273,83.612,84.335938,84.59116


In [30]:
# Create school budget per student bins
school_data_complete["budget_per_student"] = school_data_complete["budget"]/school_data_complete["size"]
print(school_data_complete["budget_per_student"].max())
print(school_data_complete["budget_per_student"].min())
spending_bins = [0, 600, 625, 650, 675]
group_labels = ["$575-599", "$600-624", "$625-649", "$650-675"]

655.0
578.0


In [31]:
# Create base table score averages versus Spending Ranges (Per Student)
school_data_complete["Spending Ranges (Per Student)"] = pd.cut(school_data_complete["budget_per_student"], spending_bins, labels=group_labels)
grouped_data_by_budget = school_data_complete.groupby(["Spending Ranges (Per Student)"])["reading_score","math_score"].mean().reset_index()
group_count_by_budget = school_data_complete.groupby(["Spending Ranges (Per Student)"])["math_score"].count().reset_index()
group_count_by_budget.rename(columns = {"math_score" : "count_in_range"}, inplace=True)
group_count_by_budget
group_scores_and_count_by_budget = pd.merge(grouped_data_by_budget, group_count_by_budget, on="Spending Ranges (Per Student)")

In [32]:
#school_data_complete.head()
#group_counts = budget_passing_math_scores.groupby(["Spending Ranges (Per Student)"])["math_score"].count().reset_index()

In [33]:
# Build dataframe for groupby spending ranges - math scores
budget_passing_math_scores = school_data_complete.copy()
budget_passing_math_scores = school_data_complete.loc[school_data_complete["math_score"] >= 70, ["math_score","Spending Ranges (Per Student)"]]
passing_math_by_budget_count = budget_passing_math_scores.groupby(["Spending Ranges (Per Student)"])["math_score"].count().reset_index()
passing_math_by_budget_count.rename(columns = {"math_score" : "count_passing_math"}, inplace=True)
group_scores_count_math_by_budget = pd.merge(group_scores_and_count_by_budget, passing_math_by_budget_count, on="Spending Ranges (Per Student)")
group_scores_count_math_by_budget["% Passing Math"] = (group_scores_count_math_by_budget["count_passing_math"] / group_scores_count_math_by_budget["count_in_range"]) *100

In [34]:
# Build dataframe for groupby spending ranges - reading scores
budget_passing_reading_scores = school_data_complete.copy()
budget_passing_reading_scores = school_data_complete.loc[school_data_complete["reading_score"] >= 70, ["reading_score","Spending Ranges (Per Student)"]]
passing_reading_by_budget_count = budget_passing_reading_scores.groupby(["Spending Ranges (Per Student)"])["reading_score"].count().reset_index()
passing_reading_by_budget_count.rename(columns = {"reading_score" : "count_passing_reading"}, inplace=True)
group_scores_count_math_reading_by_budget = pd.merge(group_scores_count_math_by_budget, passing_reading_by_budget_count, on="Spending Ranges (Per Student)")
group_scores_count_math_reading_by_budget["% Passing Reading"] = (group_scores_count_math_reading_by_budget["count_passing_reading"] / group_scores_count_math_reading_by_budget["count_in_range"]) *100
group_scores_count_math_reading_by_budget["% Overall Passing Rate"] = (group_scores_count_math_reading_by_budget["% Passing Math"] + group_scores_count_math_reading_by_budget["% Passing Reading"])/2

In [35]:
# Scores By Spending Ranges (budget per student) - Extract desired table info
budget_summary = group_scores_count_math_reading_by_budget.loc[:, ["Spending Ranges (Per Student)", "math_score", "reading_score",
                                    "% Passing Math", "% Passing Reading", "% Overall Passing Rate"]]
budget_summary.rename(columns = {"math_score":"Average Math Score","reading_score":"Average Reading Score"})

Unnamed: 0,Spending Ranges (Per Student),Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,$575-599,83.362283,83.912412,93.738467,96.506335,95.122401
1,$600-624,83.544856,83.906996,93.868313,96.666667,95.26749
2,$625-649,77.469253,81.162258,68.659481,82.131155,75.395318
3,$650-675,77.034693,81.030323,66.340042,81.038136,73.689089


In [36]:
# Create school size bins
size_bins = [0, 1000, 2000, 5000]
group_names = ["Small (<1000)", "Medium (1000-2000)", "Large (2000-5000)"]

In [37]:
# Create base table to store averages versus size
school_data_complete["School Size"] = pd.cut(school_data_complete["size"], size_bins, labels=group_names)
grouped_data_by_size = school_data_complete.groupby(["School Size"])["reading_score","math_score"].mean().reset_index()
group_count_by_size = school_data_complete.groupby(["School Size"])["math_score"].count().reset_index()
group_count_by_size.rename(columns = {"math_score" : "count_in_range"}, inplace=True)
#group_count_by_size
group_scores_and_count_by_size = pd.merge(grouped_data_by_size, group_count_by_size, on="School Size")

In [38]:
# Build dataframe for groupby spending ranges - math scores
size_passing_math_scores = school_data_complete.copy()
size_passing_math_scores = school_data_complete.loc[school_data_complete["math_score"] >= 70, ["math_score","School Size"]]
passing_math_by_size_count = size_passing_math_scores.groupby(["School Size"])["math_score"].count().reset_index()
passing_math_by_size_count.rename(columns = {"math_score" : "count_passing_math"}, inplace=True)
group_scores_count_math_by_size = pd.merge(group_scores_and_count_by_size, passing_math_by_size_count, on="School Size")
group_scores_count_math_by_size["% Passing Math"] = (group_scores_count_math_by_size["count_passing_math"] / group_scores_count_math_by_size["count_in_range"]) *100

In [39]:
# Build dataframe for groupby size ranges - reading scores
size_passing_reading_scores = school_data_complete.copy()
size_passing_reading_scores = school_data_complete.loc[school_data_complete["reading_score"] >= 70, ["reading_score","School Size"]]
passing_reading_by_size_count = size_passing_reading_scores.groupby(["School Size"])["reading_score"].count().reset_index()
passing_reading_by_size_count.rename(columns = {"reading_score" : "count_passing_reading"}, inplace=True)
group_scores_count_math_reading_by_size = pd.merge(group_scores_count_math_by_size, passing_reading_by_size_count, on="School Size")
group_scores_count_math_reading_by_size["% Passing Reading"] = (group_scores_count_math_reading_by_size["count_passing_reading"] / group_scores_count_math_reading_by_size["count_in_range"]) *100
group_scores_count_math_reading_by_size["% Overall Passing Rate"] = (group_scores_count_math_reading_by_size["% Passing Math"] + group_scores_count_math_reading_by_size["% Passing Reading"])/2

In [40]:
# Scores By School Size - Extract desired table info
size_summary = group_scores_count_math_reading_by_size.loc[:, ["School Size", "math_score", "reading_score",
                                    "% Passing Math", "% Passing Reading", "% Overall Passing Rate"]]
size_summary.rename(columns = {"math_score":"Average Math Score","reading_score":"Average Reading Score"})

Unnamed: 0,School Size,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,Small (<1000),83.828654,83.974082,93.952484,96.040317,94.9964
1,Medium (1000-2000),83.372682,83.867989,93.616522,96.773058,95.19479
2,Large (2000-5000),77.477597,81.198674,68.65238,82.125158,75.388769


In [41]:
# Create base table to store averages per School Type
#school_data_complete["School Type"] = pd.cut(school_data_complete["size"], size_bins, labels=group_names)
grouped_data_by_type = school_data_complete.groupby(["type"])["reading_score","math_score"].mean().reset_index()
grouped_data_by_type
group_count_by_type = school_data_complete.groupby(["type"])["math_score"].count().reset_index()
group_count_by_type.rename(columns = {"math_score" : "count_in_range"}, inplace=True)
group_count_by_type
group_scores_and_count_by_type = pd.merge(grouped_data_by_type, group_count_by_type, on="type")

In [42]:
# Build dataframe for groupby type ranges - math scores
type_passing_math_scores = school_data_complete.copy()
type_passing_math_scores = school_data_complete.loc[school_data_complete["math_score"] >= 70, ["math_score","type"]]
passing_math_by_type_count = type_passing_math_scores.groupby(["type"])["math_score"].count().reset_index()
passing_math_by_type_count.rename(columns = {"math_score" : "count_passing_math"}, inplace=True)
group_scores_count_math_by_type = pd.merge(group_scores_and_count_by_type, passing_math_by_type_count, on="type")
group_scores_count_math_by_type["% Passing Math"] = (group_scores_count_math_by_type["count_passing_math"] / group_scores_count_math_by_type["count_in_range"]) *100

In [43]:
# Build dataframe for groupby type values - reading scores
type_passing_reading_scores = school_data_complete.copy()
type_passing_reading_scores = school_data_complete.loc[school_data_complete["reading_score"] >= 70, ["reading_score","type"]]
passing_reading_by_type_count = type_passing_reading_scores.groupby(["type"])["reading_score"].count().reset_index()
passing_reading_by_type_count.rename(columns = {"reading_score" : "count_passing_reading"}, inplace=True)
group_scores_count_math_reading_by_type = pd.merge(group_scores_count_math_by_type, passing_reading_by_type_count, on="type")
group_scores_count_math_reading_by_type["% Passing Reading"] = (group_scores_count_math_reading_by_type["count_passing_reading"] / group_scores_count_math_reading_by_type["count_in_range"]) *100
group_scores_count_math_reading_by_type["% Overall Passing Rate"] = (group_scores_count_math_reading_by_type["% Passing Math"] + group_scores_count_math_reading_by_type["% Passing Reading"])/2

In [44]:
# Scores By School Type - Extract desired table info
type_summary = group_scores_count_math_reading_by_type.loc[:, ["type", "math_score", "reading_score",
                                    "% Passing Math", "% Passing Reading", "% Overall Passing Rate"]]
type_summary.rename(columns = {"type" : "School Type","math_score":"Average Math Score","reading_score":"Average Reading Score"})

Unnamed: 0,School Type,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,Charter,83.406183,83.902821,93.701821,96.645891,95.173856
1,District,76.987026,80.962485,66.518387,80.905249,73.711818
