First Observable Trend based on the data:
All top performing schools based on overall passing rate are charter schools, and all bottom performing schools are District schools. There is a clearoutperformance by Charter schools.   

Second Observable Trend: 
Overall scores (math or reading) do not tend to increase as students progress to higher grades. 9th grade scores are quite similar to 12th grade scores in both subjects. 

In [23]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas Data Frames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset
school_data_complete = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])

#header to confirm accurate reading and merging of data 
school_data_complete.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [24]:
# check for missing data
school_data_complete.count()

Student ID       39170
student_name     39170
gender           39170
grade            39170
school_name      39170
reading_score    39170
math_score       39170
School ID        39170
type             39170
size             39170
budget           39170
dtype: int64

In [25]:
#check data types 
school_data_complete.dtypes

Student ID        int64
student_name     object
gender           object
grade            object
school_name      object
reading_score     int64
math_score        int64
School ID         int64
type             object
size              int64
budget            int64
dtype: object

## District Summary

* Calculate the total number of schools

* Calculate the total number of students

* Calculate the total budget

* Calculate the average math score 

* Calculate the average reading score

* Calculate the overall passing rate (overall average score), i.e. (avg. math score + avg. reading score)/2

* Calculate the percentage of students with a passing math score (70 or greater)

* Calculate the percentage of students with a passing reading score (70 or greater)

* Create a dataframe to hold the above results

* Optional: give the displayed data cleaner formatting

In [26]:
#get the total number of schools using .nunique() 
schoolscount = school_data_complete["school_name"].nunique()
print(schoolscount)

# use .sum() to find total number of students
totalstudentcount = school_data_complete["student_name"].count()
print(totalstudentcount)

# index according to school name 
schooldataindexed = school_data_complete.set_index(["school_name"])

# group data according to school name
schooldataindexedgrouped = schooldataindexed.groupby(['school_name'])

# budget: create a workable list of unique values per school, then sum up budget of each individual using the workable list of unique values
uniqueschools = schooldataindexedgrouped.max()
 
budgettotal = uniqueschools["budget"].sum()
print(budgettotal)

# use .mean() data function to calculate average math score 
averagemath = school_data_complete["math_score"].mean()
print(averagemath)

# use .mean() data function to calculate average reading score 
averagereading = school_data_complete["reading_score"].mean()
print(averagereading)

# use .loc to identify students who passed math 
passingmath = school_data_complete.loc[school_data_complete["math_score"]>=70,:]
totalpassingmath = passingmath["student_name"].count()

# calculate percentage of students passing math 
percentpassingmath = (totalpassingmath/totalstudentcount)*100

#print all values for transparency/visibility 
print (totalpassingmath)
print (totalstudentcount)
print (percentpassingmath)

# use .loc to identify students who passed reading
passingreading = school_data_complete.loc[school_data_complete["reading_score"]>=70,:]
totalpassingreading = passingreading["student_name"].count()

# calculate percentage of students passing reading 
percentpassingreading = (totalpassingreading/totalstudentcount)*100

#print all values for transparency/visibility 
print (totalpassingreading)
print (totalstudentcount)
print (percentpassingreading)

#calculate overall average score 
overallaveragescore = (averagemath + averagereading)/2
print(overallaveragescore)

15
39170
24649428
78.98537145774827
81.87784018381414
29370
39170
74.9808526933878
33610
39170
85.80546336482001
80.43160582078121


In [27]:
# Create dataframe for District Summary statistics 
districtsummarydict = [{"Total Schools":schoolscount, 
                        "Total Students":totalstudentcount,
                       "Total Budget":budgettotal, 
                        "Average Math Score":averagemath, 
                        "Average Reading Score":averagereading,
                       "% Passing Math":percentpassingmath, 
                        "% Passing Reading":percentpassingreading, 
                        "% Overall Passing Rate":overallaveragescore}]

DistrictSummary = pd.DataFrame(districtsummarydict)

DistrictSummary['Total Budget'] = (DistrictSummary['Total Budget']).apply(lambda x: '${:,.2f}'.format(x))
DistrictSummary['Average Math Score'] = (DistrictSummary['Average Math Score']).apply(lambda x: '{:,.2f}'.format(x))
DistrictSummary['Average Reading Score'] = (DistrictSummary['Average Reading Score']).apply(lambda x: '{:,.2f}'.format(x))
DistrictSummary['% Passing Math'] = (DistrictSummary['% Passing Math']).apply(lambda x: '{:,.2f}%'.format(x))
DistrictSummary['% Passing Reading'] = (DistrictSummary['% Passing Reading']).apply(lambda x: '{:,.2f}%'.format(x))
DistrictSummary['% Overall Passing Rate'] = (DistrictSummary['% Overall Passing Rate']).apply(lambda x: '{:,.2f}%'.format(x))

DistrictSummary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,15,39170,"$24,649,428.00",78.99,81.88,74.98%,85.81%,80.43%


## School Summary

* Create an overview table that summarizes key metrics about each school, including:
  * School Name
  * School Type
  * Total Students
  * Total School Budget
  * Per Student Budget
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * Overall Passing Rate (Average of the above two)
  
* Create a dataframe to hold the above results

In [28]:
# show header of indexed and grouped dataframe 
schooldataindexedgrouped.head()

Unnamed: 0_level_0,Student ID,student_name,gender,grade,reading_score,math_score,School ID,type,size,budget
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Huang High School,0,Paul Bradley,M,9th,66,79,0,District,2917,1910635
Huang High School,1,Victor Smith,M,12th,94,61,0,District,2917,1910635
Huang High School,2,Kevin Rodriguez,M,12th,90,60,0,District,2917,1910635
Huang High School,3,Dr. Richard Scott,M,12th,67,58,0,District,2917,1910635
Huang High School,4,Bonnie Ray,F,9th,97,84,0,District,2917,1910635
...,...,...,...,...,...,...,...,...,...,...
Thomas High School,37535,Norma Mata,F,10th,76,76,14,Charter,1635,1043130
Thomas High School,37536,Cody Miller,M,11th,84,82,14,Charter,1635,1043130
Thomas High School,37537,Erik Snyder,M,9th,80,90,14,Charter,1635,1043130
Thomas High School,37538,Tanya Martinez,F,9th,71,69,14,Charter,1635,1043130


In [29]:
# Get school type
school_type = schooldataindexedgrouped["type"].unique()

# Get total student count per school
total_student = schooldataindexedgrouped['student_name'].count()

# get total budget per school, .mean() will suffice since all entries in the budget column are the same 
total_school_budget = schooldataindexedgrouped['budget'].mean()

# get per student budget per school 
per_student_budget =  total_school_budget / total_student

# get average math score per school 
average_math = schooldataindexedgrouped['math_score'].mean()

# get average reading score per school 
average_reading = schooldataindexedgrouped['reading_score'].mean()

# Get percent passing math
passingmathcount = passingmath['school_name'].value_counts()
passingmathpctperschool = (passingmathcount / total_student)*100

# Get percent passing reading 
passingreadingcount = passingreading['school_name'].value_counts()
passingreadingpctperschool = (passingreadingcount / total_student)*100

# overall percent passing (Average of the above two)
overallpctpassingperschool = (passingmathpctperschool + passingreadingpctperschool) / 2

# Create a dataframe to hold the above results
schoolsummary = pd.DataFrame({"School Type": school_type,    
                        "Total Students": total_student,
                        "Total School Budget": total_school_budget,
                        "Per Student Budget": per_student_budget,
                         "Average Math Score": average_math,
                         "Average Reading Score": average_reading,
                         "% Passing Math": passingmathpctperschool,
                         "% Passing Reading": passingreadingpctperschool,
                         "% Overall Passing Rate": overallpctpassingperschool})

schoolsummary['Total School Budget'] = (schoolsummary['Total School Budget']).apply(lambda x: '${:,.2f}'.format(x))
schoolsummary['Per Student Budget'] = (schoolsummary['Per Student Budget']).apply(lambda x: '${:,.2f}'.format(x))
schoolsummary['% Passing Math'] = (schoolsummary['% Passing Math']).apply(lambda x: '{:,.2f}%'.format(x))
schoolsummary['% Passing Reading'] = (schoolsummary['% Passing Reading']).apply(lambda x: '{:,.2f}%'.format(x))
schoolsummary['% Overall Passing Rate'] = (schoolsummary['% Overall Passing Rate']).apply(lambda x: '{:,.2f}%'.format(x))
schoolsummary['Average Math Score'] = (schoolsummary['Average Math Score']).apply(lambda x: '{:,.2f}'.format(x))
schoolsummary['Average Reading Score'] = (schoolsummary['Average Reading Score']).apply(lambda x: '{:,.2f}'.format(x))

schoolsummary.head()

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,[District],4976,"$3,124,928.00",$628.00,77.05,81.03,66.68%,81.93%,74.31%
Cabrera High School,[Charter],1858,"$1,081,356.00",$582.00,83.06,83.98,94.13%,97.04%,95.59%
Figueroa High School,[District],2949,"$1,884,411.00",$639.00,76.71,81.16,65.99%,80.74%,73.36%
Ford High School,[District],2739,"$1,763,916.00",$644.00,77.1,80.75,68.31%,79.30%,73.80%
Griffin High School,[Charter],1468,"$917,500.00",$625.00,83.35,83.82,93.39%,97.14%,95.27%


## Top Performing Schools (By Passing Rate)

* Sort and display the top five schools in overall passing rate

In [30]:
# Sort and display 5 top performing schools based on Overall % Passing Rate
topperformingschools = schoolsummary.sort_values("% Overall Passing Rate", ascending=False)
topperformingschools.head(5)

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Cabrera High School,[Charter],1858,"$1,081,356.00",$582.00,83.06,83.98,94.13%,97.04%,95.59%
Thomas High School,[Charter],1635,"$1,043,130.00",$638.00,83.42,83.85,93.27%,97.31%,95.29%
Griffin High School,[Charter],1468,"$917,500.00",$625.00,83.35,83.82,93.39%,97.14%,95.27%
Pena High School,[Charter],962,"$585,858.00",$609.00,83.84,84.04,94.59%,95.95%,95.27%
Wilson High School,[Charter],2283,"$1,319,574.00",$578.00,83.27,83.99,93.87%,96.54%,95.20%


## Bottom Performing Schools (By Passing Rate)

* Sort and display the five worst-performing schools

In [31]:
# Sort and display 5 top performing schools based on Overall % Passing Rate
bottomperformingschools = schoolsummary.sort_values("% Overall Passing Rate")
bottomperformingschools.head(5)

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Rodriguez High School,[District],3999,"$2,547,363.00",$637.00,76.84,80.74,66.37%,80.22%,73.29%
Figueroa High School,[District],2949,"$1,884,411.00",$639.00,76.71,81.16,65.99%,80.74%,73.36%
Huang High School,[District],2917,"$1,910,635.00",$655.00,76.63,81.18,65.68%,81.32%,73.50%
Johnson High School,[District],4761,"$3,094,650.00",$650.00,77.07,80.97,66.06%,81.22%,73.64%
Ford High School,[District],2739,"$1,763,916.00",$644.00,77.1,80.75,68.31%,79.30%,73.80%


## Math Scores by Grade

* Create a table that lists the average Reading Score for students of each grade level (9th, 10th, 11th, 12th) at each school.

  * Create a pandas series for each grade. Hint: use a conditional statement.
  
  * Group each series by school
  
  * Combine the series into a dataframe
  
  * Optional: give the displayed data cleaner formatting

In [32]:
# create a series for each grade, then group those by school 

grade9th = school_data_complete.loc[school_data_complete["grade"] == "9th"]
grade9th.head(10)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635
5,5,Bryan Miranda,M,9th,Huang High School,94,94,0,District,2917,1910635
12,12,Brittney Walker,F,9th,Huang High School,64,79,0,District,2917,1910635
13,13,William Long,M,9th,Huang High School,71,79,0,District,2917,1910635
16,16,Donald Zamora,M,9th,Huang High School,88,55,0,District,2917,1910635
17,17,Kimberly Santiago,F,9th,Huang High School,74,75,0,District,2917,1910635
18,18,Kevin Stevens,M,9th,Huang High School,64,69,0,District,2917,1910635
19,19,Brandi Lyons,F,9th,Huang High School,89,80,0,District,2917,1910635
23,23,Christopher Parker,M,9th,Huang High School,81,68,0,District,2917,1910635


In [33]:
# duplicate above code for grades 10, 11 and 12
grade10th = school_data_complete.loc[school_data_complete["grade"] == "10th"]
grade11th = school_data_complete.loc[school_data_complete["grade"] == "11th"]
grade12th = school_data_complete.loc[school_data_complete["grade"] == "12th"]

# group all schools by school name 
grouped9th = grade9th.groupby('school_name')
grouped10th = grade10th.groupby('school_name')
grouped11th = grade11th.groupby('school_name')
grouped12th = grade12th.groupby('school_name')

# get header of one to confirm code works 
grouped12th.head(10)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
7,7,Nicole Baker,F,12th,Huang High School,96,69,0,District,2917,1910635
29,29,Nicole Brown,F,12th,Huang High School,90,88,0,District,2917,1910635
...,...,...,...,...,...,...,...,...,...,...,...
37582,37582,Adam Gibbs,M,12th,Thomas High School,88,81,14,Charter,1635,1043130
37583,37583,Stephanie Hayden,F,12th,Thomas High School,83,81,14,Charter,1635,1043130
37585,37585,Angela Edwards,F,12th,Thomas High School,84,95,14,Charter,1635,1043130
37588,37588,Christopher Riley,M,12th,Thomas High School,73,83,14,Charter,1635,1043130


In [34]:
# get averages of each school 
averagemath9th = grouped9th['math_score'].mean()
averagemath10th = grouped10th['math_score'].mean()
averagemath11th = grouped11th['math_score'].mean()
averagemath12th = grouped12th['math_score'].mean()

# get header of one to confirm code works
averagemath9th.head()

school_name
Bailey High School      77.083676
Cabrera High School     83.094697
Figueroa High School    76.403037
Ford High School        77.361345
Griffin High School     82.044010
Name: math_score, dtype: float64

In [35]:
# Create a dataframe of average math scores by grade and school 

AveragemathbyGrade = pd.DataFrame({     
                        "9th Math": averagemath9th,
                        "10th Math": averagemath10th,
                        "11th Math": averagemath11th,
                         "12th Math": averagemath12th})

AveragemathbyGrade['9th Math'] = (AveragemathbyGrade['9th Math']).apply(lambda x: '{:,.2f}'.format(x))
AveragemathbyGrade['10th Math'] = (AveragemathbyGrade['10th Math']).apply(lambda x: '{:,.2f}'.format(x))
AveragemathbyGrade['11th Math'] = (AveragemathbyGrade['11th Math']).apply(lambda x: '{:,.2f}'.format(x))
AveragemathbyGrade['12th Math'] = (AveragemathbyGrade['12th Math']).apply(lambda x: '{:,.2f}'.format(x))

AveragemathbyGrade


Unnamed: 0_level_0,9th Math,10th Math,11th Math,12th Math
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,77.08,77.0,77.52,76.49
Cabrera High School,83.09,83.15,82.77,83.28
Figueroa High School,76.4,76.54,76.88,77.15
Ford High School,77.36,77.67,76.92,76.18
Griffin High School,82.04,84.23,83.84,83.36
Hernandez High School,77.44,77.34,77.14,77.19
Holden High School,83.79,83.43,85.0,82.86
Huang High School,77.03,75.91,76.45,77.23
Johnson High School,77.19,76.69,77.49,76.86
Pena High School,83.63,83.37,84.33,84.12


## Reading Score by Grade 

* Perform the same operations as above for reading scores

In [36]:
# each grade series has already been created and grouped by school name 
# get average reading scores of each school 
averagereading9th = grouped9th['reading_score'].mean()
averagereading10th = grouped10th['reading_score'].mean()
averagereading11th = grouped11th['reading_score'].mean()
averagereading12th = grouped12th['reading_score'].mean()

# Create a dataframe of average math scores by grade and school 
AveragereadingbyGrade = pd.DataFrame({     
                        "9th Reading": averagereading9th,
                        "10th Reading": averagereading10th,
                        "11th Reading": averagereading11th,
                         "12th Reading": averagereading12th})

AveragereadingbyGrade['9th Reading'] = (AveragereadingbyGrade['9th Reading']).apply(lambda x: '{:,.2f}'.format(x))
AveragereadingbyGrade['10th Reading'] = (AveragereadingbyGrade['10th Reading']).apply(lambda x: '{:,.2f}'.format(x))
AveragereadingbyGrade['11th Reading'] = (AveragereadingbyGrade['11th Reading']).apply(lambda x: '{:,.2f}'.format(x))
AveragereadingbyGrade['12th Reading'] = (AveragereadingbyGrade['12th Reading']).apply(lambda x: '{:,.2f}'.format(x))

AveragereadingbyGrade

Unnamed: 0_level_0,9th Reading,10th Reading,11th Reading,12th Reading
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,81.3,80.91,80.95,80.91
Cabrera High School,83.68,84.25,83.79,84.29
Figueroa High School,81.2,81.41,80.64,81.38
Ford High School,80.63,81.26,80.4,80.66
Griffin High School,83.37,83.71,84.29,84.01
Hernandez High School,80.87,80.66,81.4,80.86
Holden High School,83.68,83.32,83.82,84.7
Huang High School,81.29,81.51,81.42,80.31
Johnson High School,81.26,80.77,80.62,81.23
Pena High School,83.81,83.61,84.34,84.59


## Scores by School Spending

* Create a table that breaks down school performances based on average Spending Ranges (Per Student). Use 4 reasonable bins to group school spending. Include in the table each of the following:
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * Overall Passing Rate (Average of the above two)

In [44]:
bins = [0, 585, 615, 645, 675]
group_names = ["<$585", "$585-615", "$615-645", "$645-675"]

schoolsummary["Spending Ranges (Per Student)"] = pd.cut(schoolsummary['Per Student Budget'], bins=bins, labels = group_names)

groupedbyspendingrange = schoolsummary.groupby(['Spending Ranges (Per Student)'])

spendingaveragemath = groupedbyspendingrange["Average Math Score"].mean()
spendingaveragereading = groupedbyspendingrange["Average Reading Score"].mean()
spendingpercentmath = groupedbyspendingrange["% Passing Math"].mean()
spendingpercentreading = groupedbyspendingrange["% Passing Reading"].mean()
spendingpercentpass = (spendingpercentmath + spendingpercentreading)/2


schoolspendingrangedf = pd.DataFrame({"Average Math Score":spendingaveragemath,
                                          "Average Reading Score":spendingaveragereading,
                                          "% Passing Math":spendingpercentmath,
                                          "% Passing Reading":spendingpercentreading,
                                          "% Overall Passing Rate":spendingpercentpass
                                         })

schoolspendingrangedf['% Passing Math'] = (schoolspendingrangedf['% Passing Math']).apply(lambda x: '{:,.2f}%'.format(x))
schoolspendingrangedf['% Passing Reading'] = (schoolspendingrangedf['% Passing Reading']).apply(lambda x: '{:,.2f}%'.format(x))
schoolspendingrangedf['% Overall Passing Rate'] = (schoolspendingrangedf['% Overall Passing Rate']).apply(lambda x: '{:,.2f}%'.format(x))
schoolspendingrangedf['Average Math Score'] = (schoolspendingrangedf['Average Math Score']).apply(lambda x: '{:,.2f}'.format(x))
schoolspendingrangedf['Average Reading Score'] = (schoolspendingrangedf['Average Reading Score']).apply(lambda x: '{:,.2f}'.format(x))

schoolspendingrangedf

TypeError: '<' not supported between instances of 'int' and 'str'

## Scores by School Size

* Perform the same operations as above, based on school size.

In [38]:
# Sample bins. Feel free to create your own bins.
size_bins = [0, 1000, 2000, 5000]
group_names = ["Small (<1000)", "Medium (1000-2000)", "Large (2000-5000)"]

schoolsummary['School Size'] = pd.cut(schoolsummary['Total Students'], 
                                                         bins = size_bins, labels = group_names)

groupedbyschoolsize = schoolsummary.groupby(['School Size'])

schoolsizeaveragemath = groupedbyschoolsize["Average Math Score"].mean()
schoolsizeaveragereading = groupedbyschoolsize["Average Reading Score"].mean()
schoolsizepercentmath = groupedbyschoolsize["% Passing Math"].mean()
schoolsizepercentreading = groupedbyschoolsize["% Passing Reading"].mean()
schoolsizepercentpass = (schoolsizepercentmath + schoolsizepercentreading)/2


schoolsizedf = pd.DataFrame({"Average Math Score":schoolsizeaveragemath,
                                          "Average Reading Score": schoolsizeaveragereading,
                                          "% Passing Math": schoolsizepercentmath,
                                          "% Passing Reading": schoolsizepercentreading,
                                          "% Overall Passing Rate":schoolsizepercentpass
                                         })

schoolsizedf['% Passing Math'] = (schoolsizedf['% Passing Math']).apply(lambda x: '{:,.2f}%'.format(x))
schoolsizedf['% Passing Reading'] = (schoolsizedf['% Passing Reading']).apply(lambda x: '{:,.2f}%'.format(x))
schoolsizedf['% Overall Passing Rate'] = (schoolsizedf['% Overall Passing Rate']).apply(lambda x: '{:,.2f}%'.format(x))
schoolsizedf['Average Math Score'] = (schoolsizedf['Average Math Score']).apply(lambda x: '{:,.2f}'.format(x))
schoolsizedf['Average Reading Score'] = (schoolsizedf['Average Reading Score']).apply(lambda x: '{:,.2f}'.format(x))

schoolsizedf

DataError: No numeric types to aggregate

## Scores by School Type

* Perform the same operations as above, based on school type.

In [39]:
groupedbyschooltype = schoolsummary.groupby(['School Type'])

schooltypeaveragemath = groupedbyschooltype["Average Math Score"].mean()
schooltypeaveragereading = groupedbyschooltype["Average Reading Score"].mean()
schooltypepercentmath = groupedbyschooltype["%Passing Math"].mean()
schooltypepercentreading = groupedbyschooltype["%Passing reading"].mean()
schooltypepercentpass = (schooltypepercentmath + schooltypepercentreading)/2


schooltypedf = pd.DataFrame({"Average Math Score": schooltypeaveragemath,
                                          "Average Reading Score": schooltypeaveragereading,
                                          "%Passing Math": schooltypepercentmath,
                                          "%Passing Reading":schooltypepercentreading,
                                          "%Overall Passing Rate": schooltypepercentpass
                                         })

schoolsizedf['% Passing Math'] = (schoolsizedf['% Passing Math']).apply(lambda x: '{:,.2f}%'.format(x))
schoolsizedf['% Passing Reading'] = (schoolsizedf['% Passing Reading']).apply(lambda x: '{:,.2f}%'.format(x))
schoolsizedf['% Overall Passing Rate'] = (schoolsizedf['% Overall Passing Rate']).apply(lambda x: '{:,.2f}%'.format(x))
schoolsizedf['Average Math Score'] = (schoolsizedf['Average Math Score']).apply(lambda x: '{:,.2f}'.format(x))
schoolsizedf['Average Reading Score'] = (schoolsizedf['Average Reading Score']).apply(lambda x: '{:,.2f}'.format(x))

schooltypedf

DataError: No numeric types to aggregate