In [None]:
#Analysis goes here!

In [181]:
#Dependencies and Setup
import pandas as pd
from pathlib import Path

In [182]:
#Loading in data
schools_data = Path("../Resources/schools_complete.csv")
students_data = Path("../Resources/students_complete.csv")

In [183]:
#Read files and store in DFs
schools_df = pd.read_csv(schools_data)
students_df = pd.read_csv(students_data)

In [184]:
#Combine data to single dataframe
combined_df = pd.merge(schools_df, students_df, how="left", on=["school_name", "school_name"])
combined_df.head()

Unnamed: 0,School ID,school_name,type,size,budget,Student ID,student_name,gender,grade,reading_score,math_score
0,0,Huang High School,District,2917,1910635,0,Paul Bradley,M,9th,66,79
1,0,Huang High School,District,2917,1910635,1,Victor Smith,M,12th,94,61
2,0,Huang High School,District,2917,1910635,2,Kevin Rodriguez,M,12th,90,60
3,0,Huang High School,District,2917,1910635,3,Dr. Richard Scott,M,12th,67,58
4,0,Huang High School,District,2917,1910635,4,Bonnie Ray,F,9th,97,84


In [185]:
combined_df.shape

(39170, 11)

In [186]:
#Number of unique schools
school_count = len(combined_df["school_name"].unique())
school_count

15

In [187]:
#Number of students
student_count = len(combined_df["student_name"])
student_count

39170

In [188]:
#Total combined budget
unique_budget = combined_df["budget"].unique()
total_budget = unique_budget.sum()
total_budget

24649428

In [189]:
#Average math score
average_math_score = combined_df["math_score"].mean()
average_math_score

78.98537145774827

In [190]:
#Average reading score
average_reading_score = combined_df["reading_score"].mean()
average_reading_score

81.87784018381414

In [191]:
#Percentage of students who passed math (score >= 70)
passing_math_count = combined_df[(combined_df['math_score'] >= 70)].count()["student_name"]
passing_math_percentage = passing_math_count / float(student_count) * 100
passing_math_percentage

74.9808526933878

In [192]:
#Percentage of students who passed reading (score >= 70)
passing_reading_count = combined_df[(combined_df['reading_score'] >= 70)].count()["student_name"]
passing_reading_percentage = passing_reading_count / float(student_count) * 100
passing_reading_percentage

85.80546336482001

In [193]:
#Percentage of students who passed both math and reading
passing_both_count = combined_df[
    (combined_df["math_score"] >= 70) & (combined_df["reading_score"] >= 70)
].count()["student_name"]
overall_passing_rate = passing_both_count / float(student_count) * 100
overall_passing_rate

65.17232575950983

In [194]:
#"high-level" snapshot of district's key metrics
district_summary = pd.DataFrame({"Total Schools": [school_count],
                                 "Total Students": [student_count],
                                 "Total Budget": [total_budget],
                                 "Average Math Score": [average_math_score],
                                 "Average Reading Score": [average_reading_score],
                                 "Percent Passing Math": [passing_math_percentage],
                                 "Percent Passing Reading": [passing_reading_percentage],
                                 "Overall Pass Rate": [overall_passing_rate]})
district_summary["Total Students"] = district_summary["Total Students"].map("{:,}".format)
district_summary["Total Budget"] = district_summary["Total Budget"].map("${:,.2f}".format)
district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,Percent Passing Math,Percent Passing Reading,Overall Pass Rate
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326


In [195]:
#School types
school_types = combined_df["type"].unique()
school_types

array(['District', 'Charter'], dtype=object)

In [196]:
#Total student count per school
per_school_counts = combined_df["school_name"].value_counts()
per_school_counts

school_name
Bailey High School       4976
Johnson High School      4761
Hernandez High School    4635
Rodriguez High School    3999
Figueroa High School     2949
Huang High School        2917
Ford High School         2739
Wilson High School       2283
Cabrera High School      1858
Wright High School       1800
Shelton High School      1761
Thomas High School       1635
Griffin High School      1468
Pena High School          962
Holden High School        427
Name: count, dtype: int64

In [197]:
#Budget per school
school_names = combined_df["school_name"].unique()
budgets_per = pd.DataFrame({"School Name": school_names, "Budget": unique_budget})
budgets_per

Unnamed: 0,School Name,Budget
0,Huang High School,1910635
1,Figueroa High School,1884411
2,Shelton High School,1056600
3,Hernandez High School,3022020
4,Griffin High School,917500
5,Wilson High School,1319574
6,Cabrera High School,1081356
7,Bailey High School,3124928
8,Holden High School,248087
9,Pena High School,585858


In [198]:
#Per capita spending for each school
school_size = combined_df["size"].unique()
per_student = unique_budget / school_size
per_capita = pd.DataFrame({"School Name": school_names, "Per capita Spend": per_student})
per_capita

Unnamed: 0,School Name,Per capita Spend
0,Huang High School,655.0
1,Figueroa High School,639.0
2,Shelton High School,600.0
3,Hernandez High School,652.0
4,Griffin High School,625.0
5,Wilson High School,578.0
6,Cabrera High School,582.0
7,Bailey High School,628.0
8,Holden High School,581.0
9,Pena High School,609.0


In [199]:
group_schools = combined_df.groupby(['school_name'])
group_schools.head()

Unnamed: 0,School ID,school_name,type,size,budget,Student ID,student_name,gender,grade,reading_score,math_score
0,0,Huang High School,District,2917,1910635,0,Paul Bradley,M,9th,66,79
1,0,Huang High School,District,2917,1910635,1,Victor Smith,M,12th,94,61
2,0,Huang High School,District,2917,1910635,2,Kevin Rodriguez,M,12th,90,60
3,0,Huang High School,District,2917,1910635,3,Dr. Richard Scott,M,12th,67,58
4,0,Huang High School,District,2917,1910635,4,Bonnie Ray,F,9th,97,84
...,...,...,...,...,...,...,...,...,...,...,...
37535,14,Thomas High School,Charter,1635,1043130,37535,Norma Mata,F,10th,76,76
37536,14,Thomas High School,Charter,1635,1043130,37536,Cody Miller,M,11th,84,82
37537,14,Thomas High School,Charter,1635,1043130,37537,Erik Snyder,M,9th,80,90
37538,14,Thomas High School,Charter,1635,1043130,37538,Tanya Martinez,F,9th,71,69


In [200]:
#Average math score by school
avg_math_by_school = group_schools['math_score'].mean()
avg_math_by_school

school_name
Bailey High School       77.048432
Cabrera High School      83.061895
Figueroa High School     76.711767
Ford High School         77.102592
Griffin High School      83.351499
Hernandez High School    77.289752
Holden High School       83.803279
Huang High School        76.629414
Johnson High School      77.072464
Pena High School         83.839917
Rodriguez High School    76.842711
Shelton High School      83.359455
Thomas High School       83.418349
Wilson High School       83.274201
Wright High School       83.682222
Name: math_score, dtype: float64

In [201]:
#Average reading score by school
avg_reading_by_school = group_schools['reading_score'].mean()
avg_reading_by_school

school_name
Bailey High School       81.033963
Cabrera High School      83.975780
Figueroa High School     81.158020
Ford High School         80.746258
Griffin High School      83.816757
Hernandez High School    80.934412
Holden High School       83.814988
Huang High School        81.182722
Johnson High School      80.966394
Pena High School         84.044699
Rodriguez High School    80.744686
Shelton High School      83.725724
Thomas High School       83.848930
Wilson High School       83.989488
Wright High School       83.955000
Name: reading_score, dtype: float64

In [202]:
#Combined df of Average Scores
avg_both_df = pd.DataFrame({"Average Math Score": avg_math_by_school,
                            "Average Reading Score": avg_reading_by_school}).reset_index()
avg_both_df


Unnamed: 0,school_name,Average Math Score,Average Reading Score
0,Bailey High School,77.048432,81.033963
1,Cabrera High School,83.061895,83.97578
2,Figueroa High School,76.711767,81.15802
3,Ford High School,77.102592,80.746258
4,Griffin High School,83.351499,83.816757
5,Hernandez High School,77.289752,80.934412
6,Holden High School,83.803279,83.814988
7,Huang High School,76.629414,81.182722
8,Johnson High School,77.072464,80.966394
9,Pena High School,83.839917,84.044699


In [203]:
#Number of students per school with math scores >= 70
math_pass = combined_df.loc[combined_df['math_score'] >=70]
total_math_pass = pd.DataFrame({"school_name": math_pass["school_name"],
                                "Students Passing Math": ""})
math_pass_by_school = total_math_pass.groupby(['school_name']).count().reset_index()
math_pass_by_school

Unnamed: 0,school_name,Students Passing Math
0,Bailey High School,3318
1,Cabrera High School,1749
2,Figueroa High School,1946
3,Ford High School,1871
4,Griffin High School,1371
5,Hernandez High School,3094
6,Holden High School,395
7,Huang High School,1916
8,Johnson High School,3145
9,Pena High School,910


In [204]:
#Number of students per school with reading scores >= 70
reading_pass = combined_df.loc[combined_df['reading_score'] >=70]
total_reading_pass = pd.DataFrame({"school_name": reading_pass["school_name"],
                                "Students Passing Reading": ""})
reading_pass_by_school = total_reading_pass.groupby(['school_name']).count().reset_index()
reading_pass_by_school

Unnamed: 0,school_name,Students Passing Reading
0,Bailey High School,4077
1,Cabrera High School,1803
2,Figueroa High School,2381
3,Ford High School,2172
4,Griffin High School,1426
5,Hernandez High School,3748
6,Holden High School,411
7,Huang High School,2372
8,Johnson High School,3867
9,Pena High School,923


In [205]:
#Number of students per school with math AND reading scores >= 70
both_pass = combined_df.loc[(combined_df['reading_score'] >=70) & (combined_df['math_score'] >=70)]
total_both_pass = pd.DataFrame({"School": both_pass["school_name"],
                                "Students Passing Both Math & Reading": ""})
both_pass_by_school = total_both_pass.groupby(['School']).count().reset_index()
both_pass_by_school

Unnamed: 0,School,Students Passing Both Math & Reading
0,Bailey High School,2719
1,Cabrera High School,1697
2,Figueroa High School,1569
3,Ford High School,1487
4,Griffin High School,1330
5,Hernandez High School,2481
6,Holden High School,381
7,Huang High School,1561
8,Johnson High School,2549
9,Pena High School,871


In [206]:
#Master Table
master_df = pd.merge(schools_df, avg_both_df, on = 'school_name') \
.merge(math_pass_by_school, on = 'school_name') \
.merge(reading_pass_by_school, on = 'school_name')
master_df

Unnamed: 0,School ID,school_name,type,size,budget,Average Math Score,Average Reading Score,Students Passing Math,Students Passing Reading
0,0,Huang High School,District,2917,1910635,76.629414,81.182722,1916,2372
1,1,Figueroa High School,District,2949,1884411,76.711767,81.15802,1946,2381
2,2,Shelton High School,Charter,1761,1056600,83.359455,83.725724,1653,1688
3,3,Hernandez High School,District,4635,3022020,77.289752,80.934412,3094,3748
4,4,Griffin High School,Charter,1468,917500,83.351499,83.816757,1371,1426
5,5,Wilson High School,Charter,2283,1319574,83.274201,83.989488,2143,2204
6,6,Cabrera High School,Charter,1858,1081356,83.061895,83.97578,1749,1803
7,7,Bailey High School,District,4976,3124928,77.048432,81.033963,3318,4077
8,8,Holden High School,Charter,427,248087,83.803279,83.814988,395,411
9,9,Pena High School,Charter,962,585858,83.839917,84.044699,910,923


In [207]:
#Percent Passing Math
percent_passing_math = (master_df['Students Passing Math'] / master_df['size']) * 100
percent_passing_math

0     65.683922
1     65.988471
2     93.867121
3     66.752967
4     93.392371
5     93.867718
6     94.133477
7     66.680064
8     92.505855
9     94.594595
10    93.333333
11    66.366592
12    66.057551
13    68.309602
14    93.272171
dtype: float64

In [208]:
#Percent Passing Reading
percent_passing_reading = (master_df['Students Passing Reading'] / master_df['size']) *100
percent_passing_reading

0     81.316421
1     80.739234
2     95.854628
3     80.862999
4     97.138965
5     96.539641
6     97.039828
7     81.933280
8     96.252927
9     95.945946
10    96.611111
11    80.220055
12    81.222432
13    79.299014
14    97.308869
dtype: float64

In [216]:
#Sorting Master df alphabetically for next step
sorted_master_df = master_df.sort_values('school_name').reset_index()
sorted_master_df

Unnamed: 0,index,School ID,school_name,type,size,budget,Average Math Score,Average Reading Score,Students Passing Math,Students Passing Reading
0,7,7,Bailey High School,District,4976,3124928,77.048432,81.033963,3318,4077
1,6,6,Cabrera High School,Charter,1858,1081356,83.061895,83.97578,1749,1803
2,1,1,Figueroa High School,District,2949,1884411,76.711767,81.15802,1946,2381
3,13,13,Ford High School,District,2739,1763916,77.102592,80.746258,1871,2172
4,4,4,Griffin High School,Charter,1468,917500,83.351499,83.816757,1371,1426
5,3,3,Hernandez High School,District,4635,3022020,77.289752,80.934412,3094,3748
6,8,8,Holden High School,Charter,427,248087,83.803279,83.814988,395,411
7,0,0,Huang High School,District,2917,1910635,76.629414,81.182722,1916,2372
8,12,12,Johnson High School,District,4761,3094650,77.072464,80.966394,3145,3867
9,9,9,Pena High School,Charter,962,585858,83.839917,84.044699,910,923


In [218]:
#Percent Passing Both Math + Reading

percent_passing_both = (both_pass_by_school['Students Passing Both Math & Reading'] / sorted_master_df['size']) * 100

percent_passing_both

0     54.642283
1     91.334769
2     53.204476
3     54.289887
4     90.599455
5     53.527508
6     89.227166
7     53.513884
8     53.539172
9     90.540541
10    52.988247
11    89.892107
12    90.948012
13    90.582567
14    90.333333
dtype: float64

In [222]:
#District Summary df
schools_summary_df = pd.DataFrame({'School': master_df['school_name'],
                                  'School Type': master_df['type'],
                                  'Total Students': master_df['size'],
                                  'School Budget': master_df['budget'],
                                  'Per Capita Budget': (master_df['budget'] / master_df['size']),
                                  'Average Math Score': master_df['Average Math Score'],
                                  'Average Reading Score': master_df['Average Reading Score'],
                                  'Percent Passing Math': percent_passing_math,
                                  'Percent Passing Reading': percent_passing_reading,
                                  'Percent Passing Both': percent_passing_both})
schools_summary_df.set_index('School')

Unnamed: 0_level_0,School Type,Total Students,School Budget,Per Capita Budget,Average Math Score,Average Reading Score,Percent Passing Math,Percent Passing Reading,Percent Passing Both
School,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Huang High School,District,2917,1910635,655.0,76.629414,81.182722,65.683922,81.316421,54.642283
Figueroa High School,District,2949,1884411,639.0,76.711767,81.15802,65.988471,80.739234,91.334769
Shelton High School,Charter,1761,1056600,600.0,83.359455,83.725724,93.867121,95.854628,53.204476
Hernandez High School,District,4635,3022020,652.0,77.289752,80.934412,66.752967,80.862999,54.289887
Griffin High School,Charter,1468,917500,625.0,83.351499,83.816757,93.392371,97.138965,90.599455
Wilson High School,Charter,2283,1319574,578.0,83.274201,83.989488,93.867718,96.539641,53.527508
Cabrera High School,Charter,1858,1081356,582.0,83.061895,83.97578,94.133477,97.039828,89.227166
Bailey High School,District,4976,3124928,628.0,77.048432,81.033963,66.680064,81.93328,53.513884
Holden High School,Charter,427,248087,581.0,83.803279,83.814988,92.505855,96.252927,53.539172
Pena High School,Charter,962,585858,609.0,83.839917,84.044699,94.594595,95.945946,90.540541


In [224]:
#Top Performers
top_performers = schools_summary_df.nlargest(5, 'Percent Passing Both')
top_performers

Unnamed: 0,School,School Type,Total Students,School Budget,Per Capita Budget,Average Math Score,Average Reading Score,Percent Passing Math,Percent Passing Reading,Percent Passing Both
1,Figueroa High School,District,2949,1884411,639.0,76.711767,81.15802,65.988471,80.739234,91.334769
12,Johnson High School,District,4761,3094650,650.0,77.072464,80.966394,66.057551,81.222432,90.948012
4,Griffin High School,Charter,1468,917500,625.0,83.351499,83.816757,93.392371,97.138965,90.599455
13,Ford High School,District,2739,1763916,644.0,77.102592,80.746258,68.309602,79.299014,90.582567
9,Pena High School,Charter,962,585858,609.0,83.839917,84.044699,94.594595,95.945946,90.540541


In [225]:
#Bottom Performers
bottom_performers = schools_summary_df.nsmallest(5, 'Percent Passing Both')
bottom_performers

Unnamed: 0,School,School Type,Total Students,School Budget,Per Capita Budget,Average Math Score,Average Reading Score,Percent Passing Math,Percent Passing Reading,Percent Passing Both
10,Wright High School,Charter,1800,1049400,583.0,83.682222,83.955,93.333333,96.611111,52.988247
2,Shelton High School,Charter,1761,1056600,600.0,83.359455,83.725724,93.867121,95.854628,53.204476
7,Bailey High School,District,4976,3124928,628.0,77.048432,81.033963,66.680064,81.93328,53.513884
5,Wilson High School,Charter,2283,1319574,578.0,83.274201,83.989488,93.867718,96.539641,53.527508
8,Holden High School,Charter,427,248087,581.0,83.803279,83.814988,92.505855,96.252927,53.539172


In [284]:
#Math Scores by Grade
ninth_grade = combined_df[combined_df['grade'] == '9th']
tenth_grade = combined_df[combined_df['grade'] == '10th']
eleventh_grade = combined_df[combined_df['grade'] == '11th']
twelfth_grade = combined_df[combined_df['grade'] == '12th']

ninth_grade_df = pd.DataFrame({"School": ninth_grade['school_name'],
                              "ninth_math": ninth_grade['math_score'],
                              "ninth_reading": ninth_grade['reading_score']})
tenth_grade_df = pd.DataFrame({"School": tenth_grade['school_name'],
                              "tenth_math": tenth_grade['math_score'],
                              "tenth_reading": tenth_grade['reading_score']})
eleventh_grade_df = pd.DataFrame({"School": eleventh_grade['school_name'],
                              "eleventh_math": eleventh_grade['math_score'],
                              "eleventh_reading": eleventh_grade['reading_score']})
twelfth_grade_df = pd.DataFrame({"School": twelfth_grade['school_name'],
                              "twelfth_math": twelfth_grade['math_score'],
                              "twelfth_reading": twelfth_grade['reading_score']})

by_grade_scores = pd.merge(ninth_grade_df, tenth_grade_df, on = 'School') \
.merge(eleventh_grade_df, on = 'School') \
.merge(twelfth_grade_df, on = 'School')

math_by_grade = pd.DataFrame({'School': by_grade_scores['School'],
                             '9th': by_grade_scores['ninth_math'],
                             '10th': by_grade_scores['tenth_math'],
                             '11th': by_grade_scores['eleventh_math'],
                             '12th': by_grade_scores['twelfth_math']})

math_by_grade = math_by_grade[['School', '9th', '10th', '11th', '12th']].set_index('School')
math_by_grade


#I don't know how to fix this

MemoryError: Unable to allocate 70.1 GiB for an array with shape (9411897763,) and data type int64

In [285]:
#Reading Scores by Grade
reading_by_grade = pd.DataFrame({'School': by_grade_scores['School'],
                             '9th': by_grade_scores['ninth_reading'],
                             '10th': by_grade_scores['tenth_reading'],
                             '11th': by_grade_scores['eleventh_reading'],
                             '12th': by_grade_scores['twelfth_reading']})

reading_by_grade = reading_by_grade[['School', '9th', '10th', '11th', '12th']].set_index('School')
reading_by_grade


#I don't know how to fix this either because the merge wouldn't work above

KeyError: 'eleventh_reading'

In [306]:
#Scores by Spending
spend_per_score = schools_summary_df.copy()
spending_bins = [0, 585, 630, 645, 680]
labels = ["<$585", "$585-630", "$630-645", "$645-680"]

spend_per_score['Per Capita Budget'] = spend_per_score['Per Capita Budget'].replace('[\$]', '', regex=True).astype('float')

spend_per_score['Spending Range'] = pd.cut(spend_per_score['Per Capita Budget'], spending_bins, labels)

spend_per_score = spend_per_score[['Average Math Score',
                                  'Average Reading Score',
                                  'Percent Passing Math',
                                  'Percent Passing Reading',
                                  'Percent Passing Both',
                                  'Spending Range']]

spend_per_score_df = spend_per_score.groupby('Spending Range').mean()

spend_per_score_df

Unnamed: 0_level_0,Average Math Score,Average Reading Score,Percent Passing Math,Percent Passing Reading,Percent Passing Both
Spending Range,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"(0, 585]",83.455399,83.933814,93.460096,96.610877,62.320523
"(585, 630]",81.899826,83.155286,87.133538,92.718205,71.964589
"(630, 645]",78.518855,81.624473,73.484209,84.391793,90.535694
"(645, 680]",76.99721,81.027843,66.164813,81.133951,66.626727


In [296]:
#Getting Passing Rates per Budgetary Ranges
spending_math_scores = spend_per_score.groupby(["Spending Range"])["Average Math Score"].mean()
spending_reading_scores = spend_per_score.groupby(["Spending Range"])["Average Reading Score"].mean()
spending_passing_math = spend_per_score.groupby(["Spending Range"])["Percent Passing Math"].mean()
spending_passing_reading = spend_per_score.groupby(["Spending Range"])["Percent Passing Reading"].mean()
overall_passing_spending = spend_per_score.groupby(["Spending Range"])["Percent Passing Both"].mean()

Spending Range
(0, 585]      62.320523
(585, 630]    71.964589
(630, 645]    90.535694
(645, 680]    66.626727
Name: Percent Passing Both, dtype: float64

In [297]:
#Average Scores per School Size
spending_summary = spend_per_score[['Average Math Score',
                                   'Percent Passing Math',
                                   'Percent Passing Reading',
                                   'Percent Passing Both',
                                   'Spending Range']]
average_scores_per_capita = spending_summary.groupby('Spending Range').mean()
average_scores_per_capita

Unnamed: 0_level_0,Average Math Score,Percent Passing Math,Percent Passing Reading,Percent Passing Both
Spending Range,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"(0, 585]",83.455399,93.460096,96.610877,62.320523
"(585, 630]",81.899826,87.133538,92.718205,71.964589
"(630, 645]",78.518855,73.484209,84.391793,90.535694
"(645, 680]",76.99721,66.164813,81.133951,66.626727


In [299]:
#Scores by School Size
size_per_score = schools_summary_df.copy()
size_bins = [0, 1000, 2000, 5000]
labels = ["Small (<1000)", "Medium (1000-2000)", "Large (2000-5000)"]

size_per_score['Total Students'] = pd.to_numeric(size_per_score['Total Students'])

size_per_score['School Size'] = pd.cut(size_per_score['Total Students'], size_bins, labels)

size_per_score = size_per_score[['Average Math Score', 'Average Reading Score', 'Percent Passing Math',
                                 'Percent Passing Reading', 'Percent Passing Both', 'School Size']]

size_per_score_df = size_per_score.groupby('School Size').mean()
size_per_score_df


Unnamed: 0_level_0,Average Math Score,Average Reading Score,Percent Passing Math,Percent Passing Reading,Percent Passing Both
School Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"(0, 1000]",83.821598,83.929843,93.550225,96.099437,72.039856
"(1000, 2000]",83.374684,83.864438,93.599695,96.79068,75.270536
"(2000, 5000]",77.746417,81.344493,69.963361,82.766634,72.341377


In [307]:
#Scores by School Type
school_type_summary = schools_summary_df.copy()
school_type_summary = school_type_summary[['School Type', 'Average Math Score', 'Average Reading Score',
                                           'Percent Passing Math', 'Percent Passing Reading', 'Percent Passing Both']]
school_type_summary_df = school_type_summary.groupby('School Type').mean()
school_type_summary_df

Unnamed: 0_level_0,Average Math Score,Average Reading Score,Percent Passing Math,Percent Passing Reading,Percent Passing Both
School Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,83.473852,83.896421,93.62083,96.586489,71.744987
District,76.956733,80.966636,66.548453,80.799062,75.029073
