In [17]:
# Dependencies and Setup
from pathlib import Path
import pandas as pd

# File to Load

school_data_to_load = Path('./PyCitySchools/Resources/schools_complete.csv')

student_data_to_load = Path("./PyCitySchools/Resources/students_complete.csv")

# Read School and Student Data File and store into Pandas DataFrames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset:
    # The data is combined on 'school_name', school_data.shape = (15 Rows, 5 Columns) student_data.shape = (39170 Rows, 7 Columns) : 
    # meaning there will be repeat values for all values within the school_data added to school_data_complete. i.e. (school_name, School ID, type, size, budget).

school_data_complete = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])

In [18]:
# Calculate the Totals (Schools and Students)

# Total number of unique schools:
    # Use nunique() as there is many repeat values for school_name in the school_data_complete.
school_count = school_data_complete['school_name'].nunique()

# Total number of students, unlikely but some students may have the same name:
student_count = school_data_complete['student_name'].count()

# Total budget Should Equal = $24,649,428.00

# Calculate the Total Budget:
# The budget column value, refers to the budget for each school. Assuming We know the budget for two schools are not the same.
# Therefore the total budget: create Pandas data Series for the budget column, grab each unique value, sum those values.
total_budget = school_data_complete['budget'].unique().sum()

# Average maths score:  create Pandas data for the 'maths_score' Column, find the mean of all values in that series.
average_maths_score = school_data_complete['maths_score'].mean()

# Average reading score: create Pandas data for the 'reading_score' Column, find the mean of all values in that series.
average_reading_score = school_data_complete['reading_score'].mean()

In [19]:
# Calculate the Percentage Pass Rates:
    # 1. Count how many students passed maths and reading and both, 
        # Return a slice of the dataframe with rows that satisfy the condition(s) then, 
        # count the number non-null values in the "student_name" column.

    # 2. Calculate percentages of the total students.

# % passing maths (the percentage of students who passed maths): 
passing_maths_count = school_data_complete[(school_data_complete["maths_score"] >= 50)].count()["student_name"]

# % passing reading (the percentage of students who passed reading):
passing_reading_count = school_data_complete[(school_data_complete['reading_score'] >= 50)].count()["student_name"]

# % overall passing (the percentage of students who passed maths AND reading):
passing_maths_reading_count = school_data_complete[(school_data_complete['reading_score'] >= 50)&
                                                    (school_data_complete["maths_score"] >= 50)].count()["student_name"]

# Simple percentage calculations:
passing_maths_percentage = passing_maths_count / float(student_count) * 100
passing_reading_percentage = passing_reading_count / float(student_count) * 100
overall_passing_rate = passing_maths_reading_count / float(student_count) * 100

In [20]:
# Convert to DataFrame:
 # Create a dictionary with all the data needed with the area_summary dataframe,

Area_Data = {
            'Total Schools':school_count ,
            'Total Students':student_count,
            'Total Budget': total_budget,
            'Average Maths Score': average_maths_score,
            'Average Reading Score': average_reading_score,
            '% Passing Maths': passing_maths_percentage,
            '% Passing Reading': passing_reading_percentage,
            '% Overall Passing': overall_passing_rate
            }
    
# Convert dictionary into dataframe by placing data.
area_summary = pd.DataFrame(Area_Data, index = [0])

# Formatting: Change Total students and Total budget to more readable format.
area_summary["Total Students"] = area_summary["Total Students"].map("{:,}".format)
area_summary["Total Budget"] = area_summary["Total Budget"].map("${:,.2f}".format)

# Display the DataFrame
area_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Maths Score,Average Reading Score,% Passing Maths,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",70.338192,69.980138,86.078632,84.426857,72.808272


In [21]:
# Use the code provided to select the type per school from school_data
    # Sets the index of the DataFrame then selects the column from the DataFrame.
school_types = school_data.set_index(["school_name"])["type"]

# Calculate the total student count per school from school_data:
    # Sets index Column: "school_name", selects "size" columns.
per_school_counts = school_data.set_index(["school_name"])["size"]

# Calculate the total school budget and per capita spending per school from school_data:
    # Sets index Column: "school_name", selects "budget" column.
per_school_budget = school_data.set_index(["school_name"])["budget"]

# Calculate how much money each school spends per student (per capita).
    # Divide per school budget by how many students at that school.
per_school_capita = per_school_budget/per_school_counts

# Calculate the average test scores per school from school_data_complete.
    # groups the DataFrame by "school_name" column, select 'score' column, calculates mean of selected column.
per_school_maths =  school_data_complete.groupby(["school_name"])["maths_score"].mean()
per_school_reading = school_data_complete.groupby(["school_name"])["reading_score"].mean()


In [22]:
# Get the students who passed maths and passed reading by creating separate filtered DataFrames from school_data_complete.
school_passing_maths = school_data_complete.loc[school_data_complete["maths_score"]>=50,:]
school_passing_reading = school_data_complete.loc[school_data_complete["reading_score"]>= 50,:]

# Get the students who passed both reading and maths in a separate DataFrame from school_data_complete.
passing_maths_and_reading = school_data_complete.loc[(school_data_complete["maths_score"] >= 50) &
                                                     (school_data_complete["reading_score"] >= 50), :]

In [23]:
#  Calculate the Percentage Pass Rates
# 1.Count how many students passed maths, reading and both for each school.   
    # Groups the DataFrame by "school_name" column,
    # select column, counts non-null values of student name columns in each group.
# 2. Calculates the percentage from the school size and number of students passing.
# Note: This could be done in one line of code, but for ease of readability, 
#       the steps have been broken into two lines.

# Percentage Passing Maths:
per_school_passing_maths_counts = school_passing_maths.groupby(["school_name"])['student_name'].count()

# Percentage Passing Reading:
per_school_passing_reading_counts = school_passing_reading.groupby(["school_name"])['student_name'].count()

# Percentage Passing overall:
per_school_passing_overall_counts = passing_maths_and_reading.groupby(["school_name"])['student_name'].count()

# Simple percentage calculations:
per_school_passing_maths = 100*per_school_passing_maths_counts/per_school_counts
per_school_passing_reading = 100*per_school_passing_reading_counts/per_school_counts
overall_passing_rate = 100* per_school_passing_overall_counts / per_school_counts


In [24]:
# Convert to DataFrame.
per_school_summary = pd.concat([school_types, 
                per_school_counts, 
                per_school_budget, 
                per_school_capita, 
                per_school_maths, 
                per_school_reading, 
                per_school_passing_maths, 
                per_school_passing_reading, 
                overall_passing_rate], axis=1)

In [25]:
# Formatting
# Rename columns to match the required output.
per_school_summary = per_school_summary.rename(columns = {'type':'School Type',
                                     'size' : 'Total Students',
                                     'budget' : 'Total School Budget',
                                     0:'Per Student Budget',
                                     'maths_score' : 'Average Maths Score',
                                     'reading_score' : 'Average Reading Score',
                                     1:'% Passing Maths',
                                     2 :'% Passing Reading',
                                     3 : '% Overall Passing'})

# Sort rows to match the order of the required output (alphabetical).
per_school_summary = per_school_summary.sort_index()

per_school_summary["Total School Budget"] = per_school_summary["Total School Budget"].map("${:,.2f}".format)
per_school_summary["Per Student Budget"] = per_school_summary["Per Student Budget"].map("${:,.2f}".format)

# Display the DataFrame
per_school_summary

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Maths Score,Average Reading Score,% Passing Maths,% Passing Reading,% Overall Passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,Government,4976,"$3,124,928.00",$628.00,72.352894,71.008842,91.639871,87.379421,80.084405
Cabrera High School,Independent,1858,"$1,081,356.00",$582.00,71.657158,71.359526,90.850377,89.074273,80.785791
Figueroa High School,Government,2949,"$1,884,411.00",$639.00,68.698542,69.077993,81.654798,82.807731,67.650051
Ford High School,Government,2739,"$1,763,916.00",$644.00,69.091274,69.572472,82.438846,82.219788,67.46988
Griffin High School,Independent,1468,"$917,500.00",$625.00,71.788147,71.245232,91.212534,88.487738,81.33515
Hernandez High School,Government,4635,"$3,022,020.00",$652.00,68.874865,69.186408,80.949299,81.877023,66.364617
Holden High School,Independent,427,"$248,087.00",$581.00,72.583138,71.660422,89.929742,88.52459,78.922717
Huang High School,Government,2917,"$1,910,635.00",$655.00,68.935207,68.910525,81.693521,81.453548,66.712376
Johnson High School,Government,4761,"$3,094,650.00",$650.00,68.8431,69.039277,82.062592,81.978576,67.191766
Pena High School,Independent,962,"$585,858.00",$609.00,72.088358,71.613306,91.683992,86.590437,79.209979


In [26]:
# Sort and show top five schools
    # Sort Values based on overall passing students in descending order.
top_schools = per_school_summary.sort_values('% Overall Passing', ascending = False)
    # Show the first five values.
top_schools.head(5)

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Maths Score,Average Reading Score,% Passing Maths,% Passing Reading,% Overall Passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Griffin High School,Independent,1468,"$917,500.00",$625.00,71.788147,71.245232,91.212534,88.487738,81.33515
Cabrera High School,Independent,1858,"$1,081,356.00",$582.00,71.657158,71.359526,90.850377,89.074273,80.785791
Bailey High School,Government,4976,"$3,124,928.00",$628.00,72.352894,71.008842,91.639871,87.379421,80.084405
Wright High School,Independent,1800,"$1,049,400.00",$583.00,72.047222,70.969444,91.777778,86.666667,79.722222
Rodriguez High School,Government,3999,"$2,547,363.00",$637.00,72.047762,70.935984,90.797699,87.396849,79.419855


In [27]:
# Sort and show bottom five schools
 # Sort Values based on overall passing students in ascending order.
bottom_schools = per_school_summary.sort_values('% Overall Passing', ascending = True)
 # Show the first five values.
bottom_schools.head(5)

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Maths Score,Average Reading Score,% Passing Maths,% Passing Reading,% Overall Passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Hernandez High School,Government,4635,"$3,022,020.00",$652.00,68.874865,69.186408,80.949299,81.877023,66.364617
Huang High School,Government,2917,"$1,910,635.00",$655.00,68.935207,68.910525,81.693521,81.453548,66.712376
Johnson High School,Government,4761,"$3,094,650.00",$650.00,68.8431,69.039277,82.062592,81.978576,67.191766
Wilson High School,Independent,2283,"$1,319,574.00",$578.00,69.170828,68.876916,82.785808,81.29654,67.455103
Ford High School,Government,2739,"$1,763,916.00",$644.00,69.091274,69.572472,82.438846,82.219788,67.46988


In [28]:
# Create data series of scores by year levels using conditionals
year_nine = school_data_complete[(school_data_complete["year"] == 9)]
year_ten = school_data_complete[(school_data_complete["year"] == 10)]
year_eleven = school_data_complete[(school_data_complete["year"] == 11)]
year_twelve = school_data_complete[(school_data_complete["year"] == 12)]


# Group each by school name
year_nine_scores = year_nine.groupby('school_name')
year_ten_scores = year_ten.groupby('school_name')
year_eleven_scores = year_eleven.groupby('school_name')
year_twelve_scores = year_twelve.groupby('school_name')

# Combine series into single DataFrame
maths_scores_by_year = pd.concat([year_nine_scores['maths_score'].mean(),
                                  year_ten_scores['maths_score'].mean(),
                                  year_eleven_scores['maths_score'].mean(),
                                  year_twelve_scores['maths_score'].mean()], axis=1)
# Rename columns
maths_scores_by_year.columns = ['Year 9', 'Year 10', 'Year 11', 'Year 12']

# # Minor data wrangling
maths_scores_by_year.index.name = None

# Display the DataFrame
maths_scores_by_year

Unnamed: 0,Year 9,Year 10,Year 11,Year 12
Bailey High School,72.493827,71.897498,72.3749,72.675097
Cabrera High School,72.32197,72.437768,71.008299,70.604712
Figueroa High School,68.477804,68.331586,68.811001,69.325282
Ford High School,69.021609,69.387006,69.248862,68.617811
Griffin High School,72.789731,71.093596,71.692521,71.469178
Hernandez High School,68.586831,68.867156,69.154412,68.985075
Holden High School,70.543307,75.105263,71.640777,73.409639
Huang High School,69.081754,68.533246,69.431345,68.639316
Johnson High School,69.469286,67.99022,68.63773,69.287393
Pena High School,71.996364,72.396,72.523438,71.187845


In [29]:
# NOTE: Code has been removed from starter code as it was repeat code and was 
#       unnessescary. 

# Combine series into single DataFrame
reading_scores_by_year = pd.concat([year_nine_scores['reading_score'].mean(),
                                  year_ten_scores['reading_score'].mean(),
                                  year_eleven_scores['reading_score'].mean(),
                                  year_twelve_scores['reading_score'].mean()], axis=1)

# Rename columns
reading_scores_by_year.columns = ['Year 9', 'Year 10', 'Year 11', 'Year 12']

# Minor data wrangling
reading_scores_by_year.index.name = None

# Display the DataFrame
reading_scores_by_year


Unnamed: 0,Year 9,Year 10,Year 11,Year 12
Bailey High School,70.90192,70.848265,70.317346,72.195525
Cabrera High School,71.172348,71.328326,71.201245,71.856021
Figueroa High School,70.261682,67.677588,69.152327,69.082126
Ford High School,69.615846,68.988701,70.735964,68.849722
Griffin High School,72.026895,70.746305,72.385042,69.434932
Hernandez High School,68.477569,70.621842,68.418199,69.244136
Holden High School,71.598425,71.096491,73.31068,70.481928
Huang High School,68.670616,69.516297,68.740638,68.671795
Johnson High School,68.719286,69.295029,69.969115,67.992521
Pena High School,70.949091,72.324,71.703125,71.513812


In [30]:
# Establish the bins and the group names the data will be sorted into.
spending_bins = [0, 585, 630, 645, 680]
group_names = ["<$585", "$585-630", "$630-645", "$645-680"]

In [31]:
# Create a copy of the school summary since it has the "Per Student Budget"
#  This step can be skipped but its best to make a copy.
school_spending_df = per_school_summary

In [32]:
# Categorise spending based on the bins.

# Reformat 'Per Student Budget' Column into a comparable datatype.
school_spending_df['Per Student Budget'] = school_spending_df['Per Student Budget'].str.replace('$', '').astype(float)

# Place 'Per Student Budget' Column into School_spending_df dataframe.
school_spending_df["Spending Ranges (Per Student)"] = pd.to_numeric(school_spending_df['Per Student Budget'])

# Cut dataframe up based on spending_bins and label the groups.
school_spending_df["Spending Ranges (Per Student)"] = pd.cut(school_spending_df['Per Student Budget'], spending_bins, labels = group_names)

# Reformat back to the required format for presentation.
school_spending_df["Per Student Budget"] = school_spending_df["Per Student Budget"].map("${:,.2f}".format)

school_spending_df

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Maths Score,Average Reading Score,% Passing Maths,% Passing Reading,% Overall Passing,Spending Ranges (Per Student)
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Bailey High School,Government,4976,"$3,124,928.00",$628.00,72.352894,71.008842,91.639871,87.379421,80.084405,$585-630
Cabrera High School,Independent,1858,"$1,081,356.00",$582.00,71.657158,71.359526,90.850377,89.074273,80.785791,<$585
Figueroa High School,Government,2949,"$1,884,411.00",$639.00,68.698542,69.077993,81.654798,82.807731,67.650051,$630-645
Ford High School,Government,2739,"$1,763,916.00",$644.00,69.091274,69.572472,82.438846,82.219788,67.46988,$630-645
Griffin High School,Independent,1468,"$917,500.00",$625.00,71.788147,71.245232,91.212534,88.487738,81.33515,$585-630
Hernandez High School,Government,4635,"$3,022,020.00",$652.00,68.874865,69.186408,80.949299,81.877023,66.364617,$645-680
Holden High School,Independent,427,"$248,087.00",$581.00,72.583138,71.660422,89.929742,88.52459,78.922717,<$585
Huang High School,Government,2917,"$1,910,635.00",$655.00,68.935207,68.910525,81.693521,81.453548,66.712376,$645-680
Johnson High School,Government,4761,"$3,094,650.00",$650.00,68.8431,69.039277,82.062592,81.978576,67.191766,$645-680
Pena High School,Independent,962,"$585,858.00",$609.00,72.088358,71.613306,91.683992,86.590437,79.209979,$585-630


In [33]:
#  Calculate averages for the desired columns.
    # groups the DataFrame school_spending_df by the values in the "Spending Ranges (Per Student)" column. 
    # selects the required score column then calculates the mean (average) value within each group.
spending_maths_scores = school_spending_df.groupby(["Spending Ranges (Per Student)"], observed=False)["Average Maths Score"].mean()
spending_reading_scores = school_spending_df.groupby(["Spending Ranges (Per Student)"], observed=False)["Average Reading Score"].mean()
spending_passing_maths = school_spending_df.groupby(["Spending Ranges (Per Student)"], observed=False)["% Passing Maths"].mean()
spending_passing_reading = school_spending_df.groupby(["Spending Ranges (Per Student)"], observed=False)["% Passing Reading"].mean()
overall_passing_spending = school_spending_df.groupby(["Spending Ranges (Per Student)"], observed=False)["% Overall Passing"].mean()


In [34]:
# Assemble into DataFrame
 # Give column names to match results.
spending_summary = pd.DataFrame({
    "Average Maths Score": spending_maths_scores,
    "Average Reading Score": spending_reading_scores,
    "% Passing Maths": spending_passing_maths,
    "% Passing Reading": spending_passing_reading,
    "% Overall Passing": overall_passing_spending})

#Format to Match Results
spending_summary["Average Maths Score"] = spending_summary["Average Maths Score"].map("{:,.2f}".format)
spending_summary["Average Reading Score"] = spending_summary["Average Reading Score"].map("{:,.2f}".format)
spending_summary["% Passing Maths"] = spending_summary["% Passing Maths"].map("{:,.2f}".format)
spending_summary["% Passing Reading"] = spending_summary["% Passing Reading"].map("{:,.2f}".format)
spending_summary["% Overall Passing"] = spending_summary["% Overall Passing"].map("{:,.2f}".format)

spending_summary

Unnamed: 0_level_0,Average Maths Score,Average Reading Score,% Passing Maths,% Passing Reading,% Overall Passing
Spending Ranges (Per Student),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
<$585,71.36,70.72,88.84,86.39,76.72
$585-630,72.07,71.03,91.52,87.29,79.88
$630-645,69.85,69.84,84.69,83.76,71.0
$645-680,68.88,69.05,81.57,81.77,66.76


In [35]:
# Establish the bins.
size_bins = [0, 1000, 2000, 5000]
group_names = ["Small (<1000)", "Medium (1000-2000)", "Large (2000-5000)"]

In [36]:
# Categorize the spending based on the bins
    # Does not to be reformatted as above.
# Cut dataframe up based on size_bins and label the groups.
per_school_summary["School Size"] = pd.cut(per_school_summary['Total Students'], size_bins, labels = group_names)
per_school_summary

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Maths Score,Average Reading Score,% Passing Maths,% Passing Reading,% Overall Passing,Spending Ranges (Per Student),School Size
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Bailey High School,Government,4976,"$3,124,928.00",$628.00,72.352894,71.008842,91.639871,87.379421,80.084405,$585-630,Large (2000-5000)
Cabrera High School,Independent,1858,"$1,081,356.00",$582.00,71.657158,71.359526,90.850377,89.074273,80.785791,<$585,Medium (1000-2000)
Figueroa High School,Government,2949,"$1,884,411.00",$639.00,68.698542,69.077993,81.654798,82.807731,67.650051,$630-645,Large (2000-5000)
Ford High School,Government,2739,"$1,763,916.00",$644.00,69.091274,69.572472,82.438846,82.219788,67.46988,$630-645,Large (2000-5000)
Griffin High School,Independent,1468,"$917,500.00",$625.00,71.788147,71.245232,91.212534,88.487738,81.33515,$585-630,Medium (1000-2000)
Hernandez High School,Government,4635,"$3,022,020.00",$652.00,68.874865,69.186408,80.949299,81.877023,66.364617,$645-680,Large (2000-5000)
Holden High School,Independent,427,"$248,087.00",$581.00,72.583138,71.660422,89.929742,88.52459,78.922717,<$585,Small (<1000)
Huang High School,Government,2917,"$1,910,635.00",$655.00,68.935207,68.910525,81.693521,81.453548,66.712376,$645-680,Large (2000-5000)
Johnson High School,Government,4761,"$3,094,650.00",$650.00,68.8431,69.039277,82.062592,81.978576,67.191766,$645-680,Large (2000-5000)
Pena High School,Independent,962,"$585,858.00",$609.00,72.088358,71.613306,91.683992,86.590437,79.209979,$585-630,Small (<1000)


In [207]:
# Calculate averages for the desired columns.
size_maths_scores = per_school_summary.groupby(["School Size"], observed=False)["Average Maths Score"].mean()
size_reading_scores = per_school_summary.groupby(["School Size"], observed=False)["Average Reading Score"].mean()
size_passing_maths = per_school_summary.groupby(["School Size"], observed=False)["% Passing Maths"].mean()
size_passing_reading = per_school_summary.groupby(["School Size"], observed=False)["% Passing Reading"].mean()
size_overall_passing = per_school_summary.groupby(["School Size"], observed=False)["% Overall Passing"].mean()


In [208]:
# Assemble into DataFrame
    #Give column names to match results.
size_summary = pd.DataFrame({
    "Average Maths Score": size_maths_scores,
    "Average Reading Score": size_reading_scores,
    "% Passing Maths": size_passing_maths,
    "% Passing Reading": size_passing_reading,
    "% Overall Passing": size_overall_passing
})

# Display results
size_summary

Unnamed: 0_level_0,Average Maths Score,Average Reading Score,% Passing Maths,% Passing Reading,% Overall Passing
School Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Small (<1000),72.335748,71.636864,90.806867,87.557513,79.066348
Medium (1000-2000),71.42165,70.720164,89.84656,86.714149,78.039785
Large (2000-5000),69.751809,69.576052,84.252804,83.301185,70.293507


In [209]:
# Create new series using groupby for:
# Type | Average Maths Score | Average Reading Score | % Passing Maths | % Passing Reading | % Overall Passing

type_maths_scores = per_school_summary.groupby(["School Type"], observed=False)["Average Maths Score"].mean()
type_reading_scores = per_school_summary.groupby(["School Type"], observed=False)["Average Reading Score"].mean()
type_passing_maths = per_school_summary.groupby(["School Type"], observed=False)["% Passing Maths"].mean()
type_passing_reading = per_school_summary.groupby(["School Type"], observed=False)["% Passing Reading"].mean()
type_overall_passing = per_school_summary.groupby(["School Type"], observed=False)["% Overall Passing"].mean()


In [210]:
# Assemble into DataFrame
type_summary = pd.DataFrame({
    "Average Maths Score": type_maths_scores,
    "Average Reading Score": type_reading_scores,
    "% Passing Maths": type_passing_maths,
    "% Passing Reading": type_passing_reading,
    "% Overall Passing": type_overall_passing
})
# Display results
type_summary

Unnamed: 0_level_0,Average Maths Score,Average Reading Score,% Passing Maths,% Passing Reading,% Overall Passing
School Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Government,69.834806,69.675929,84.462375,83.587562,70.698993
Independent,71.368822,70.718933,89.204043,86.247789,76.97334
