In [26]:
import pandas as pd

In [27]:
# Read the students and schools CSV files into DataFrames.
# Both paths are relative, so they resolve against the current working directory.
students_data = pd.read_csv(
    filepath_or_buffer='../PyCitySchools/Resources/students_complete.csv',
)
school_data = pd.read_csv(
    filepath_or_buffer='../PyCitySchools/Resources/schools_complete.csv',
)

In [28]:
# Attach the columns of school_data to every student row, matching on
# school_name. The left join keeps all rows of students_data.
# validate="many_to_one" makes pandas raise a MergeError if a school_name
# appears more than once in school_data; without it, such a duplicate would
# silently duplicate student rows and skew every count and percentage below.
combined_data = students_data.merge(
    school_data,
    how="left",
    on="school_name",
    validate="many_to_one",
)

In [29]:
# Per-student spending: the 'budget' column divided element-wise by 'size'.
# Bracket access is required for 'size' because DataFrame.size is an attribute.
combined_data['Spending Per Student'] = combined_data['budget'].div(combined_data['size'])

In [30]:
# Bin edges for per-student spending. Five edges define four intervals,
# so there must be exactly one label per adjacent pair of edges.
spending_bins = [
    0,
    585,
    630,
    645,
    680,
]

# Display labels, listed in the same order as the intervals they name.
labels = [
    "<$585",
    "$585-630",
    "$630-645",
    "$645-680",
]

In [31]:
# Label every row with the spending bin that its per-student figure falls in.
# pd.cut uses right-closed intervals by default, so a value equal to an edge
# (e.g. exactly 585) is placed in the lower bin. Values outside the outermost
# edges become NaN.
combined_data['Spending Range (Per Student)'] = pd.cut(
    x=combined_data['Spending Per Student'],
    bins=spending_bins,
    labels=labels,
)

In [32]:
# Summarise the rows of combined_data per spending range: mean math score,
# mean reading score, and the number of non-null 'Student ID' values.
# observed=True restricts the result to ranges that actually occur in the data.
# The output column names contain spaces, so the named aggregations are passed
# through a dict unpacked into keyword arguments.
school_spending_df = (
    combined_data
    .groupby('Spending Range (Per Student)', observed=True)
    .agg(**{
        'Average Math Score': ('math_score', 'mean'),
        'Average Reading Score': ('reading_score', 'mean'),
        'Total Students': ('Student ID', 'count'),
    })
)

In [33]:
# Calculate % passing math and reading
# A score of 70 or higher counts as passing. The boolean pass mask is summed
# per spending range instead of filtering rows first and counting: with
# observed=True, a range with no passing rows would be missing from the
# filtered groupby and turn into NaN when the division aligns on the index.
# Summing the mask keeps such a range and reports 0 for it.
school_spending_df['% Passing Math'] = (
    (combined_data['math_score'] >= 70)
    .groupby(combined_data['Spending Range (Per Student)'], observed=True)
    .sum()
    / school_spending_df['Total Students']
    * 100
)
school_spending_df['% Passing Reading'] = (
    (combined_data['reading_score'] >= 70)
    .groupby(combined_data['Spending Range (Per Student)'], observed=True)
    .sum()
    / school_spending_df['Total Students']
    * 100
)

In [34]:
# Calculate % overall passing
# A row passes overall when both math_score and reading_score are 70 or higher.
# The combined boolean mask is summed per spending range instead of filtering
# rows first and counting: with observed=True, a range with no passing rows
# would be missing from the filtered groupby and turn into NaN when the
# division aligns on the index. Summing the mask reports 0 for it instead.
school_spending_df['% Overall Passing'] = (
    ((combined_data['math_score'] >= 70) & (combined_data['reading_score'] >= 70))
    .groupby(combined_data['Spending Range (Per Student)'], observed=True)
    .sum()
    / school_spending_df['Total Students']
    * 100
)

In [38]:
# Pull one Series per metric out of school_spending_df, each grouped by
# spending range and averaged. The same groupby/mean is applied to every
# metric column, so the five assignments are driven from a single tuple of
# column names; the targets on the left are in the same order as that tuple.
(
    spending_math_scores,
    spending_reading_scores,
    spending_passing_math,
    spending_passing_reading,
    overall_passing_spending,
) = (
    school_spending_df.groupby("Spending Range (Per Student)", observed=True)[metric].mean()
    for metric in (
        "Average Math Score",
        "Average Reading Score",
        "% Passing Math",
        "% Passing Reading",
        "% Overall Passing",
    )
)

In [41]:
# Assemble the summary DataFrame from the per-range Series computed above.
# spending_summary_df was previously printed without being defined anywhere in
# the notebook, which raises NameError on a fresh kernel. The Series share the
# spending-range index, so they align row by row; the dict order fixes the
# column order.
spending_summary_df = pd.DataFrame({
    "Average Math Score": spending_math_scores,
    "Average Reading Score": spending_reading_scores,
    "% Passing Math": spending_passing_math,
    "% Passing Reading": spending_passing_reading,
    "% Overall Passing": overall_passing_spending,
})

# Display the summary DataFrame
print(spending_summary_df)

                              Average Math Score  Average Reading Score  \
Spending Range (Per Student)                                              
<$585                                  83.363065              83.964039   
$585-630                               79.982873              82.312643   
$630-645                               77.821056              81.301007   
$645-680                               77.049297              81.005604   

                              % Passing Math  % Passing Reading  \
Spending Range (Per Student)                                      
<$585                              93.702889          96.686558   
$585-630                           79.109851          88.513145   
$630-645                           70.623565          82.600247   
$645-680                           66.230813          81.109397   

                              % Overall Passing  
Spending Range (Per Student)                     
<$585                                 90.64070