# Graduation Rates vs SAT (Public vs Private)

This notebook reproduces the core analysis:
- Scatter plot of graduation rate vs SAT, with a separate marker for each sector
- Correlations by sector
- Separate least-squares regressions (public vs private)
- Predicted graduation rate for Truman at SAT ≈ 1190, using the public-sector regression line
- (Bonus) Single model with an interaction term (ANCOVA-style)

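Dataset: `data/GradRates.xlsx` (cleaned version: `data/grad_rates_clean.csv`; the code below loads it as `../data/grad_rates_clean.csv`, i.e. relative to the notebook's working directory)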


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import statsmodels.formula.api as smf

# Load the cleaned dataset. The path is relative, so it resolves against the
# directory the kernel was started in.
df = pd.read_csv("../data/grad_rates_clean.csv")

# Fail fast if a column that the later cells index by name is absent, rather
# than hitting a KeyError halfway through the analysis.
required_cols = {"sat_total", "graduation_rate", "sector"}
missing_cols = required_cols - set(df.columns)
assert not missing_cols, f"missing expected columns: {sorted(missing_cols)}"

# Print the shape separately and leave the frame as the cell's last expression:
# wrapping both in a tuple would force the plain-text repr of the DataFrame.
print(f"(rows, columns) = {df.shape}")
df.head()


In [None]:
# Row count per sector label. dropna=False keeps rows with a missing sector in
# the tally; by default value_counts() omits them, and the later cells that
# filter on 'Public' / 'Private' would then exclude those rows unnoticed.
df['sector'].value_counts(dropna=False)

In [None]:
# Graduation rate against SAT on one set of axes, with a distinct marker per
# sector (colors are left to matplotlib's defaults).
fig, ax = plt.subplots()
sector_markers = {'Public': 'o', 'Private': 'x'}
for sector, marker in sector_markers.items():
    rows = df.loc[df['sector'] == sector]
    ax.scatter(
        rows['sat_total'],
        rows['graduation_rate'],
        marker=marker,
        label=sector,
    )
ax.set_xlabel("SAT (Math + Verbal)")
ax.set_ylabel("Graduation rate (%)")
ax.set_title("Graduation Rate vs SAT")
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
# Correlations
def corr(sub):
    """Correlation between SAT total and graduation rate for one subset.

    Parameters
    ----------
    sub : mapping of column name -> values (e.g. a DataFrame)
        Must provide 'sat_total' and 'graduation_rate'.

    Returns
    -------
    The off-diagonal entry of the 2x2 matrix produced by ``np.corrcoef``.
    """
    sat, grad = sub['sat_total'], sub['graduation_rate']
    corr_matrix = np.corrcoef(sat, grad)
    return corr_matrix[0, 1]

# Correlation of SAT with graduation rate inside each sector, computed in the
# order (Public, Private) and displayed as a pair.
corr_public, corr_private = (
    corr(df.loc[df['sector'] == label]) for label in ('Public', 'Private')
)
(corr_public, corr_private)


In [None]:
# Separate regressions (SciPy)
def linreg(sub):
    """Fit graduation rate on SAT total with ``scipy.stats.linregress``.

    Parameters
    ----------
    sub : DataFrame
        Must provide 'sat_total' and 'graduation_rate'; both are cast to float
        before fitting.

    Returns
    -------
    tuple
        ``(slope, intercept, r, p, se)`` taken from the linregress result, in
        that order.
    """
    sat = sub['sat_total'].astype(float)
    grad = sub['graduation_rate'].astype(float)
    fit = stats.linregress(sat, grad)
    # Read the fields by name instead of positional unpacking; same five values.
    return fit.slope, fit.intercept, fit.rvalue, fit.pvalue, fit.stderr

# One subset per sector; each gets its own fitted line.
pub = df.loc[df['sector'] == 'Public']
pri = df.loc[df['sector'] == 'Private']

# linreg returns (slope, intercept, r, p, se) in that order.
(
    (pub_slope, pub_int, pub_r, pub_p, pub_se),
    (pri_slope, pri_int, pri_r, pri_p, pri_se),
) = linreg(pub), linreg(pri)

# Shown as (intercept, slope, r, p): public first, then private.
(pub_int, pub_slope, pub_r, pub_p), (pri_int, pri_slope, pri_r, pri_p)


In [None]:
# Truman prediction (SAT ≈ 1190): evaluate the public-sector fitted line at
# that SAT value.
sat_truman = 1190
pred_truman = pub_slope * sat_truman + pub_int
pred_truman


In [None]:
# Bonus: one model for both sectors (ANCOVA-style). In the formula,
# `sat_total * sector` expands to both main effects plus their interaction,
# so the sectors may differ in intercept and in slope.
# NOTE(review): which sector is the baseline level depends on how the formula
# engine codes `sector` — confirm from the coefficient names in the summary.
interaction_formula = "graduation_rate ~ sat_total * sector"
m = smf.ols(formula=interaction_formula, data=df).fit()
m.summary()
