This notebook contains code that runs statistical tests to see whether the ensemble types differ after controlling for correlations.

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from sklearn.preprocessing import StandardScaler
import re

from knobs_functions import *

In [2]:
# Scores analysed throughout this notebook; the order fixes the row/column order of every table.
my_score_list = [
    'Reock',
    'Polsby-Popper',
    'cut edges',
    'Dem seats',
    'efficiency gap',
    'mean-median',
    'partisan bias',
    'competitive districts',
    'average margin',
    'county splits',
    'MMD coalition',
]
print(my_score_list)

['Reock', 'Polsby-Popper', 'cut edges', 'Dem seats', 'efficiency gap', 'mean-median', 'partisan bias', 'competitive districts', 'average margin', 'county splits', 'MMD coalition']


In [4]:
# Let's find the correlation table for all state-chamber combinations

import warnings

D = dict() # dictionary mapping (state, chamber) to the correlation table using all ensembles

# Silence warnings only while the tables are being computed. A blanket
# warnings.filterwarnings('ignore') would stay active for the rest of the kernel
# session and hide warnings raised by later cells as well.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    for state, chamber in state_chamber_list:
        print(state, chamber)
        D[(state, chamber)] = correlation_table(state, chamber, my_score_list=my_score_list)

FL congress
FL upper


KeyboardInterrupt: 

In [None]:
# Spot-check the correlation table of a single state-chamber combination
D[('WI', 'congress')]

Unnamed: 0,Reock,Polsby-Popper,cut edges,Dem seats,efficiency gap,mean-median,partisan bias,competitive districts,average margin,county splits,MMD coalition
Reock,1.0,0.382458,-0.278829,0.047829,-0.044182,-0.028568,-0.049319,0.002223,0.119166,-0.240068,
Polsby-Popper,0.382458,1.0,-0.867037,-0.225652,0.233802,0.240871,0.284845,-0.02437,0.364352,-0.929502,
cut edges,-0.278829,-0.867037,1.0,0.259139,-0.265516,-0.268315,-0.330383,0.014642,-0.36705,0.77204,
Dem seats,0.047829,-0.225652,0.259139,1.0,-0.997784,-0.625424,-0.84147,-0.060965,-0.369558,0.221627,
efficiency gap,-0.044182,0.233802,-0.265516,-0.997784,1.0,0.642657,0.843912,0.041882,0.402076,-0.229172,
mean-median,-0.028568,0.240871,-0.268315,-0.625424,0.642657,1.0,0.779955,-0.298611,0.639015,-0.23576,
partisan bias,-0.049319,0.284845,-0.330383,-0.84147,0.843912,0.779955,1.0,0.056822,0.476848,-0.279533,
competitive districts,0.002223,-0.02437,0.014642,-0.060965,0.041882,-0.298611,0.056822,1.0,-0.478819,0.013018,
average margin,0.119166,0.364352,-0.36705,-0.369558,0.402076,0.639015,0.476848,-0.478819,1.0,-0.332281,
county splits,-0.240068,-0.929502,0.77204,0.221627,-0.229172,-0.23576,-0.279533,0.013018,-0.332281,1.0,


In [None]:
# Cell-by-cell mean of the correlation tables over all state-chamber combinations.
# A running sum and a running count of the non-NaN entries are kept separately, so that
# each cell is averaged only over the combinations in which it is defined.
sum_corr = pd.DataFrame(0.0, index=my_score_list, columns=my_score_list)
count_corr = pd.DataFrame(0, index=my_score_list, columns=my_score_list)

for state_chamber in state_chamber_list:
    df = D[state_chamber]
    mask = df.notna()
    count_corr += mask.astype(int)  # one more observation wherever the entry is defined
    sum_corr += df.fillna(0)        # NaN entries add nothing to the running sum

# Mean over the non-NaN values, rounded for display
avg_corr = (sum_corr / count_corr).round(2)

In [None]:
# Display the averaged correlation table
avg_corr

Unnamed: 0,Reock,Polsby-Popper,cut edges,Dem seats,efficiency gap,mean-median,partisan bias,competitive districts,average margin,county splits,MMD coalition
Reock,1.0,0.73,-0.72,0.01,-0.02,0.04,-0.01,-0.03,0.26,-0.63,0.06
Polsby-Popper,0.73,1.0,-0.91,0.01,-0.01,0.05,-0.01,-0.04,0.33,-0.95,0.06
cut edges,-0.72,-0.91,1.0,-0.01,0.01,-0.05,0.0,0.05,-0.34,0.81,-0.07
Dem seats,0.01,0.01,-0.01,1.0,-0.99,-0.24,-0.62,0.0,-0.09,-0.01,-0.05
efficiency gap,-0.02,-0.01,0.01,-0.99,1.0,0.24,0.6,-0.01,0.11,0.01,0.05
mean-median,0.04,0.05,-0.05,-0.24,0.24,1.0,0.37,-0.18,0.22,-0.05,0.07
partisan bias,-0.01,-0.01,0.0,-0.62,0.6,0.37,1.0,-0.02,0.14,0.02,0.07
competitive districts,-0.03,-0.04,0.05,0.0,-0.01,-0.18,-0.02,1.0,-0.52,0.04,0.0
average margin,0.26,0.33,-0.34,-0.09,0.11,0.22,0.14,-0.52,1.0,-0.32,0.14
county splits,-0.63,-0.95,0.81,-0.01,0.01,-0.05,0.02,0.04,-0.32,1.0,-0.05


In [None]:
# Let's mark with * the score pairs for which the sign of the correlation is consistent across all state-chamber combinations.
# A pair is marked when every defined, non-zero correlation has the same sign. NaN entries
# compare False against 0 and are therefore ignored; a pair with no positive and no negative
# value at all (e.g. all NaN) is NOT marked.
# The frame is cast to object first because the marked cells hold strings, and writing a
# string into a float column is deprecated in recent pandas versions.
avg_corr_marked = avg_corr.round(2).astype(object)
for score1 in my_score_list:
    for score2 in my_score_list:
        corrs = [D[state_chamber].loc[score1, score2] for state_chamber in state_chamber_list]
        num_pos = sum(1 for c in corrs if c > 0)
        num_neg = sum(1 for c in corrs if c < 0)
        # exactly one of the two signs occurs
        if (num_pos > 0) != (num_neg > 0):
            avg_corr_marked.loc[score1, score2] = f'*{avg_corr_marked.loc[score1, score2]}'
avg_corr_marked.to_excel('tables/avg_corr.xlsx')
avg_corr_marked

Unnamed: 0,Reock,Polsby-Popper,cut edges,Dem seats,efficiency gap,mean-median,partisan bias,competitive districts,average margin,county splits,MMD coalition
Reock,*1.0,*0.73,*-0.72,0.01,-0.02,0.04,-0.01,-0.03,0.26,*-0.63,0.06
Polsby-Popper,*0.73,*1.0,*-0.91,0.01,-0.01,0.05,-0.01,-0.04,*0.33,*-0.95,0.06
cut edges,*-0.72,*-0.91,*1.0,-0.01,0.01,-0.05,0.0,0.05,*-0.34,*0.81,-0.07
Dem seats,0.01,0.01,-0.01,*1.0,*-0.99,-0.24,*-0.62,0.0,-0.09,-0.01,-0.05
efficiency gap,-0.02,-0.01,0.01,*-0.99,*1.0,0.24,*0.6,-0.01,0.11,0.01,0.05
mean-median,0.04,0.05,-0.05,-0.24,0.24,*1.0,0.37,-0.18,0.22,-0.05,0.07
partisan bias,-0.01,-0.01,0.0,*-0.62,*0.6,0.37,*1.0,-0.02,0.14,0.02,0.07
competitive districts,-0.03,-0.04,0.05,0.0,-0.01,-0.18,-0.02,*1.0,*-0.52,0.04,0.0
average margin,0.26,*0.33,*-0.34,-0.09,0.11,0.22,0.14,*-0.52,*1.0,*-0.32,0.14
county splits,*-0.63,*-0.95,*0.81,-0.01,0.01,-0.05,0.02,0.04,*-0.32,*1.0,-0.05


In [None]:
# Process and output table for LaTeX
def stack_string(a):
    """Return a LaTeX header for the score name `a`, one word per line.

    The name is split on runs of spaces and/or hyphens. A single-word name is
    returned unchanged; otherwise the words are stacked inside a ``\\makecell``
    separated by LaTeX line breaks. All words are kept, so a name with three
    or more words is no longer truncated after the second one.
    """
    parts = re.split(r"[ -]+", a)
    if len(parts) == 1:
        return parts[0]
    return '\\makecell{' + ' \\\\ '.join(parts) + '}'
    
# Map every score name to the column header produced by stack_string
col_dict = dict(zip(my_score_list, map(stack_string, my_score_list)))

# Only the column labels are renamed; escape=False keeps the LaTeX markup in them intact
avg_corr_marked.rename(columns=col_dict, inplace=True)
avg_corr_marked.to_latex('latex tables/correlations.tex', escape=False)

  avg_corr_marked.to_latex('latex tables/correlations.tex', escape=False)


In [None]:
# Let's make a table showing the range of values over the state-chamber combinations
# The cells hold "(min,max)" strings, so the frame is created with object dtype up front
# instead of as an integer frame that is later overwritten with strings.
range_corr = pd.DataFrame('', columns=my_score_list, index=my_score_list, dtype=object)
for score1 in my_score_list:
    for score2 in my_score_list:
        corrs = pd.Series(
            [D[state_chamber].loc[score1, score2] for state_chamber in state_chamber_list],
            dtype=float,
        )
        # Series.min/max skip NaN. The built-in min/max return order-dependent results
        # when the list contains NaN (a score can be undefined for some combinations).
        min_corr = corrs.min()
        max_corr = corrs.max()
        range_corr.loc[score1, score2] = f'({min_corr:.2f},{max_corr:.2f})'
range_corr.to_excel('tables/range_corr.xlsx')
range_corr

Unnamed: 0,Reock,Polsby-Popper,cut edges,Dem seats,efficiency gap,mean-median,partisan bias,competitive districts,average margin,county splits,MMD coalition
Reock,"(1.00,1.00)","(0.38,0.87)","(-0.88,-0.28)","(-0.13,0.22)","(-0.23,0.13)","(-0.12,0.24)","(-0.16,0.21)","(-0.35,0.16)","(-0.03,0.59)","(-0.80,-0.24)","(-0.22,0.23)"
Polsby-Popper,"(0.38,0.87)","(1.00,1.00)","(-0.95,-0.87)","(-0.23,0.31)","(-0.33,0.23)","(-0.16,0.31)","(-0.21,0.28)","(-0.42,0.19)","(0.05,0.69)","(-0.98,-0.92)","(-0.31,0.26)"
cut edges,"(-0.88,-0.28)","(-0.95,-0.87)","(1.00,1.00)","(-0.32,0.26)","(-0.27,0.33)","(-0.27,0.14)","(-0.33,0.22)","(-0.17,0.38)","(-0.62,-0.07)","(0.77,0.88)","(-0.25,0.29)"
Dem seats,"(-0.13,0.22)","(-0.23,0.31)","(-0.32,0.26)","(1.00,1.00)","(-1.00,-0.97)","(-0.69,0.22)","(-0.84,-0.32)","(-0.13,0.29)","(-0.39,0.22)","(-0.31,0.22)","(-0.18,0.06)"
efficiency gap,"(-0.23,0.13)","(-0.33,0.23)","(-0.27,0.33)","(-1.00,-0.97)","(1.00,1.00)","(-0.23,0.69)","(0.31,0.84)","(-0.30,0.14)","(-0.22,0.40)","(-0.23,0.33)","(-0.05,0.17)"
mean-median,"(-0.12,0.24)","(-0.16,0.31)","(-0.27,0.14)","(-0.69,0.22)","(-0.23,0.69)","(1.00,1.00)","(-0.43,0.84)","(-0.69,0.53)","(-0.29,0.64)","(-0.31,0.16)","(-0.01,0.19)"
partisan bias,"(-0.16,0.21)","(-0.21,0.28)","(-0.33,0.22)","(-0.84,-0.32)","(0.31,0.84)","(-0.43,0.84)","(1.00,1.00)","(-0.47,0.35)","(-0.30,0.58)","(-0.28,0.21)","(-0.31,0.26)"
competitive districts,"(-0.35,0.16)","(-0.42,0.19)","(-0.17,0.38)","(-0.13,0.29)","(-0.30,0.14)","(-0.69,0.53)","(-0.47,0.35)","(1.00,1.00)","(-0.67,-0.32)","(-0.19,0.42)","(-0.17,0.11)"
average margin,"(-0.03,0.59)","(0.05,0.69)","(-0.62,-0.07)","(-0.39,0.22)","(-0.22,0.40)","(-0.29,0.64)","(-0.30,0.58)","(-0.67,-0.32)","(1.00,1.00)","(-0.69,-0.05)","(-0.07,0.34)"
county splits,"(-0.80,-0.24)","(-0.98,-0.92)","(0.77,0.88)","(-0.31,0.22)","(-0.23,0.33)","(-0.31,0.16)","(-0.28,0.21)","(-0.19,0.42)","(-0.69,-0.05)","(1.00,1.00)","(-0.25,0.33)"


We'll run ANCOVA to determine whether the ensemble type affects the 'average margin' even after controlling for compactness and county-splitting.

The answer is YES, but we omitted this ANCOVA analysis from the table because "controlling for" is based on linear regression, and our relationships are non-linear.

In [None]:
# Now let's run ANCOVA for one example: FL congress

# Load the dataframe explicitly so this cell does not depend on whatever `df` was left
# behind by another cell (the averaging loop above leaves a correlation table in `df`,
# which has no 'ensemble' column).
# NOTE(review): my_ensemble_list is assumed to come from knobs_functions, as in the loop
# over all state-chamber combinations below -- confirm.
corr, df = correlation_table('FL', 'congress', my_ensemble_list=my_ensemble_list, my_score_list=my_score_list, step_size=1, return_dataframe=True)

# first convert all measurements to standard units, so that the regression coefficients are comparable
std_cols = ["average margin", "Polsby-Popper", "county splits"]
df_std = df.copy()
scaler = StandardScaler()
df_std[std_cols] = scaler.fit_transform(df[std_cols])

# Now find the regression coefficients
model_std = ols('Q("average margin") ~ Q("Polsby-Popper") + Q("county splits") + C(ensemble)', data=df_std).fit()
print(model_std.summary())

                             OLS Regression Results                            
Dep. Variable:     Q("average margin")   R-squared:                       0.192
Model:                             OLS   Adj. R-squared:                  0.192
Method:                  Least Squares   F-statistic:                     4326.
Date:                 Sat, 03 May 2025   Prob (F-statistic):               0.00
Time:                         12:29:54   Log-Likelihood:            -2.6244e+05
No. Observations:               200000   AIC:                         5.249e+05
Df Residuals:                   199988   BIC:                         5.250e+05
Df Model:                           11                                         
Covariance Type:             nonrobust                                         
                                  coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------------
Intercep

In [None]:
# Type II ANOVA table (typ=2) for the model fitted in the previous cell
ancova_table = anova_lm(model_std, typ=2)
print(ancova_table)

                           sum_sq        df           F         PR(>F)
C(ensemble)           5894.710767       9.0  810.759575   0.000000e+00
Q("Polsby-Popper")     128.433261       1.0  158.982604   1.949074e-36
Q("county splits")     773.015896       1.0  956.886703  1.325266e-209
Residual            161559.255088  199988.0         NaN            NaN


In [None]:
# Collect the ANCOVA results for all state-chamber combinations

Regression_dict = {}  # '<state>_<chamber>' -> regression coefficients of the standardized model
ANCOVA_dict = {}      # '<state>_<chamber>' -> 'sum_sq' column of the type-II ANCOVA table

for state, chamber in state_chamber_list:
    print(state, chamber)
    corr, df = correlation_table(
        state,
        chamber,
        my_ensemble_list=my_ensemble_list,
        my_score_list=my_score_list,
        step_size=1,
        return_dataframe=True,
    )

    # Put the response and both covariates in standard units so that the
    # regression coefficients are comparable with each other
    df_std = df.copy()
    scaler = StandardScaler()
    df_std[["average margin", "Polsby-Popper", "county splits"]] = scaler.fit_transform(
        df[["average margin", "Polsby-Popper", "county splits"]]
    )

    model_std = ols(
        'Q("average margin") ~ Q("Polsby-Popper") + Q("county splits") + C(ensemble)',
        data=df_std,
    ).fit()

    key = f'{state}_{chamber}'
    Regression_dict[key] = model_std.params
    ANCOVA_dict[key] = anova_lm(model_std, typ=2)['sum_sq']

In [None]:
# One row per state-chamber combination, one column per term of the ANCOVA table
ANCOVA_df = pd.DataFrame(ANCOVA_dict).transpose()
# Append the column-wise mean over all combinations as an extra "average" row
ancova_column_means = ANCOVA_df.mean(numeric_only=True)
ANCOVA_df.loc["average"] = ancova_column_means
ANCOVA_df

Unnamed: 0,C(ensemble),"Q(""Polsby-Popper"")","Q(""county splits"")",Residual
FL_congress,5894.710767,128.433261,773.015896,161559.255088
FL_upper,1727.000876,19.677985,130.029015,190422.404614
FL_lower,2283.173181,28.56555,334.860524,173526.505366
IL_congress,1118.572662,325.053525,2189.610053,190947.108176
IL_upper,1237.856254,355.658702,87.527462,178280.81191
IL_lower,5439.324841,230.827466,1066.827742,190121.080742
MI_congress,137.758513,4.530085,109.218003,199368.101494
MI_upper,1832.810112,821.842646,9.137649,173819.756931
MI_lower,3505.144981,29.620847,338.95064,189974.683463
NC_congress,334.027642,185.123294,61.75213,194707.382108


In [None]:
# One row per state-chamber combination, one column per regression coefficient
Regression_df = pd.DataFrame(Regression_dict).transpose()
# Append the column-wise mean over all combinations as an extra "average" row
regression_column_means = Regression_df.mean(numeric_only=True)
Regression_df.loc["average"] = regression_column_means
Regression_df.round(2)

Unnamed: 0,Intercept,C(ensemble)[T.county100],C(ensemble)[T.county25],C(ensemble)[T.county50],C(ensemble)[T.county75],C(ensemble)[T.distpair],C(ensemble)[T.distpair_ust],C(ensemble)[T.pop_minus],C(ensemble)[T.pop_plus],C(ensemble)[T.ust],"Q(""Polsby-Popper"")","Q(""county splits"")"
FL_congress,0.24,-0.46,-0.11,-0.28,-0.43,-0.33,-0.61,0.03,0.0,-0.21,0.09,-0.37
FL_upper,0.06,-0.06,0.04,0.04,-0.05,-0.17,-0.36,-0.02,0.06,-0.12,-0.04,-0.19
FL_lower,0.24,-0.42,-0.2,-0.35,-0.4,-0.3,-0.49,-0.04,0.03,-0.19,0.05,-0.36
IL_congress,0.22,-0.74,-0.27,-0.5,-0.72,-0.06,-0.03,0.0,-0.01,0.08,-0.14,-0.62
IL_upper,0.14,-0.34,-0.02,-0.18,-0.3,-0.21,-0.3,-0.01,0.01,-0.07,0.19,-0.19
IL_lower,0.6,-1.72,-0.39,-1.06,-1.58,-0.42,-0.64,-0.0,0.01,-0.2,-0.16,-0.78
MI_congress,0.07,-0.19,-0.05,-0.15,-0.2,-0.06,-0.04,0.0,-0.0,-0.01,-0.02,-0.14
MI_upper,0.08,-0.16,0.03,-0.02,-0.12,-0.23,-0.3,0.03,-0.01,-0.01,0.28,-0.05
MI_lower,0.36,-0.82,-0.3,-0.57,-0.77,-0.34,-0.62,0.01,-0.01,-0.2,-0.06,-0.41
NC_congress,-0.06,0.3,0.07,0.22,0.29,-0.03,-0.15,0.01,0.0,-0.09,-0.11,-0.12
