In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from sklearn.preprocessing import StandardScaler
import openpyxl

from knobs_functions import *

In [3]:
# Compute one correlation table per (state, chamber) combination, over the scores below.
my_score_list = [
    'Polsby-Popper',
    'cut edges',
    'maj Dem seats',
    'maj mean-median',
    'maj partisan bias',
    'average margin',
    'county splits',
]

# Both dictionaries are keyed by the (state, chamber) tuple.
# D  : table built with correlation_table's default ensemble selection
#      (all ensembles, per the original note -- the helper is defined in knobs_functions).
# D0 : table restricted to the 'base0' ensemble only.
D = {}
D0 = {}

for state, chamber in state_chamber_list:
    print(state, chamber)  # progress trace: one line per combination
    D[state, chamber] = correlation_table(state, chamber, my_score_list=my_score_list)
    D0[state, chamber] = correlation_table(
        state,
        chamber,
        my_score_list=my_score_list,
        my_ensemble_list=['base0'],
    )

FL congress
FL upper
FL lower
IL congress
IL upper
IL lower
MI congress
MI upper
MI lower
NC congress
NC upper
NC lower
NY congress
NY upper
NY lower
OH congress
OH upper
OH lower
WI congress
WI upper
WI lower


In [4]:
# Element-wise mean of the correlation tables across all state-chamber combinations.
# Each accumulator starts as an all-zero score-by-score frame; the tables are added
# in place (so the result stays aligned to my_score_list) and then divided by the
# number of combinations.
avg_corr, avg_corr0 = (
    pd.DataFrame(0, index=my_score_list, columns=my_score_list)
    for _ in range(2)
)  # avg_corr0 uses only the base0 ensemble tables

for state_chamber in state_chamber_list:
    avg_corr += D[state_chamber]
    avg_corr0 += D0[state_chamber]

avg_corr /= len(state_chamber_list)
avg_corr0 /= len(state_chamber_list)

# Display the all-ensemble average, rounded for readability.
avg_corr.round(2)

Unnamed: 0,Polsby-Popper,cut edges,maj Dem seats,maj mean-median,maj partisan bias,average margin,county splits
Polsby-Popper,1.0,-0.91,-0.05,0.08,0.03,0.33,-0.95
cut edges,-0.91,1.0,0.06,-0.07,-0.04,-0.34,0.81
maj Dem seats,-0.05,0.06,1.0,-0.24,-0.62,-0.08,0.05
maj mean-median,0.08,-0.07,-0.24,1.0,0.37,-0.09,-0.08
maj partisan bias,0.03,-0.04,-0.62,0.37,1.0,-0.09,-0.03
average margin,0.33,-0.34,-0.08,-0.09,-0.09,1.0,-0.32
county splits,-0.95,0.81,0.05,-0.08,-0.03,-0.32,1.0


In [5]:
# Mark with * the score pairs for which the sign of the correlation is consistent
# (strictly positive everywhere, or strictly negative everywhere) across all
# state-chamber combinations.
#
# The rounded table is cast to object dtype up front: marked cells hold strings such
# as '*0.33', and writing a str into a float64 column is a deprecated incompatible-dtype
# assignment (FutureWarning since pandas 2.1, an error once the upcasting ban is enforced).
avg_corr_marked = avg_corr.round(2).astype(object)
num_tables = len(state_chamber_list)
for score1 in my_score_list:
    for score2 in my_score_list:
        pair_values = [D[state_chamber].loc[score1, score2] for state_chamber in state_chamber_list]
        num_pos = sum(1 for value in pair_values if value > 0)
        # Negatives are counted explicitly: a zero or NaN correlation has no sign, so it
        # must not be treated as "negative" merely because it is not positive.
        num_neg = sum(1 for value in pair_values if value < 0)
        consistent_sign = 1 if num_pos == num_tables else -1 if num_neg == num_tables else 0
        if consistent_sign != 0:
            avg_corr_marked.loc[score1, score2] = f'*{avg_corr_marked.loc[score1, score2]}'
# Persist the marked table (the 'tables' directory must already exist), then display it.
avg_corr_marked.to_excel('tables/avg_corr.xlsx')
avg_corr_marked

Unnamed: 0,Polsby-Popper,cut edges,maj Dem seats,maj mean-median,maj partisan bias,average margin,county splits
Polsby-Popper,*1.0,*-0.91,-0.05,0.08,0.03,*0.33,*-0.95
cut edges,*-0.91,*1.0,0.06,-0.07,-0.04,*-0.34,*0.81
maj Dem seats,-0.05,0.06,*1.0,-0.24,*-0.62,-0.08,0.05
maj mean-median,0.08,-0.07,-0.24,*1.0,0.37,-0.09,-0.08
maj partisan bias,0.03,-0.04,*-0.62,0.37,*1.0,-0.09,-0.03
average margin,*0.33,*-0.34,-0.08,-0.09,-0.09,*1.0,*-0.32
county splits,*-0.95,*0.81,0.05,-0.08,-0.03,*-0.32,*1.0


In [6]:
# Table showing, for each score pair, the (min, max) correlation observed over the
# state-chamber combinations.
#
# Every cell ends up holding a formatted string, so the frame is created with object
# dtype. Initialising it with integer 0 would give int64 columns, and writing a str
# into those is a deprecated incompatible-dtype assignment (FutureWarning since
# pandas 2.1, an error once the upcasting ban is enforced).
range_corr = pd.DataFrame('', columns=my_score_list, index=my_score_list, dtype=object)
for score1 in my_score_list:
    for score2 in my_score_list:
        # Gather the pair's correlation from every table once and reuse it for both extremes.
        pair_values = [D[state_chamber].loc[score1, score2] for state_chamber in state_chamber_list]
        min_corr = min(pair_values)
        max_corr = max(pair_values)
        range_corr.loc[score1, score2] = f'({min_corr:.2f},{max_corr:.2f})'
# Persist the range table (the 'tables' directory must already exist), then display it.
range_corr.to_excel('tables/range_corr.xlsx')
range_corr

Unnamed: 0,Polsby-Popper,cut edges,maj Dem seats,maj mean-median,maj partisan bias,average margin,county splits
Polsby-Popper,"(1.00,1.00)","(-0.95,-0.87)","(-0.33,0.18)","(-0.16,0.31)","(-0.20,0.29)","(0.04,0.69)","(-0.98,-0.92)"
cut edges,"(-0.95,-0.87)","(1.00,1.00)","(-0.17,0.36)","(-0.27,0.14)","(-0.33,0.19)","(-0.63,-0.06)","(0.77,0.88)"
maj Dem seats,"(-0.33,0.18)","(-0.17,0.36)","(1.00,1.00)","(-0.71,0.22)","(-0.84,-0.33)","(-0.41,0.14)","(-0.17,0.34)"
maj mean-median,"(-0.16,0.31)","(-0.27,0.14)","(-0.71,0.22)","(1.00,1.00)","(-0.41,0.85)","(-0.59,0.64)","(-0.31,0.17)"
maj partisan bias,"(-0.20,0.29)","(-0.33,0.19)","(-0.84,-0.33)","(-0.41,0.85)","(1.00,1.00)","(-0.59,0.48)","(-0.30,0.20)"
average margin,"(0.04,0.69)","(-0.63,-0.06)","(-0.41,0.14)","(-0.59,0.64)","(-0.59,0.48)","(1.00,1.00)","(-0.69,-0.04)"
county splits,"(-0.98,-0.92)","(0.77,0.88)","(-0.17,0.34)","(-0.31,0.17)","(-0.30,0.20)","(-0.69,-0.04)","(1.00,1.00)"
