In [99]:
import os, glob, json

import re, math

import pprint

import numpy as np
import pandas as pd
import matplotlib 
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
%matplotlib inline

# Notebook-wide pandas display settings: never elide columns of wide frames,
# and render every float with a single decimal place.
pd.set_option("display.max_columns", 1000)
# pd.set_option("display.precision", 1)
pd.set_option("display.float_format", lambda x: '%.1f' % x)

import seaborn as sns
# Seaborn defaults for this notebook: 6 x 0.75 figures on the plain "white" style.
# (The mini-histogram cell further down resets the figure size to 2.5 x 0.5.)
sns.set(rc={"figure.figsize": (6, 0.75)})
sns.set_style("white")

from dist_tools import *

In [237]:
# USPS codes of the states processed in this notebook.
pol_states = ['FL', 'IL', 'LA', 'MD', 'MN', 'NC', 'PA', 'TN', 'TX', 'VA', 'WI']

# Display names for the method codes embedded in the result file names, plus the
# enacted-map entries ("107", "111", "114") that are added to `df` as pseudo-methods.
method_full = {"axis_ratio" : "Axis Ratio", "dist_a" : "Areal Distance", "dist_p" : "Population Distance",
               "dyn_radius" : "Dynamic Radius", "ehrenburg" : "Inscribed Circles", "exchange" : "Exchange", 
               "harm_radius" : "Harmonic Radius", "hull_a" : "Hull Area", "hull_p" : "Hull Population", 
               "inertia_a" : "Inertia Area", "inertia_p" : "Inertia Population", "mean_radius" : "Mean Radius", 
               "polsby" : "Isoperimeter Quotient", "power" : "Power Diagram", "reock" : "Circumscribing Circles", 
               "rohrbach" : "Distance to Perimeter", "split" : "Split-Line", "path_frac" : "Path Fraction",
               "107" : "107th Congress", "111" : "111th Congress", "114" : "114th Congress"}

# Full state names; these are formatted into the LaTeX table captions, so spelling matters.
states_full = {"PA" : "Pennsylvania", "LA" : "Louisiana", "NC" : "North Carolina", "WI" : "Wisconsin", 
               "VA" : "Virginia", "FL" : "Florida", "IL" : "Illinois", "MD" : "Maryland", 
               "MN" : "Minnesota", "TN" : "Tennessee", "TX" : "Texas"}

In [83]:
# Seats per state, keyed by upper-case USPS code, read from the `states` table of the
# census database.  (`fips < 57` presumably drops the territories -- TODO confirm.)
# The connection is closed explicitly so that re-running the cell does not leak sessions.
conn = psycopg2.connect(database = "census", user = user, password = passwd,
                        host = "saxon.harris.uchicago.edu", port = 5432)
try:
    seats = pd.read_sql("select upper(usps) usps, seats from states where fips < 57 order by usps;",
                        index_col = "usps", con = conn)["seats"].to_dict()
finally:
    conn.close()

## Vote Share

In [346]:
# Simulated-map result files: every s27x/s28x seed for IL, MD, NC and TX,
# plus the single split-line map (s001) of every state.
file_list = sorted(glob.glob("../chalk/s3/res/json/il_*_s2[78]*") + \
                   glob.glob("../chalk/s3/res/json/md_*_s2[78]*") + \
                   glob.glob("../chalk/s3/res/json/nc_*_s2[78]*") + \
                   glob.glob("../chalk/s3/res/json/tx_*_s2[78]*") + \
                   glob.glob("../chalk/s3/res/json/*_split_s001*"))

# rseat_shares[state][method] -> flat list of district-level Republican vote fractions,
# pooled over all accepted maps and all elections.
rseat_shares = {st : {} for st in pol_states}

# Rows are collected in plain lists and turned into a frame once at the end;
# enlarging a DataFrame one `.loc` row at a time copies the frame on every insert.
row_labels, rows = [], []

for fidx, f in enumerate(file_list):

    with open(f) as fi: j = json.load(fi)

    # Light progress report: one file name per thousand files.
    if not fidx % 1000: print(f)

    state = j["USPS"]

    # Method code is whatever sits between the state prefix and the "_s<seed>" suffix.
    method = re.sub(r".*json/[a-z]{2}_([a-z_]*)_s.*", r"\1", f)

    if state not in pol_states: continue

    # Maximum tolerated population deviation for this state/method.
    # NOTE(review): every TX map, axis_ratio included, is held to 0.05; if a looser
    # 0.10 bound was intended for TX axis_ratio maps it has never been in effect -- confirm.
    if state == "TX":            max_deviation = 0.05
    elif method == "axis_ratio": max_deviation = 0.05
    elif method == "split":      max_deviation = 0.10
    else:                        max_deviation = 0.02

    if j["PopulationDeviation"] > max_deviation: continue

    elections = sorted(j["Elections"].keys())

    shares = rseat_shares[state].setdefault(method, [])

    for el in elections:

        results = [j["Districts"][d]["Elections"][el] for d in range(j["Seats"])]

        # A district is "competitive" when the Democratic fraction is within 2.5 points of 50%.
        ncompetitive = sum(math.fabs(r["DemFrac"] - 0.5) < 0.025 for r in results)

        shares.extend(r["RepFrac"] for r in results)

        # Integer label unique per (file, election); later cells append rows at index.max() + 1.
        row_labels.append(fidx*10000 + int(el))
        rows.append([state, method, int(el), ncompetitive, j["Elections"][el]["DemSeats"]])

# One row per accepted map and election.
df = pd.DataFrame(rows, columns = ["State", "Method", "Year", "Competitive", "Dem Seats"],
                  index = row_labels)

df["Year"] = df["Year"].astype(int)
df["Competitive"] = df["Competitive"].astype(int)
df["Dem Seats"] = df["Dem Seats"].astype(int)

../chalk/s3/res/json/al_split_s001.json
../chalk/s3/res/json/il_dist_p_s284_c003.json
../chalk/s3/res/json/il_exchange_s274_c003.json
../chalk/s3/res/json/il_hull_a_s285_c000.json
../chalk/s3/res/json/il_inertia_p_s276_c009.json
../chalk/s3/res/json/il_polsby_s282_c014.json
../chalk/s3/res/json/il_power_s278_c054.json
../chalk/s3/res/json/il_power_s288_c054.json
../chalk/s3/res/json/md_axis_ratio_s278_c008.json
../chalk/s3/res/json/md_dist_p_s288_c011.json
../chalk/s3/res/json/md_exchange_s278_c011.json
../chalk/s3/res/json/md_hull_a_s288_c011.json
../chalk/s3/res/json/md_inertia_p_s278_c011.json
../chalk/s3/res/json/md_polsby_s283_c011.json
../chalk/s3/res/json/md_power_s278_c071.json
../chalk/s3/res/json/md_power_s288_c071.json
../chalk/s3/res/json/nc_axis_ratio_s277_c014.json
../chalk/s3/res/json/nc_dist_p_s288_c017.json
../chalk/s3/res/json/nc_exchange_s278_c017.json
../chalk/s3/res/json/nc_hull_a_s288_c017.json
../chalk/s3/res/json/nc_inertia_p_s278_c017.json
../chalk/s3/res/json/

In [349]:
# For each of the four fully simulated states, plot the pooled Republican vote-share
# distribution of every method and tabulate the expected number of competitive seats.
ST = ["IL", "MD", "NC", "TX"]
competitive_dfs = []

for s in ST:

    competitive = []
    for m in rseat_shares[s]:

        plot_share(rseat_shares, s, m, True, True)

        shares = rseat_shares[s][m]

        # Fraction of pooled district results within 2.5 points of 50%, scaled to the state's seats.
        n_close = sum(1 for v in shares if math.fabs(v - 0.5) < 0.025)

        competitive.append([m, (seats[s] / len(shares)) * n_close,
                            "XZfigs/{}_{}ZX".format(s.lower(), m)])

    # One frame per state, indexed by the human-readable method name.
    cdf = pd.DataFrame(competitive, columns = ["Method", "# Comp.", "Figure"])
    cdf = cdf.replace({"Method" : method_full}).set_index(["Method"])

    competitive_dfs.append(cdf)

# Side-by-side table: (state) x (count, figure placeholder).
cdf = pd.concat(competitive_dfs, axis = 1)
cdf.columns = pd.MultiIndex.from_product([ST, ["# Comp.", "Rep. Vote Share"]])

In [355]:
# Persist the table, then read it back for display: the two header rows written by
# to_csv are skipped and the two-level column index is supplied explicitly instead.
cdf.to_csv("competitive.csv")

competitive_header = pd.MultiIndex.from_product([ST, ["# Comp.", "Rep. Vote Share"]])
pd.read_csv("competitive.csv", names = competitive_header, skiprows = 2)

Unnamed: 0_level_0,IL,IL,MD,MD,NC,NC,TX,TX
Unnamed: 0_level_1,# Comp.,Rep. Vote Share,# Comp.,Rep. Vote Share,# Comp.,Rep. Vote Share,# Comp.,Rep. Vote Share
Areal Distance,1.9,XZfigs/il_dist_aZX,0.4,XZfigs/md_dist_aZX,2.2,XZfigs/nc_dist_aZX,3.1,XZfigs/tx_dist_aZX
Axis Ratio,1.4,XZfigs/il_axis_ratioZX,0.2,XZfigs/md_axis_ratioZX,2.8,XZfigs/nc_axis_ratioZX,,
Circumscribing Circles,1.7,XZfigs/il_reockZX,0.3,XZfigs/md_reockZX,2.3,XZfigs/nc_reockZX,3.3,XZfigs/tx_reockZX
Distance to Perimeter,1.7,XZfigs/il_rohrbachZX,0.3,XZfigs/md_rohrbachZX,2.3,XZfigs/nc_rohrbachZX,3.4,XZfigs/tx_rohrbachZX
Dynamic Radius,1.9,XZfigs/il_dyn_radiusZX,0.4,XZfigs/md_dyn_radiusZX,2.3,XZfigs/nc_dyn_radiusZX,3.0,XZfigs/tx_dyn_radiusZX
Exchange,1.8,XZfigs/il_exchangeZX,0.3,XZfigs/md_exchangeZX,2.2,XZfigs/nc_exchangeZX,2.9,XZfigs/tx_exchangeZX
Harmonic Radius,1.7,XZfigs/il_harm_radiusZX,0.3,XZfigs/md_harm_radiusZX,2.4,XZfigs/nc_harm_radiusZX,3.0,XZfigs/tx_harm_radiusZX
Hull Area,1.5,XZfigs/il_hull_aZX,0.4,XZfigs/md_hull_aZX,2.6,XZfigs/nc_hull_aZX,2.5,XZfigs/tx_hull_aZX
Hull Population,1.7,XZfigs/il_hull_pZX,0.4,XZfigs/md_hull_pZX,2.4,XZfigs/nc_hull_pZX,3.0,XZfigs/tx_hull_pZX
Inertia Area,2.0,XZfigs/il_inertia_aZX,0.3,XZfigs/md_inertia_aZX,2.3,XZfigs/nc_inertia_aZX,3.0,XZfigs/tx_inertia_aZX


In [4]:
# Append rows for the enacted maps of the 107th, 111th and 114th Congresses to `df`,
# using the session number (as a string) in the "Method" column.
#
# NOTE: the cell appends unconditionally, so re-running it duplicates these rows.
#
# A single connection serves every state and is closed even if one of them fails.
conn = psycopg2.connect(database = "census", user = user, password = passwd,
                        host = "saxon.harris.uchicago.edu", port = 5432)

try:

    for s in pol_states:

        # The state code is passed as a bound parameter rather than formatted into the SQL.
        st = pd.read_sql("select seats, epsg, lower(usps) usps, fips from states where usps = upper(%(usps)s);",
                         con = conn, params = {"usps" : s}).loc[0].to_dict()

        # Kept under its own name: `seats` is the USPS -> seats dict that the
        # competitive-seat cell indexes by state, and must not be rebound to an int.
        n_seats, epsg, fips = st["seats"], st["epsg"], st["fips"]

        votes = pd.read_csv(s.lower() + "_votes.csv", index_col = "rn")

        # Elections already present for this state; the rows appended below reuse
        # the same years, so this does not change while the loop runs.
        years = list(df[df.State == s].Year.unique())

        for ssn in [107, 111, 114]:

            cd = cdmap_seats(str(ssn), ssn, epsg, fips, votes, list(years))

            for y in years:
                el = str(y)

                # Democratic share of districts, scaled to the state's seat count.
                dseats = cd[el].mean() * n_seats

                # Districts whose Democratic fraction is within 2.5 points of 50%.
                competitive = (np.abs(cd[el + " D Fr"] - 0.5) < 0.025).sum()

                df.loc[df.index.max() + 1] = [s, str(ssn), int(el), competitive, dseats]

finally:
    conn.close()

In [5]:
# Alphabetical list of every method code present in df (enacted-map sessions included).
methods = df["Method"].unique().tolist()
methods.sort()

In [12]:
# Mean number of competitive seats per method (rows) and state/election (columns),
# written out as a landscape LaTeX table.
comp = df.groupby(["Method", "State", "Year"])["Competitive"].mean().unstack(level = [1, 2])
comp.index = pd.Series(comp.index).map(method_full)
comp.sort_index(inplace = True)
# sort_index(level=...) replaces DataFrame.sortlevel, which no longer exists in current pandas.
comp.sort_index(axis = 1, level = [0, 1], inplace = True)

# One "|ccc" group per state, as wide as that state's number of elections.
table = comp.to_latex(column_format = "l" + "".join(["|" + "c"*comp[s].shape[1] for s in list(comp.columns.levels[0])]), 
                      multicolumn = True, multicolumn_format= "c|")

# Drop the row holding the index name.
table = re.sub("Method.*", "", table)

# Shrink the four-digit years.  Both backslashes are doubled in the replacement template:
# an unknown escape such as "\s" in a template is an error on Python 3.7+.
table = re.sub(r"([0-9]{4})", r"\\fontsize{8.5}{12}\\selectfont \1", table)

table = table.replace("nan", " -- ")
table = table.replace("c|}{WI", "c}{WI")

# Swap the booktabs rules for plain \hline rules.
table = table.replace("toprule", "hline \\hline ")
table = table.replace("\\midrule", "\\hline")
table = table.replace("bottomrule", "hline \\hline")
table = table.replace("Areal Distance", "\\hline \nAreal Distance")

table = table + "\\caption{Tabulated are the expected number of ``competitive'' seats, with margins of victory less than 2.5\\%, for each State and Election. Still filling in some states with low stats.  \\note{Suggestions for error presentation welcome.}}"
table = table + "\\label{tab:competitiveness}"
table = "\n\\begin{table}\n\\renewcommand{\\arraystretch}{1.3}\n " + table + "\n\\end{table}\n "

# Full-page landscape placement with temporarily narrowed margins.
table = "\\afterpage{\\newgeometry{left=1in,right=1in,top=0.55in,bottom=0.55in}\\begin{landscape}\n\n" + table
table = table + "\n\n\\end{landscape}}\\clearpage\\restoregeometry\n"

with open("tex/competitiveness.tex", "w") as o: o.write(table)



In [8]:
# Mean Democratic seats: one row per method, one column per (state, election year).
(df.groupby(["Method", "State", "Year"])["Dem Seats"]
   .mean()
   .unstack(level = ["State", "Year"]))

State,FL,IL,IL,LA,LA,MD,MD,NC,NC,PA,PA,PA,PA,TX,TX,TX,TX,TX,TX,WI,WI,WI
Year,2008,2008,2016,2012,2016,2008,2016,2012,2016,2000,2004,2008,2012,1996,2000,2004,2008,2012,2016,2004,2008,2012
Method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
107,14.1,15.3,11.7,0.9,0.9,5.0,5.0,4.3,5.4,8.6,7.7,12.0,7.7,16.8,12.0,10.8,14.4,13.2,14.4,4.4,7.1,4.4
111,10.8,15.2,11.4,0.9,0.9,6.0,6.0,5.0,4.0,9.5,8.5,9.5,8.5,12.4,10.1,7.9,12.4,11.2,12.4,4.0,7.0,4.0
114,11.0,16.0,11.0,1.0,1.0,7.0,7.0,3.0,3.0,6.0,6.0,10.0,6.0,14.0,10.0,8.0,11.0,11.0,11.0,3.0,7.0,3.0
axis_ratio,13.1,16.3,11.6,0.8,0.7,6.1,5.9,5.4,5.0,8.2,7.6,11.0,8.4,,,,,,,3.8,7.0,4.6
dist_a,13.0,15.9,11.3,0.7,0.7,6.1,6.0,5.9,5.8,8.4,7.5,11.3,8.4,12.1,8.2,8.5,12.4,11.3,12.5,3.6,6.6,4.7
dist_p,13.2,15.9,11.6,0.8,0.8,6.1,5.9,6.0,5.8,8.3,7.6,10.6,8.2,12.5,8.7,8.6,12.7,11.5,13.3,3.6,6.7,4.3
dyn_radius,13.3,16.4,11.6,0.7,0.7,6.2,5.8,5.5,5.1,8.2,7.5,10.9,8.4,12.4,8.3,8.1,12.5,11.4,12.8,3.7,6.8,4.7
ehrenburg,,16.0,12.0,0.5,0.6,6.3,5.8,5.6,5.3,8.2,7.7,10.8,8.6,12.0,9.0,8.0,11.0,11.0,11.0,3.8,6.8,4.7
exchange,12.5,16.3,11.7,0.8,0.8,6.3,5.8,5.5,5.2,8.5,7.8,10.8,8.4,12.6,8.2,8.4,12.6,11.7,12.8,3.7,6.8,4.6
harm_radius,13.2,16.0,11.5,0.4,0.4,6.2,5.9,6.0,5.2,8.2,7.1,10.7,8.2,12.4,8.0,8.1,12.3,11.4,12.6,3.6,6.7,4.4


In [9]:
# Draw one small histogram of Democratic seats per (state, election year, method) and
# save each twice under mini_hist/: once with x tick labels ("..._ax.pdf") and once without.
# NOTE(review): `states` is not defined in any cell visible here -- confirm where it comes from.
sns.set(rc={"figure.figsize": (2.5, 0.5)})
sns.set_style("white")

for s in states:

    print(s, end = " ")
    for y in sorted(list(df[df.State == s].Year.unique())):
    
        # Common x range for every method of this state/year.  Values are truncated to
        # int before taking min/max; bin edges sit at half-integers so that each
        # unit-width bin is centred on a whole number of seats.
        min_seats = df.loc[(df.State == s) & (df.Year == y), "Dem Seats"].dropna().astype(int).min()
        max_seats = df.loc[(df.State == s) & (df.Year == y), "Dem Seats"].dropna().astype(int).max()
        bins = np.arange(min_seats-0.5, max_seats+0.6)

        for mi, m in enumerate(methods):
            
            # A fresh single-axes figure per method; it is closed at the end of the iteration.
            f, ax = plt.subplots(1, sharex=True, sharey=True)

            # Sorted seat counts over all maps of this state/method/year.
            dseats = sorted(list(df[(df.State == s) & (df.Method == m) & (df.Year == y)]["Dem Seats"].dropna().astype(float)))

            if len(dseats) > 1:

                # Every list handed to distplot is duplicated (* 2) and weighted 0.5 to
                # compensate -- workaround for a seaborn bug:
                # https://stackoverflow.com/questions/42404074/
                #
                # seats90: the values between the 10% and 90% positions of the sorted list,
                # filled in to a contiguous unit-step range from their min to their max.
                seats90 = set(dseats[round(len(dseats)*0.1):round(len(dseats)*0.9)])
                seats90 = list(np.arange(min(seats90), max(seats90)+0.1))

                # Faint black band over that range (un-normalised: each listed value
                # contributes a total weight of 1).
                sns.distplot(seats90 * 2, ax = ax, bins = bins, kde = False, 
                             hist_kws = {"alpha" : 0.1, "color" : "black",
                                         "weights" : [0.5] * len(seats90) * 2})

                # Normalised distribution of seats, in blue.
                sns.distplot(dseats * 2, ax = ax, bins = bins, norm_hist = True, kde = False, 
                             hist_kws={"alpha" : 1.0, "color" : "#4DAFFF",
                                       "weights" : [0.5] * len(dseats) * 2})

            if len(dseats):
                    
                # Vertical line at the mean: red when the method code starts with "1"
                # (the "107"/"111"/"114" enacted-map entries), black otherwise.
                avg_seats = sum(dseats) / len(dseats)
                ax.plot([avg_seats, avg_seats], [0, 1], linewidth = 3, linestyle = "solid", 
                        c = "r" if m[0] == "1" else "k")

            sns.despine(left = True)
            ax.set_xlim(bins[0], bins[-1])
            ax.set_ylim(0, 1)
            ax.set_yticks([])
            ax.set_xticks(range(min_seats, max_seats+1))

            # First save keeps the integer x ticks ...
            f.savefig("mini_hist/{}_{}_{}_ax.pdf".format(s, y, m), bbox_inches='tight', pad_inches=0)

            # ... second save is the same figure with the ticks removed.
            ax.set_xticks([])
            f.savefig("mini_hist/{}_{}_{}.pdf".format(s, y, m), bbox_inches='tight', pad_inches=0)
                
            # Release the figure so that the loop does not accumulate open figures.
            plt.close('all')


FL IL LA MD NC PA TX WI 

In [10]:
# Per-state LaTeX tables: for every method (rows) and election (column pairs), the mean
# number of Democratic seats next to the matching mini-histogram from mini_hist/.
test = df.groupby(["Method", "State", "Year"])[["Dem Seats"]].mean()
test.rename(columns = {"Dem Seats" : "Seats"}, inplace = True)

# "XZ...ZX" brackets mark the figure stem; they are expanded to \includegraphics below.
test["file"] = test.index.map(lambda x : "XZ{}_{}_{}ZX".format(x[1], x[2], x[0]))
test = test.unstack(level = [1, 2])

# Columns become (State, Year, Seats|file).
test = test.reorder_levels([1, 2, 0], axis=1)
test.index = pd.Series(test.index).map(method_full)
# sort_index(level=...) replaces DataFrame.sortlevel, which no longer exists in current pandas.
test.sort_index(axis = 1, level = [0, 1], inplace = True)
test.sort_index(inplace = True)


# Filled with the full state name and the state's seat count.
caption = """\\caption{{Votes from presidential elections in {} are aggregated from precinct-level returns, into maps simulated with each algorithm or compactness metric. 
             The seats expected to accrue to Democrats (mean across maps) are displayed numerically as well as by a solid black line.
             The normalized distribution of seats per metric/algorithm is shown in blue and the 10-90\\% range of possible seats is highlighted in gray.
             The same re-aggregation is performed for enacted maps used for the 107th, 111th, and 114th Congresses and shown in red.
             Since reapportionment shifts the number of seats per state,
               the entries for the 107th and 111th Congresses are the Democratic share,
               times the {} assigned after the 2010 Census.
             }}"""

# A single connection serves every state and is closed even if one of them fails.
conn = psycopg2.connect(database = "census", user = user, password = passwd,
                        host = "saxon.harris.uchicago.edu", port = 5432)

try:

    for s in states:

        # The state code is passed as a bound parameter rather than formatted into the SQL.
        st = pd.read_sql("select seats, epsg, lower(usps) usps, fips from states where usps = upper(%(usps)s);",
                         con = conn, params = {"usps" : s}).loc[0].to_dict()

        # Kept under its own name: `seats` is the USPS -> seats dict that the
        # competitive-seat cell indexes by state, and must not be rebound to an int.
        n_seats = st["seats"]

        years = list(df[df.State == s].Year.unique())
        if s == "TX": years = [2004, 2008, 2012, 2016]

        # Methods with no maps for an election get the blank placeholder figure.
        # A single .loc assignment on `test` itself: the chained form
        # test[s][y]["file"][mask] = ... may write to a temporary copy instead.
        for y in years:
            file_col = (s, y, "file")
            test.loc[test[file_col].isnull(), file_col] = "XZblankZX"

        table = test[s][years].to_latex(column_format = "l" + " rm{7em}" * len(years))
        table = table.replace("XZ", "\\includegraphics[width=7em]{mini_hist/")
        table = table.replace("\\_", "_")
        table = table.replace("ZX", "}")

        # Turn each "Seats & file" header pair into one two-column election header, left to
        # right.  A callable replacement is used because re.sub would otherwise parse the
        # "\m" of \multicolumn as a template escape (an error on Python 3.7+).
        for y in years:
            header = "\\multicolumn{{2}}{{c}}{{{} Presidential}}".format(y)
            table = re.sub(r"Seats & *file", lambda match, header = header: header, table, count = 1)

        # Drop the rows holding the column/index level names.
        table = re.sub("Year.*", "", table)
        table = re.sub("Method.*", "", table)
        table = table.replace("nan", "")
        # Plain string replacement: nothing here needs a regular expression.
        table = table.replace("None", "\\includegraphics[width=7em]{mini_hist/blank}")

        # Swap the booktabs rules for \hline rules with some vertical breathing room.
        table = table.replace("toprule", "hline \\hline \\\\")
        table = table.replace("\\midrule", "\\\\ \\hline \\\\")
        table = table.replace("bottomrule", "hline \\hline")
        table = table.replace("Areal Distance", "\\\\ \\hline \\\\ \nAreal Distance")

        # Split-line rows use the histogram variant saved with x-axis ticks.
        table = table.replace("split", "split_ax")

        table = table + caption.format(states_full[s], n_seats)
        table = table + "\\label{{tab:{}_seats}}".format(s)
        table = "\n\\begin{table}\n\\renewcommand{\\arraystretch}{0.7}\n " + table + "\n\\end{table}\n "

        # More than three elections do not fit on a portrait page.
        if len(years) > 3: table = "\n\n\\begin{landscape} \n" + table + "\n\\end{landscape} "

        with open("tex/{}_table.tex".format(s), "w") as o: o.write(table)

finally:
    conn.close()



In [11]:
# Spot-check: the Pennsylvania columns (Seats / file per election year) of `test`.
test["PA"]

Year,2000,2000,2004,2004,2008,2008,2012,2012
Unnamed: 0_level_1,Seats,file,Seats,file,Seats,file,Seats,file
Method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
107th Congress,8.6,XZPA_2000_107ZX,7.7,XZPA_2004_107ZX,12.0,XZPA_2008_107ZX,7.7,XZPA_2012_107ZX
111th Congress,9.5,XZPA_2000_111ZX,8.5,XZPA_2004_111ZX,9.5,XZPA_2008_111ZX,8.5,XZPA_2012_111ZX
114th Congress,6.0,XZPA_2000_114ZX,6.0,XZPA_2004_114ZX,10.0,XZPA_2008_114ZX,6.0,XZPA_2012_114ZX
Areal Distance,8.4,XZPA_2000_dist_aZX,7.5,XZPA_2004_dist_aZX,11.3,XZPA_2008_dist_aZX,8.4,XZPA_2012_dist_aZX
Axis Ratio,8.2,XZPA_2000_axis_ratioZX,7.6,XZPA_2004_axis_ratioZX,11.0,XZPA_2008_axis_ratioZX,8.4,XZPA_2012_axis_ratioZX
Circumscribing Circles,8.3,XZPA_2000_reockZX,7.6,XZPA_2004_reockZX,11.1,XZPA_2008_reockZX,8.5,XZPA_2012_reockZX
Distance to Perimeter,8.3,XZPA_2000_rohrbachZX,7.3,XZPA_2004_rohrbachZX,11.2,XZPA_2008_rohrbachZX,8.2,XZPA_2012_rohrbachZX
Dynamic Radius,8.2,XZPA_2000_dyn_radiusZX,7.5,XZPA_2004_dyn_radiusZX,10.9,XZPA_2008_dyn_radiusZX,8.4,XZPA_2012_dyn_radiusZX
Exchange,8.5,XZPA_2000_exchangeZX,7.8,XZPA_2004_exchangeZX,10.8,XZPA_2008_exchangeZX,8.4,XZPA_2012_exchangeZX
Harmonic Radius,8.2,XZPA_2000_harm_radiusZX,7.1,XZPA_2004_harm_radiusZX,10.7,XZPA_2008_harm_radiusZX,8.2,XZPA_2012_harm_radiusZX
