In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell executes, so edits to local project code are picked up without a restart.
%load_ext autoreload
%autoreload 2
# Step up one directory so that relative paths and project imports resolve.
# NOTE(review): presumably the notebook lives one level below the repository
# root -- confirm. The move is relative, so re-running this cell moves up one
# more level each time; run it exactly once per kernel session.
%cd ..

/home/packer.61/Documents/vote-counts


In [2]:
import pandas as pd
from pathlib import Path
import numpyro
from numpyro import distributions as dist
from numpyro.infer import NUTS, MCMC, Predictive
from jax import random, numpy as jnp
import matplotlib.pyplot as plt
import seaborn as sns
import os
from scipy import stats
from src import preprocessing
import numpy as np

# Expose only GPU index 3 to CUDA-backed libraries started from this process.
# NOTE(review): this is set after `jax` has been imported; presumably it still
# takes effect because no device has been initialised yet -- confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

# Data locations, expressed relative to the current working directory.
path_data = Path("data")
path_raw_data = Path(path_data, "raw_data")

In [3]:
def get_pivot_odds(df):
    """Add tie-density and pivot-odds columns to a per-jurisdiction vote table.

    For every row, a Student-t density with 4 degrees of freedom, centred on
    ``p_democrat`` and scaled by ``std_p_democrat``, is evaluated at a vote
    share of 0.5. That density divided by ``total_votes`` is stored as
    ``pivot_odds``, and its natural logarithm as ``log_pivot_odds``.

    The computation is vectorised over all rows, so it also works for an empty
    frame and for frames whose index labels are not unique.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the numeric columns ``p_democrat``, ``std_p_democrat``
        and ``total_votes``.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in (it is modified in place), with the
        columns ``p_05``, ``pivot_odds`` and ``log_pivot_odds`` added.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    """
    # Density of the share distribution at 0.5, computed for all rows at once;
    # scipy broadcasts `loc` and `scale` element-wise.
    df["p_05"] = stats.t.pdf(
        0.5,
        4,
        loc=df["p_democrat"].to_numpy(dtype=float),
        scale=df["std_p_democrat"].to_numpy(dtype=float),
    )

    df["pivot_odds"] = df["p_05"] / df["total_votes"]
    # Reuse the ratio computed above rather than dividing a second time.
    df["log_pivot_odds"] = np.log(df["pivot_odds"])
    return df

In [21]:
raw_data = pd.read_csv(path_raw_data / "President_2020.csv")

# In Minnesota and North Dakota, the Democrats go by different names:
other_dems = ["DEMOCRATIC FARMER LABOR", "DEMOCRATIC-NPL"]
raw_data.loc[raw_data["party_detailed"].isin(other_dems), "party_simplified"] = "DEMOCRAT"

# Preprocessing:
raw_data["party_simplified"] = raw_data["party_simplified"].astype(str)
# Use the nullable integer dtype: assigning `dropna().astype(int)` back to the
# column re-aligns on the index, so rows with a missing FIPS become NaN again
# and the column silently falls back to float.
raw_data["jurisdiction_fips"] = raw_data["jurisdiction_fips"].astype("Int64")
# Keep only the two major parties.
raw_data = raw_data[raw_data["party_simplified"].isin(["DEMOCRAT", "REPUBLICAN"])]

  raw_data = pd.read_csv(path_raw_data / "President_2020.csv")


In [22]:
def get_vote_totals_by(column_name: str, data: "pd.DataFrame | None" = None, std: float = 0.04):
    """Aggregate two-party vote counts per value of ``column_name``.

    Votes are summed per (``column_name``, ``party_simplified``) pair and then
    spread into one column per party, giving one row per jurisdiction.

    Parameters
    ----------
    column_name : str
        Column identifying the jurisdiction to aggregate over
        (for example ``"county_fips"``).
    data : pandas.DataFrame, optional
        Long-format table with ``column_name``, ``party_simplified`` and
        ``votes`` columns. When omitted, the notebook-level ``raw_data`` frame
        is used, which is the original behaviour.
    std : float, default 0.04
        Constant written to ``std_p_democrat`` for every row. This is a fixed
        placeholder; it should eventually be determined from samples.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``column_name`` with one column per party plus
        ``total_votes`` (DEMOCRAT + REPUBLICAN), ``p_democrat``
        (DEMOCRAT / total_votes) and ``std_p_democrat``.

    Raises
    ------
    KeyError
        If a required column is missing, or if no ``DEMOCRAT`` or
        ``REPUBLICAN`` rows are present in the data.
    """
    source = raw_data if data is None else data

    groupby = [column_name, "party_simplified"]
    # After the group-wise sum every (jurisdiction, party) pair holds a single
    # value, so pivot_table's default aggregation leaves the totals unchanged.
    votes_by_party = (
        source[[*groupby, "votes"]]
        .groupby(groupby)
        .sum()
        .reset_index()
        .pivot_table(values="votes", columns="party_simplified", index=column_name)
    )

    votes_by_party["total_votes"] = (
        votes_by_party["DEMOCRAT"] + votes_by_party["REPUBLICAN"]
    )
    votes_by_party["p_democrat"] = (
        votes_by_party["DEMOCRAT"] / votes_by_party["total_votes"]
    )
    votes_by_party["std_p_democrat"] = std
    return votes_by_party

In [24]:
# Two steps: aggregate two-party votes per county, then add the pivot-odds columns.
county_vote_totals = get_vote_totals_by("county_fips")
partisan_votes_by_jurisdiction = get_pivot_odds(county_vote_totals)

In [25]:
# Work on an explicit copy so that re-labelling the index can never touch
# `partisan_votes_by_jurisdiction`.
to_save = partisan_votes_by_jurisdiction[["pivot_odds", "log_pivot_odds"]].copy()
# Write the index as zero-padded 5-character strings (e.g. 1001 -> "01001"),
# using vectorised string operations instead of a per-element lambda.
to_save.index = to_save.index.astype(int).astype(str).str.zfill(5)
to_save.to_csv(path_data / "partisan_county_pivot_odds.csv")