In [2]:
import pandas as pd
import numpy as np
from scipy.stats import linregress
import matplotlib.pyplot as plt
import os

# Path to the cleaned election results, relative to the current working directory.
file_path = os.path.join("CSVs", "election_data_clean.csv")
# The first CSV column is used as the row index.
elec_data = pd.read_csv(file_path, index_col=0)

# One group per distinct value of the "Contest Name" column.
elections = elec_data.groupby(by="Contest Name")

In [4]:
# Keep only the rows of the 'State Representative' contest ...
state_house = elections.get_group("State Representative")
# ... and regroup them by the "District" column so each district can be looked up by key.
house_gb = state_house.groupby(by="District")

In [5]:
#STATE_HOUSE_ANALYZE Function: Compiles results (by percentage) in each state house race
def sh_analyze(District, min_turnout=4):
    """Compile per-precinct registration and two-party vote percentages for one district.

    Parameters
    ----------
    District : int or str
        District identifier. It is interpolated into the group key
        ``f" District {District}"`` (note the leading space) that is looked up
        in the module-level ``house_gb`` grouping.
    min_turnout : float, optional
        Minimum turnout, in percent, a precinct must reach to be kept in the
        result. Defaults to 4, the cut-off that used to be hard-coded.

    Returns
    -------
    list
        ``[df, missing_data]`` where ``df`` is a DataFrame indexed by precinct
        number with the columns 'Precinct Number', 'Registered Republican %',
        'Registered Democrat %', 'Registered Other %', 'Total Registered',
        'Dem Vote %', 'Repub Vote %', 'Total_Votes' and 'Turnout', and
        ``missing_data`` is a list of strings naming the precincts that were
        skipped and why.

    Raises
    ------
    KeyError
        If ``house_gb`` has no group for the requested district.
    """
    def first_vote_total(rows, party):
        # 'Vote Total' of the first row whose 'Party' equals `party`;
        # 0 when the precinct has no row for that party.
        votes = rows.loc[rows['Party'] == party, 'Vote Total']
        return float(votes.iloc[0]) if len(votes) else 0

    missing_data = []
    sh_n = house_gb.get_group(f" District {District}")
    my_data = {'Precinct Number':[],
               'Registered Republican %':[],
               'Registered Democrat %':[],
               'Registered Other %':[],
               'Total Registered':[],
               'Dem Vote %':[],
               'Repub Vote %':[],
               'Total_Votes':[]}
    for precinct, data in sh_n.groupby('Precinct Number'):
        # Registration figures are read from the precinct's first row.
        first_row = data.iloc[0]
        total_registered = float(first_row['Total Registered'])
        dems = first_vote_total(data, 'DEM')
        repubs = first_vote_total(data, 'REP')
        total = dems + repubs

        if total_registered > 0 and total > 0:
            my_data['Precinct Number'].append(precinct)
            my_data['Registered Republican %'].append(
                100*float(first_row['Registered Republicans'])/total_registered)
            my_data['Registered Democrat %'].append(
                100*float(first_row['Registered Democrats'])/total_registered)
            my_data['Registered Other %'].append(
                100*float(first_row['Registered Other'])/total_registered)
            my_data['Total Registered'].append(total_registered)
            my_data['Dem Vote %'].append(100*dems/total)
            my_data['Repub Vote %'].append(100*repubs/total)
            my_data['Total_Votes'].append(total)
        elif total_registered == 0:
            missing_data.append(f"Precinct {precinct} has no registered voters")
        elif total == 0:
            missing_data.append(f"Precinct {precinct} has no major party votes")
        else:
            # Negative or NaN totals used to be dropped without any trace.
            missing_data.append(f"Precinct {precinct} has invalid registration or vote totals")

    df = pd.DataFrame(my_data,index=my_data["Precinct Number"])
    # Turnout counts only the two major-party votes against total registration.
    df['Turnout']=100*df['Total_Votes']/df['Total Registered']

    #We remove precincts below the turnout cut-off (4 percent by default) as these are
    #typically precincts not in the district
    df = df[df['Turnout']>=min_turnout]
    return [df,missing_data]

In [10]:
#Interprets sh_analyze output for each district with a scatterplot of turnout vs. democrats registered
def turnout_scatter(District):
    """Plot precinct turnout against Democratic registration share for one district.

    The scatterplot is saved to ``state_house_district_{District}_turnout`` in
    the current working directory, shown, and then closed. Afterwards the
    result of ``scipy.stats.linregress`` for the same two columns is printed.

    Parameters
    ----------
    District : int or str
        District identifier, forwarded to ``sh_analyze``.

    Returns
    -------
    None
    """
    df = sh_analyze(District)[0]
    dem_registered = df['Registered Democrat %']
    turnout = df['Turnout']

    # Use an explicit figure so the plot never lands on a leftover current
    # figure and can be released once it has been saved and shown.
    fig, ax = plt.subplots()
    ax.scatter(dem_registered, turnout)
    ax.set_xlabel("Percent of Voters Registered as Democrats")
    ax.set_ylabel("Voter Turnout (Percent)")
    ax.set_title(f"Voter Turnout vs. Party by Precincts in House District {District}")
    fig.savefig(f"state_house_district_{District}_turnout")
    plt.show()
    plt.close(fig)

    # linregress cannot fit a line through fewer than two distinct x values
    # (empty district, single precinct, or identical registration shares).
    if dem_registered.nunique() < 2:
        print(f"District {District}: not enough distinct precincts to fit a regression line.")
        return
    print(linregress(dem_registered, turnout))
   

In [7]:
def _weighted_dem_share(district_sh_df, weight_column):
    """Democratic percentage of the two-party vote, weighting each precinct by `weight_column`."""
    weights = district_sh_df[weight_column]
    dem_total = (district_sh_df['Dem Vote %']/100*weights).sum()
    repub_total = (district_sh_df['Repub Vote %']/100*weights).sum()
    combined = dem_total + repub_total
    if combined == 0:
        # No precinct survived sh_analyze's filtering: the share is undefined,
        # so report NaN instead of dividing by zero.
        return float('nan')
    return round(dem_total/combined*100,4)

#Determines the result if every precinct's vote split were weighted by its registered voters
def proportional_vote(District):
    """Return the Democratic two-party share (percent, 4 decimals) for a district
    when each precinct's 'Dem Vote %' / 'Repub Vote %' is weighted by its
    'Total Registered' instead of by the votes actually cast.

    Returns NaN when the weighted totals sum to zero. A KeyError raised by
    ``sh_analyze`` for an unknown district propagates to the caller.
    """
    return _weighted_dem_share(sh_analyze(District)[0], 'Total Registered')

#Determines the actual election result
def actual_vote(District):
    """Return the Democratic two-party share (percent, 4 decimals) for a district,
    weighting each precinct's percentages by its 'Total_Votes'.

    Returns NaN when the weighted totals sum to zero. A KeyError raised by
    ``sh_analyze`` for an unknown district propagates to the caller.
    """
    return _weighted_dem_share(sh_analyze(District)[0], 'Total_Votes')

In [9]:
#Prints, for districts 1 through 120, the actual Democratic share next to the proportional one
for n in range(1, 121):
    try:
        actual_pct = actual_vote(n)
        proportional_pct = proportional_vote(n)
    except KeyError:
        # A KeyError from the lookups is reported as a district with no contested race.
        print(f"The district {n} primary was cancelled because a candidate ran unopposed.")
    else:
        print(f"In district {n}, democrats got {actual_pct}% of the vote. "
              f"Had precincts voted proportionally, they would have gotten {proportional_pct}%")


In district 1, democrats got 39.2184% of the vote. Had precincts voted proportionally, they would have gotten 39.8744%
KeyError District 2
In district 3, democrats got 0.0% of the vote. Had precincts voted proportionally, they would have gotten 0.0%
In district 4, democrats got 27.8709% of the vote. Had precincts voted proportionally, they would have gotten 28.4708%
KeyError District 5
In district 6, democrats got 0.0% of the vote. Had precincts voted proportionally, they would have gotten 0.0%
KeyError District 7
KeyError District 8
KeyError District 9
In district 10, democrats got 24.7237% of the vote. Had precincts voted proportionally, they would have gotten 25.5509%
In district 11, democrats got 30.2657% of the vote. Had precincts voted proportionally, they would have gotten 30.3695%
In district 12, democrats got 40.7737% of the vote. Had precincts voted proportionally, they would have gotten 41.373%
KeyError District 13
KeyError District 14
In district 15, democrats got 49.0878% 

In district 85, democrats got 45.388% of the vote. Had precincts voted proportionally, they would have gotten 45.8756%
In district 86, democrats got 59.824% of the vote. Had precincts voted proportionally, they would have gotten 60.495%
In district 87, democrats got 100.0% of the vote. Had precincts voted proportionally, they would have gotten 100.0%
KeyError District 88
In district 89, democrats got 49.9796% of the vote. Had precincts voted proportionally, they would have gotten 50.6843%
KeyError District 90
KeyError District 91
KeyError District 92
In district 93, democrats got 46.3959% of the vote. Had precincts voted proportionally, they would have gotten 46.8922%
KeyError District 94
KeyError District 95
KeyError District 96
KeyError District 97
In district 98, democrats got 64.043% of the vote. Had precincts voted proportionally, they would have gotten 64.3779%
KeyError District 99
KeyError District 100
KeyError District 101
KeyError District 102
In district 103, democrats got 53