In [2]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt
from formatting import comma_del, remove_zeros

In [3]:
# Builds a mapping from county abbreviation to the file name of its cleaned CSV in
# 'County_Data_Clean'. The abbreviation is the part of the file name before the first "_".
# Only '.csv' entries are kept so that stray directory entries (for example
# '.ipynb_checkpoints' or '.DS_Store') are never handed to pd.read_csv later on, and the
# listing is sorted because os.listdir returns entries in arbitrary order.
files = sorted(
    name for name in os.listdir("County_Data_Clean")
    if name.lower().endswith(".csv")
)
counties = [x.split("_")[0] for x in files]
file_dict = dict(zip(counties, files))

# Outputs a CSV of county abbreviations
pd.DataFrame(counties).to_csv(os.path.join('CSVs',"Counties.csv"))

In [4]:
# Loads the precinct-level voter registration data from the 'CSVs' folder.
precinct_csv_path = os.path.join('CSVs', "2018gen_precinct.csv")

# The row labelled 5881 is blank in the raw file, so it is discarded right after loading.
precinct_df = pd.read_csv(precinct_csv_path).drop(index=5881)

In [5]:
# Reads every cleaned county file into a dictionary of data frames keyed by county abbreviation.
# Example: {..., "SEM": <data frame read from the SEM file in County_Data_Clean>, ...}
# The first CSV column is used as the index of each frame.
county_df_dict = {
    abbr: pd.read_csv(os.path.join("County_Data_Clean", file_dict[abbr]), index_col=0)
    for abbr in counties
}

In [6]:
# Removes leading zeros from the precinct numbers in the registration data.
# Series.map returns a new Series rather than modifying the column in place, so the result
# has to be assigned back; otherwise the cleaned values are thrown away and the later merge
# on "Precinct Number" compares uncleaned keys.
# Values are converted to str first, mirroring how the county frames are prepared for the merge.
precinct_df['Precinct Number'] = precinct_df['Precinct Number'].map(str).map(remove_zeros)
precinct_df

Unnamed: 0,County Code,Precinct Number,Republican Party of Florida,Florida Democratic Party,Other,Total
0,ALA,1,584,538,287,1409
1,ALA,2,739,951,379,2069
2,ALA,3,1626,1520,961,4107
3,ALA,4,1406,1040,671,3117
4,ALA,5,733,1552,841,3126
...,...,...,...,...,...,...
5876,WAS,5,729,546,247,1522
5877,WAS,6,221,318,85,624
5878,WAS,7,810,507,226,1543
5879,WAS,8,764,696,175,1635


In [7]:
# Prepares each county frame for the merge with the precinct registration data:
# the identifier column is renamed to the shared "Precinct Number" key, and its values
# are converted to strings and passed through remove_zeros.
for abbr, frame in county_df_dict.items():
    renamed = frame.rename(columns={"Unique PCT Identifier": "Precinct Number"})
    renamed['Precinct Number'] = renamed['Precinct Number'].map(str).map(remove_zeros)
    # Re-assigning an existing key does not change the dict's size, so this is safe mid-iteration.
    county_df_dict[abbr] = renamed

In [8]:
# Splits the precinct registration data into one data frame per county,
# keyed by the county abbreviation found in the 'County Code' column.
precinct_dict = {
    abbr: precinct_df.loc[precinct_df['County Code'] == abbr]
    for abbr in counties
}

In [9]:
# Joins the precinct registration counts onto each county's election data.
# NOTE: this overwrites county_df_dict in place, so the cell is meant to run once per kernel.

# Party columns from the registration file get explicit "Registered ..." names after the merge.
registration_column_names = {
    "Republican Party of Florida": "Registered Republicans",
    "Florida Democratic Party": "Registered Democrats",
    "Other": "Registered Other",
}

for county in counties:
    # Inner join: only precincts present in both sources are kept.
    merged = pd.merge(
        left=county_df_dict[county],
        right=precinct_dict[county],
        how="inner",
        on=["Precinct Number", "County Code"],
    )
    merged = merged.rename(columns=registration_column_names)

    # Passes the registration 'Total' column through comma_del.
    merged['Total'] = merged['Total'].map(comma_del)

    county_df_dict[county] = merged
    
 

In [10]:
# Groups each county's merged data by precinct; the result maps a county abbreviation
# to a pandas GroupBy object keyed on 'Precinct Number'.
county_precinct_gb_dict = {
    abbr: county_df_dict[abbr].groupby('Precinct Number')
    for abbr in counties
}


In [11]:
# Flags precincts whose two registration totals disagree.
#   t = largest 'Total Registered' value among the precinct's rows
#   p = largest 'Total' value among the precinct's rows (the precinct registration total)
# The result maps "<county>--<precinct>" to a list of flags:
#   'p=0'          -> the precinct registration total is zero
#   't=0'          -> the 'Total Registered' figure is zero
#   't=<t>,p=<p>'  -> the two totals differ by more than the tolerance
# Precincts with no flags are left out of the dictionary.

# A precinct is reported when |t - p| exceeds this fraction of (t + p).
TOLERANCE_FRACTION = .005

registration_discrepancies = {}
for county in counties:
    county_groups = county_precinct_gb_dict[county]
    for precinct in county_groups.groups:
        df = county_groups.get_group(precinct)

        # Compute each maximum once and reuse it for every check below.
        turnout_max = int(df['Total Registered'].map(int).max())
        precinct_max = int(df['Total'].map(int).max())

        flags = []
        if precinct_max == 0:
            flags.append('p=0')
        if turnout_max == 0:
            flags.append('t=0')
        if abs(turnout_max - precinct_max) > TOLERANCE_FRACTION * (turnout_max + precinct_max):
            flags.append(f't={turnout_max},p={precinct_max}')

        if flags:
            registration_discrepancies[f"{county}--{precinct}"] = flags

In [43]:
# Displays the flagged precincts; keys have the form "<county>--<precinct>".
registration_discrepancies

{'BRO--L015': ['t=9560,p=956'],
 'CAL--101': ['t=1176,p=564'],
 'CAL--401': ['t=781,p=244'],
 'CAL--501': ['t=882,p=547'],
 'DAD--100': ['t=3951,p=7479'],
 'DAD--107': ['t=938,p=949'],
 'DAD--11': ['t=1809,p=1955'],
 'DAD--124': ['t=2083,p=3289'],
 'DAD--161': ['t=1696,p=1716'],
 'DAD--167': ['t=448,p=507'],
 'DAD--172': ['t=182,p=185'],
 'DAD--182': ['t=764,p=1010'],
 'DAD--196': ['t=2216,p=2847'],
 'DAD--202': ['t=2057,p=2190'],
 'DAD--203': ['t=1211,p=2908'],
 'DAD--205': ['t=1997,p=2040'],
 'DAD--206': ['t=1252,p=2365'],
 'DAD--207': ['t=2937,p=3192'],
 'DAD--213': ['t=1579,p=2915'],
 'DAD--215': ['t=1742,p=2751'],
 'DAD--221': ['t=1259,p=2447'],
 'DAD--222': ['t=1983,p=2564'],
 'DAD--233': ['t=1777,p=2630'],
 'DAD--248': ['t=1833,p=2887'],
 'DAD--250': ['t=1653,p=2565'],
 'DAD--251': ['t=2381,p=2690'],
 'DAD--256': ['t=1344,p=2238'],
 'DAD--258': ['t=718,p=1132'],
 'DAD--261': ['t=1657,p=2321'],
 'DAD--266': ['t=94,p=97'],
 'DAD--269': ['t=1008,p=1600'],
 'DAD--281': ['t=220,p=355

In [36]:
# Prints the 'United States Senator' vote total for every precinct of the 'JEF' county,
# followed by the sum over all of its precincts.
jef_groups = county_precinct_gb_dict['JEF']
JEF_pct = list(jef_groups.groups)

# NOTE: despite its name, this accumulates 'Vote Total' values, not registration counts.
total_registered = 0
for pct in JEF_pct:
    pct_rows = jef_groups.get_group(pct)
    is_senate = pct_rows['Contest Name'] == 'United States Senator'
    senate_votes = pct_rows.loc[is_senate, 'Vote Total'].sum()
    print(senate_votes)
    total_registered += senate_votes

print(total_registered)
    

471
577
681
570
138
118
406
104
280
206
640
758
517
618
714
606
7404
