In [16]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt
from formatting import comma_del, remove_zeros

In [3]:
#Collects every csv file in 'County_Data_Clean' and produces a dictionary
#mapping county abbreviations (the file-name text before the first "_")
#to file names.
#Example:   {... ,'SEM':'SEM_Data_Clean.csv', ... }
#
#os.listdir returns every directory entry in arbitrary order, so the listing
#is filtered to *.csv (skipping any stray non-data entries) and sorted so the
#county order -- and therefore Counties.csv -- is the same on every run.
files = sorted(
    name for name in os.listdir("County_Data_Clean")
    if name.lower().endswith(".csv")
)
counties = [name.split("_")[0] for name in files]
file_dict = dict(zip(counties, files))

#Outputs a CSV of county abbreviations
pd.DataFrame(counties).to_csv("Counties.csv")

#Source: https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory

In [4]:
# Loads the precinct-level voter registration data from 2018gen_precinct.csv
# and removes the row labelled 5881, which is blank.
precinct_df = pd.read_csv("2018gen_precinct.csv").drop(index=5881)

In [5]:
#Creates Dictionary of Data Frames
#Example: {..., "SEM":<data frame with clean Seminole County data>, ...}
county_df_dict = {}
for county in counties:
    path = os.path.join("County_Data_Clean",file_dict[county])
    county_df_dict[county] = pd.read_csv(path,index_col=0)

In [6]:
#Removes Leading Zeros from Precinct Data
# Series.map returns a new Series rather than modifying the column in place,
# so the result has to be assigned back for the cleanup to take effect.
# Values are cast to str first, matching how the county frames' precinct
# numbers are prepared before the merge.
precinct_df['Precinct Number'] = precinct_df['Precinct Number'].map(str).map(remove_zeros)
precinct_df

Unnamed: 0,County Code,Precinct Number,Republican Party of Florida,Florida Democratic Party,Other,Total
0,ALA,1,584,538,287,1409
1,ALA,2,739,951,379,2069
2,ALA,3,1626,1520,961,4107
3,ALA,4,1406,1040,671,3117
4,ALA,5,733,1552,841,3126
...,...,...,...,...,...,...
5876,WAS,5,729,546,247,1522
5877,WAS,6,221,318,85,624
5878,WAS,7,810,507,226,1543
5879,WAS,8,764,696,175,1635


In [7]:
# Prepares every county frame for the merge: the "Unique PCT Identifier"
# column is renamed to "Precinct Number", and its values are converted to
# strings and then passed through remove_zeros.
for county, frame in county_df_dict.items():
    frame = frame.rename(columns={"Unique PCT Identifier": "Precinct Number"})
    frame['Precinct Number'] = frame['Precinct Number'].map(str).map(remove_zeros)
    county_df_dict[county] = frame

In [8]:
# Splits the precinct registration data by county: each county abbreviation
# maps to the rows of precinct_df whose 'County Code' equals it.
precinct_dict = {
    county: precinct_df[precinct_df['County Code'] == county]
    for county in counties
}

In [9]:
# Joins each county's election data with its precinct registration rows.
# The inner join keeps only rows whose precinct number and county code appear
# in both frames; the party registration columns are then renamed to
# "Registered ..." labels.
registration_labels = {
    "Republican Party of Florida": "Registered Republicans",
    "Florida Democratic Party": "Registered Democrats",
    "Other": "Registered Other",
}
for county in counties:
    merged = county_df_dict[county].merge(
        precinct_dict[county],
        on=["Precinct Number", "County Code"],
        how="inner",
    )
    county_df_dict[county] = merged.rename(columns=registration_labels)

In [10]:
# Keeps only the columns listed below, in this order; every other column
# left over from the merge is dropped.
ordered_columns = [
    "County Code",
    "Election Date",
    "Precinct Number",
    "Precinct Polling Location",
    "Registered Republicans",
    "Registered Democrats",
    "Registered Other",
    "Total Registered",
    "Contest Name",
    "District",
    "Candidate",
    "Party",
    "Vote Total",
]
for county in counties:
    county_df_dict[county] = county_df_dict[county][ordered_columns]

In [11]:
# Stacks every county frame into a single frame with a fresh 0..n-1 index.
election_list = list(county_df_dict.values())
election_data = pd.concat(election_list, keys=None, ignore_index=True)

In [12]:
# Runs comma_del over every value of the count columns so that string
# representations of integers greater than 1000 (e.g. "1,409") lose their
# commas and are cast to int.
count_columns = (
    'Registered Republicans',
    'Registered Democrats',
    'Registered Other',
    'Total Registered',
    'Vote Total',
)
for col in count_columns:
    election_data[col] = list(map(comma_del, election_data[col]))

In [13]:
#Replaces null values in Total_Registered with sum of Registered voters by party
# Done as a single masked, vectorized assignment instead of a row-by-row .loc
# loop. The party columns are combined with "+" (not .sum) so that a missing
# party count still produces a missing total, as the scalar addition did.
missing_total = election_data['Total Registered'].isnull()
if missing_total.any():
    election_data.loc[missing_total, 'Total Registered'] = (
        election_data.loc[missing_total, 'Registered Republicans']
        + election_data.loc[missing_total, 'Registered Democrats']
        + election_data.loc[missing_total, 'Registered Other']
    )

In [14]:
# Builds a combined identifier of the form "<County Code>--<Precinct Number>"
# and stores it in the new "Precinct_Full_ID" column.
county_part = election_data["County Code"]
precinct_part = election_data["Precinct Number"].map(str)
election_data["Precinct_Full_ID"] = county_part + "--" + precinct_part

In [39]:
#OUTPUT (disabled -- uncomment the call below to write the cleaned data to CSV):
#election_data.to_csv("election_data_clean.csv")

In [None]:
#OUTPUT (disabled -- uncomment the call below to write the cleaned data to HTML):
#election_data.to_html("election_data_clean.html")