In [132]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
from scipy.stats import linregress
import datetime
from sqlalchemy import create_engine
import psycopg2

In [133]:
#Reading in Elections CSV files
election_2016_path = "data/countypres_2000-2016.csv"
election_2016_df = pd.read_csv(election_2016_path)

election_2020_path= "data/pres20results.csv"
election_2020_df = pd.read_csv(election_2020_path)

election_2020_total_votes= "data/president_county20.csv"
election_2020_total_votes_df = pd.read_csv(election_2020_total_votes)

In [134]:
#Adding a year column to the election 2020 dataframe
election_2020_df['year']=np.nan

#Filling in the year as 2020 in the 2020 dataframe
election_2020_df['year']=election_2020_df.year.fillna(2020)


In [135]:
election_2016_df.dtypes

year                int64
state              object
state_po           object
county             object
FIPS              float64
office             object
candidate          object
party              object
candidatevotes    float64
totalvotes          int64
version             int64
dtype: object

In [136]:
#eliminating unecessary columns for election 2016 dataframe
election_2016_df = election_2016_df.drop(columns=['state_po','FIPS','office','version'])

In [137]:
election_2020_total_votes_df = election_2020_total_votes_df.rename(columns={'current_votes': 'totalvotes'})

In [138]:
#cleaning data from the 2020 total votes dataframe
election_2020_total_votes_df = election_2020_total_votes_df.drop(columns=['total_votes','percent'])

In [139]:
#mergring total votes into the 2020 candidate results information
election_2020_merged_df = pd.merge(election_2020_df, election_2020_total_votes_df, how='outer')

In [140]:
#cleaning data and getting rid of win column
election_2020_merged_df = election_2020_merged_df.drop(columns=['won'])

election_2020_merged_df = election_2020_merged_df.rename(columns={'total_votes' : 'candidatevotes'})

In [141]:
#confirming the total counties in each voting year match
county_count_2016 = len(election_2016_df["county"].unique())
county_count_2020 = len(election_2016_df["county"].unique())
print(county_count_2016)
print(county_count_2020)

1854
1854


In [142]:
#merge the 2 dataframes with an outer join
election_merged_df = pd.merge(election_2016_df, election_2020_merged_df, how='outer')

In [143]:
#getting rid of all presidential data prior to 2016
election_merged_df.drop(election_merged_df[election_merged_df['year'] < 2016].index, inplace = True) 

In [144]:
#converting democrat and republican from 2016 files to match 2020 data
election_merged_df = election_merged_df.replace({'democrat':"DEM",'republican':"REP"})

In [145]:
#dropping all write in candidates from 2020 data
election_merged_df.drop(election_merged_df[election_merged_df['party'] == "WRI"].index, inplace = True) 

In [146]:
#dropping all candidates that had 0 votes in a county 
election_merged_df.drop(election_merged_df[election_merged_df['candidatevotes'] == 0].index, inplace = True)

In [147]:
remove = ['LIB', 'GRN', 'WRI', 'PSL', 'IND', 'ALI', 'CST',
       'ASP', 'OTH', 'UTY', 'LLC', 'SWP', 'BAR', 'PRO', 'NON', 'PRG',
       'UNA', 'BMP', 'GOP', 'BFP', 'APV', 'IAP', 'LLP', 'SEP']
election_merged_df = election_merged_df[~election_merged_df['party'].isin(remove)]

In [153]:
election_merged_df = election_merged_df[~election_merged_df.candidate.str.contains("Other")]

In [148]:
#write the df to a csv file 
election_merged_df.to_csv("election_merged_df.csv")