In [20]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
from scipy.stats import linregress
import datetime
from sqlalchemy import create_engine
import psycopg2

In [21]:
#Reading in Elections CSV files
election_2016_path = "data/countypres_2000-2016.csv"
election_2016_df = pd.read_csv(election_2016_path)

election_2020_path= "data/pres20results.csv"
election_2020_df = pd.read_csv(election_2020_path)

In [22]:
#Adding a year column to the election 2020 dataframe
election_2020_df['year']=np.nan

#Filling in the year as 2020 in the 2020 dataframe
election_2020_df['year']=election_2020_df.year.fillna(2020)


In [23]:
election_2016_df.dtypes

year                int64
state              object
state_po           object
county             object
FIPS              float64
office             object
candidate          object
party              object
candidatevotes    float64
totalvotes          int64
version             int64
dtype: object

In [24]:
#eliminating unecessary columns for election 2016 dataframe
election_2016_df = election_2016_df.drop(columns=['state_po','FIPS','office','version','totalvotes'])

In [25]:
#renaming total_votes to candidatevotes in election_2020_df
election_2020_df = election_2020_df.rename(columns={'total_votes': 'candidatevotes'})

#dropping unecessary columns for election 2020 dataframe
election_2020_df=election_2020_df.drop(columns=['won'])

In [26]:
election_2020_df.dtypes

state              object
county             object
candidate          object
party              object
candidatevotes      int64
year              float64
dtype: object

In [27]:
#merge the 2 dataframes with an outer join
election_merged_df = pd.merge(election_2016_df, election_2020_df, how='outer')

In [28]:
election_merged_df.dtypes

year                int64
state              object
county             object
candidate          object
party              object
candidatevotes    float64
dtype: object

In [29]:
#getting rid of all presidential data prior to 2016
election_merged_df.drop(election_merged_df[election_merged_df['year'] < 2016].index, inplace = True) 

In [30]:
#converting democrat and republican from 2016 files to match 2020 data
election_merged_df = election_merged_df.replace({'democrat':"DEM",'republican':"REP"})

In [31]:
#dropping all write in candidates from 2020 data
election_merged_df.drop(election_merged_df[election_merged_df['party'] == "WRI"].index, inplace = True) 

In [34]:
#dropping all candidates that had 0 votes in a county 
election_merged_df.drop(election_merged_df[election_merged_df['candidatevotes'] == 0].index, inplace = True)

#dropping all candidates that had less than 10 votes in a county
election_merged_df.drop(election_merged_df[election_merged_df['candidatevotes'] < 10].index, inplace = True) 

In [36]:
#write the df to a csv file 
election_merged_df.to_csv("election_merged_df.csv")