## Heatmap of 2020 Presidential Polls on US map

### load packages

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

### load csv file

In [3]:
pres_polls_df = pd.read_csv('../data/clean_data/2020/pres_polls_20201011.csv',sep=';')
pres_polls_df.head()

Unnamed: 0,id,Day,Len,State,EV,Dem,GOP,Ind,Date,Pollster
0,1,275.5,4,Alabama,9,37,57,,Oct-03,Auburn U. at Montgomery-4
1,2,210.5,10,Alabama,9,36,58,,Aug-02,Morning Consult-10
2,3,187.5,8,Alabama,9,41,55,,Jul-09,Auburn U. at Montgomery-8
3,4,36.0,3,Alabama,9,38,58,,Feb-06,Mason-Dixon-3
4,5,273.5,10,Alaska,3,46,50,,Oct-04,Alaska Survey Research-10


In [4]:
xls = pd.ExcelFile('../ERD/data_ERD/Important tables-2016.xlsx')
state_abbrev = pd.read_excel(xls, '2016 pres results')
state_abbrev.head()

Unnamed: 0,STATE,STATE ABBREVIATION,FIRST NAME,LAST NAME,"LAST NAME, FIRST",PARTY,GENERAL RESULTS,GENERAL %,WINNER INDICATOR
0,Alabama,AL,Donald J.,Trump,"Trump, Donald J.",REP,1318255.0,0.620831,W
1,Alabama,AL,Hillary,Clinton,"Clinton, Hillary",DEM,729547.0,0.343579,
2,Alabama,AL,Gary,Johnson,"Johnson, Gary",IND,44467.0,0.020942,
3,Alabama,AL,,Scattered,"Scattered,",W,21712.0,0.010225,
4,Alabama,AL,Jill,Stein,"Stein, Jill",IND,9391.0,0.004423,


In [5]:
# remove columns in pres_polls_df - updated to one line for ease
pres_polls_df.drop(['id', 'Day', 'Len', 'Ind', 'EV', 'Date', 'Pollster'], axis='columns', inplace=True)
pres_polls_df

Unnamed: 0,State,Dem,GOP
0,Alabama,37,57
1,Alabama,36,58
2,Alabama,41,55
3,Alabama,38,58
4,Alaska,46,50
...,...,...,...
621,Wisconsin,42,49
622,Wisconsin,42,44
623,Wisconsin,49,45
624,Wisconsin,46,41


In [6]:
# drop columns, rename columns, remove duplicates, reset index in state_abbrev
state_abbrev.drop(state_abbrev.columns[2:9], axis='columns', inplace=True)
state_abbrev.rename(columns={'STATE':'State'}, inplace=True)
state_abbrev.rename(columns={'STATE ABBREVIATION':'State_Abbreviation'}, inplace=True)
state_abbrev = state_abbrev.drop_duplicates('State')
state_abbrev = state_abbrev.reset_index(drop=True)
state_abbrev

Unnamed: 0,State,State_Abbreviation
0,Alabama,AL
1,Alaska,AK
2,Arizona,AZ
3,Arkansas,AR
4,California,CA
5,Colorado,CO
6,Connecticut,CT
7,Delaware,DE
8,District of Columbia,DC
9,Florida,FL


In [7]:
# pivot data to find aggregated mean of the party preference in each State
state_avg = pres_polls_df.groupby('State', as_index=False).agg('mean')
state_avg['Dem'] = state_avg['Dem'].astype(int)
state_avg['GOP'] = state_avg['GOP'].astype(int)
state_avg

Unnamed: 0,State,Dem,GOP
0,Alabama,38,57
1,Alaska,45,49
2,Arizona,48,43
3,Arkansas,45,47
4,California,60,30
5,Colorado,51,39
6,Connecticut,51,33
7,D.C.,93,4
8,Delaware,56,36
9,Florida,48,44


In [8]:
# adding State_Abbreviations to pres_polls_df
pp_avg = pd.merge(state_avg, state_abbrev, on = 'State', how = 'left')
pp_avg

Unnamed: 0,State,Dem,GOP,State_Abbreviation
0,Alabama,38,57,AL
1,Alaska,45,49,AK
2,Arizona,48,43,AZ
3,Arkansas,45,47,AR
4,California,60,30,CA
5,Colorado,51,39,CO
6,Connecticut,51,33,CT
7,D.C.,93,4,
8,Delaware,56,36,DE
9,Florida,48,44,FL


In [9]:
# finding party preference where Dem is shown as positive percentage and GOP is shown as negative percentage
pp_avg['party_pref'] = np.where((pp_avg['Dem'] >= pp_avg['GOP']), 
                                       pp_avg['Dem']/100, ~ pp_avg['GOP']/100)
pp_avg

Unnamed: 0,State,Dem,GOP,State_Abbreviation,party_pref
0,Alabama,38,57,AL,-0.58
1,Alaska,45,49,AK,-0.5
2,Arizona,48,43,AZ,0.48
3,Arkansas,45,47,AR,-0.48
4,California,60,30,CA,0.6
5,Colorado,51,39,CO,0.51
6,Connecticut,51,33,CT,0.51
7,D.C.,93,4,,0.93
8,Delaware,56,36,DE,0.56
9,Florida,48,44,FL,0.48


### create US map

In [10]:
fig = px.choropleth(pp_avg,  # Input Pandas DataFrame
                    locations='State_Abbreviation',  # DataFrame column with locations
                    color='party_pref', # DataFrame column with color value
                    color_continuous_scale=px.colors.diverging.RdBu, # from https://plotly.com/python/builtin-colorscales/
                    color_continuous_midpoint=0, # party preference midpoint
                    hover_name='State', # DataFrame column hover info
                    hover_data=['Dem','GOP'],
                    locationmode = 'USA-states', # Set to plot as US States
                    labels={'party_pref':'Party Preference'})

fig.update_layout(
    title_text='2020 President Polls as at 11th October 2020', 
    geo_scope='usa')  # Plot only the USA instead of globe

                    
fig.show()