In [1]:
# !pip install plotly

In [103]:
import pandas as pd
import numpy as np 
import plotly.express as px
import warnings

# SettingWithCopyWarning is not exported from the same module in every pandas
# release, so try the public location first and fall back to the legacy one.
try:
    from pandas.errors import SettingWithCopyWarning
except ImportError:
    try:
        from pandas.core.common import SettingWithCopyWarning
    except ImportError:
        # Neither module provides the class, so there is nothing to silence.
        SettingWithCopyWarning = None

if SettingWithCopyWarning is not None:
    warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

In [132]:
# Read the three archive CSV files, in order: the poll table, the per-state
# coordinates, and the state abbreviation lookup.
polls, geo, abbre = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/archive/electoral_vote_pres_polls_20201011.csv",
        "../data/archive/us_state_geo_location.csv",
        "../data/archive/us_state_abbreviations.csv",
    )
)

In [105]:
# Restrict the polls table to the five columns the rest of the notebook uses.
polls = polls.loc[:, ['Day', 'State', 'EV', 'Dem', 'GOP']]

In [106]:
# Merge the poll rows with the per-state coordinates.
# The join key is spelled out so the merge cannot silently change meaning if
# the two tables ever share another column name. Because this is an inner
# join, poll rows whose State has no match in `geo` are dropped.
polls_geo = pd.merge(polls, geo, how='inner', on='State')
polls_geo

Unnamed: 0,Day,State,EV,Dem,GOP,Latitude,Longitude
0,275.5,Alabama,9,37,57,32.318230,-86.902298
1,210.5,Alabama,9,36,58,32.318230,-86.902298
2,187.5,Alabama,9,41,55,32.318230,-86.902298
3,36.0,Alabama,9,38,58,32.318230,-86.902298
4,273.5,Alaska,3,46,50,66.160507,-153.369141
...,...,...,...,...,...,...,...
620,46.0,Wisconsin,10,42,49,44.500000,-89.500000
621,43.0,Wisconsin,10,42,44,44.500000,-89.500000
622,10.0,Wisconsin,10,49,45,44.500000,-89.500000
623,6.5,Wisconsin,10,46,41,44.500000,-89.500000


In [107]:
# Order the rows by State (ascending) and, within each state, by Day from
# highest to lowest, then renumber the index from 0.
# A single two-key sort replaces the per-group apply: it is vectorised and
# keeps the State column without depending on how groupby.apply treats the
# grouping column.
polls_geo = (
    polls_geo
    .sort_values(['State', 'Day'], ascending=[True, False])
    .reset_index(drop=True)
)

In [108]:
# Select first & latest polls within each state.
# polls_geo is sorted by Day descending inside each state, so the last row of
# a group has the lowest Day ("first") and the first row the highest ("latest").
# .copy() makes both results independent frames, so columns can be added to
# them later without writing into a selection of polls_geo.
polls_geo_first = polls_geo.groupby('State').tail(1).copy()
polls_geo_late = polls_geo.groupby('State').head(1).copy()

In [109]:
# Add a 'Stage' column to distinguish first & latest polls.
# assign() returns a new frame rather than setting a column on the result of
# a groupby selection, which is what triggers SettingWithCopyWarning.
polls_geo_late = polls_geo_late.assign(Stage="Latest")
polls_geo_first = polls_geo_first.assign(Stage="First")

In [110]:
# Stack the "First" rows followed by the "Latest" rows into one table with a
# fresh 0..n-1 index.
# pd.concat is used because DataFrame.append is deprecated and no longer
# exists in pandas 2.x.
polls_geo_all = pd.concat([polls_geo_first, polls_geo_late], ignore_index=True)
polls_geo_all

Unnamed: 0,Day,State,EV,Dem,GOP,Latitude,Longitude,Stage
0,36.0,Alabama,9,38,58,32.318230,-86.902298,First
1,182.0,Alaska,3,48,49,66.160507,-153.369141,First
2,3.0,Arizona,11,46,46,34.048927,-111.093735,First
3,161.5,Arkansas,6,45,47,34.799999,-92.199997,First
4,15.0,California,55,59,35,36.778259,-119.417931,First
...,...,...,...,...,...,...,...,...
95,259.0,Virginia,13,48,43,37.926868,-78.024902,Latest
96,255.0,Washington,12,58,36,47.751076,-120.740135,Latest
97,273.5,West Virginia,5,38,56,39.000000,-80.500000,Latest
98,279.5,Wisconsin,10,51,41,44.500000,-89.500000,Latest


In [175]:
# Animated bubble map of the USA with one frame per Stage value.
# Each state is drawn at its Latitude/Longitude, sized by the EV column and
# coloured by the GOP column.
fig = px.scatter_geo(
    polls_geo_all,
    lat="Latitude",
    lon="Longitude",
    scope="usa",
    size="EV",
    color="GOP",
    hover_name="State",
    animation_frame="Stage",
    labels={"GOP": "% Preference to Republican Party"},
    title="The U.S. President Polls Electoral Vote in 2020 (by 11th October)",
)
fig.show()

In [138]:
# Find the change in percentage between first & latest polls (latest - first).
# polls_geo is sorted by Day descending inside each state, so head(1) is the
# latest poll and tail(1) the first one.
party_cols = ['Dem', 'GOP']
polls_by_state = polls_geo.groupby('State')
latest_share = polls_by_state.head(1).set_index('State')[party_cols]
first_share = polls_by_state.tail(1).set_index('State')[party_cols]

# Only the two party columns are subtracted; Day, EV and the coordinates are
# not needed for the difference.
# NOTE(review): fill_value=0 is kept from the original; it makes a value that
# is missing on one side count as 0 instead of yielding NaN — confirm intended.
polls_dif = latest_share.subtract(first_share, fill_value=0).reset_index()

# Attach the abbreviation columns; the join key is explicit.
polls_dif = pd.merge(polls_dif, abbre, on='State')
polls_dif

Unnamed: 0,State,Dem,GOP,Abbrev,Code
0,Alabama,-1,-1,Ala.,AL
1,Alaska,-2,1,Alaska,AK
2,Arizona,3,-3,Ariz.,AZ
3,Arkansas,0,0,Ark.,AR
4,California,0,-3,Calif.,CA
5,Colorado,4,-3,Colo.,CO
6,Connecticut,0,-4,Conn.,CT
7,Delaware,-2,-7,Del.,DE
8,Florida,-2,-5,Fla.,FL
9,Georgia,4,-5,Ga.,GA


In [156]:
# Build one frame per party with a common layout:
# State, Percentage, Code, Party.
# rename() and assign() each return a new frame, so nothing is written into a
# column subset of polls_dif (no inplace mutation, no SettingWithCopyWarning).
polls_dif_GOP = (
    polls_dif[['State', 'GOP', 'Code']]
    .rename(columns={'GOP': 'Percentage'})
    .assign(Party="GOP")
)
polls_dif_Dem = (
    polls_dif[['State', 'Dem', 'Code']]
    .rename(columns={'Dem': 'Percentage'})
    .assign(Party="Dem")
)

In [157]:
# Stack the GOP rows followed by the Dem rows into one table with a fresh
# 0..n-1 index.
# pd.concat is used because DataFrame.append is deprecated and no longer
# exists in pandas 2.x.
polls_dif_all = pd.concat([polls_dif_GOP, polls_dif_Dem], ignore_index=True)
polls_dif_all

Unnamed: 0,State,Percentage,Code,Party
0,Alabama,-1,AL,GOP
1,Alaska,1,AK,GOP
2,Arizona,-3,AZ,GOP
3,Arkansas,0,AR,GOP
4,California,-3,CA,GOP
...,...,...,...,...
95,Virginia,0,VA,Dem
96,Washington,1,WA,Dem
97,West Virginia,0,WV,Dem
98,Wisconsin,5,WI,Dem


In [167]:
# Animated US choropleth with one frame per Party value.
# States are matched through their two-letter Code and coloured by the
# Percentage column on a red-blue scale clamped to [-10, 10].
fig1 = px.choropleth(
    polls_dif_all,
    locations="Code",
    locationmode="USA-states",
    scope="usa",
    color="Percentage",
    color_continuous_scale="RdBu",
    range_color=(-10, 10),
    hover_name="State",
    animation_frame="Party",
    labels={"Percentage": "% Change"},
    title="The Percentage Change between First and Latest Electoral Vote (by 11th October 2020)",
)
fig1.show()