In [1]:
import re
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
from bqplot import *
from bqplot import pyplot as plt
from ipywidgets import Layout

## Scrape and organize the data

In [2]:
# Politico page with the 2016 presidential primary results; it is fetched and
# parsed by the cells below.
url = 'https://www.politico.com/mapdata-2016/2016-election/primary/results/map/president/'

In [3]:
from fake_useragent import UserAgent

# Send a browser-like User-Agent with the request
# (presumably the site rejects the default `requests` agent; not verified here).
ua = UserAgent()
header = {'User-Agent': str(ua.chrome)}

# A timeout stops the cell from hanging indefinitely if the server stalls.
htmlContent = requests.get(url, headers=header, timeout=30)
# Fail loudly on a 4xx/5xx answer instead of silently parsing an error page
# into an empty results table further down.
htmlContent.raise_for_status()

In [4]:
# Parse the downloaded page. Every <article class="timeline-group"> element is
# processed as one contest by the scraping loop that follows.
soup = BeautifulSoup(htmlContent.text, 'html.parser')
x = soup.find_all('article', class_='timeline-group')

In [5]:
# Collect one record per candidate row and build the frame once at the end.
# DataFrame.append was removed in pandas 2.0, and appending row by row copies
# the whole frame on every iteration.
result_columns = ['electiontype', 'state', 'date', 'party', 'candidate', 'votes', '%votes', 'delegates']
# (CSS class suffix used in the page markup, party label stored in the frame)
party_markers = (('democrat', 'DEM'), ('republican', 'GOP'))

records = []
# (state, party, reason) for everything that was left out of the table, so
# that missing data can be inspected instead of vanishing silently.
scrape_skipped = []

for contest in x:
    state = contest.find_all('h3')[0].get_text().strip()
    electiontype = (contest.find_all('h4')[0].get_text()
                    .replace('\n', '').replace(' ', '').replace(',', '').replace('Presidential', ''))
    date = contest.find_all('p', attrs="timestamp")[0].get_text().strip()

    # Democratic rows first, then Republican rows, for each contest.
    for css_suffix, party in party_markers:
        tables = contest.find_all(
            'div', attrs=re.compile(r'results-data pos-.* contains-' + css_suffix))
        if not tables:
            # This contest has no results table for the party.
            scrape_skipped.append((state, party, 'no results table'))
            continue

        for row in tables[0].find_all('tr', attrs='type-' + css_suffix):
            try:
                candidate = row.find_all('span', attrs="name-combo")[0].get_text()
                percentage = row.find_all('span', attrs="percentage-combo")[0].get_text()
                votes = int(row.find_all('td', attrs="results-popular")[0].get_text().replace(',', ''))
            except (IndexError, ValueError) as err:
                # Malformed row (missing cell or non-numeric vote count):
                # skip just this row and keep the rest of the table.
                scrape_skipped.append((state, party, repr(err)))
                continue

            # The delegates cell is optional; rows without one get NaN.
            delegate_cells = row.find_all('td', attrs="delegates-cell")
            delegates = delegate_cells[0].get_text() if delegate_cells else np.nan

            records.append([electiontype, state, date, party, candidate, votes, percentage, delegates])

df_results = pd.DataFrame(records, columns=result_columns)

In [6]:
def cleantype(x):
    '''
    Collapse the scraped election-type labels to 'Primary' or 'Caucus'.

    'Caucuses' is mapped to 'Caucus'; 'Primary' and 'Caucus' are returned
    unchanged; every other value is treated as 'Primary'.
    '''
    if x == 'Caucuses':
        return 'Caucus'
    return x if x in ('Primary', 'Caucus') else 'Primary'

In [7]:
# Normalise the contest type, then index the table by
# (electiontype, state, date, party).
index_cols = ['electiontype', 'state', 'date', 'party']

# Make the cell safe to re-run: after a first run these columns already live in
# the index, and looking up df_results['electiontype'] would raise a KeyError.
if set(index_cols).issubset(df_results.index.names):
    df_results = df_results.reset_index()

df_results['electiontype'] = df_results['electiontype'].apply(cleantype)
df_results = df_results.set_index(index_cols)
df_results

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,candidate,votes,%votes,delegates
electiontype,state,date,party,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Primary,Alabama,"March 1, 2016",DEM,Winner H. Clinton,309928,77.8%,51
Primary,Alabama,"March 1, 2016",DEM,B. Sanders,76399,19.2%,9
Primary,Alabama,"March 1, 2016",DEM,Uncommitted,9534,2.4%,
Primary,Alabama,"March 1, 2016",DEM,M. O'Malley,1489,0.4%,
Primary,Alabama,"March 1, 2016",DEM,R. De La Fuente,814,0.2%,
...,...,...,...,...,...,...,...
Caucus,Wyoming,"March 12, 2016",GOP,Uncommitted,68,7.0%,4
Caucus,Wyoming,"March 12, 2016",GOP,B. Carson,0,0.0%,
Caucus,Wyoming,"March 12, 2016",GOP,J. Kasich,0,0.0%,
Caucus,Wyoming,"March 12, 2016",GOP,Other,0,0.0%,


## Visualize the data using bqplot

In [8]:
# Lookup table of US states. The cells below use its 'Name' column as the join
# key against the scraped state names and its 'FIPS' column as the map key.
state_codes = pd.read_csv('state_codes.csv')

### For DEM primaries

In [9]:
# Winner per state = the candidate with the most votes in that state's
# Democratic primary.
primary_DEM = df_results.loc[('Primary', slice(None), slice(None), 'DEM'), :]
# The scraped name of a winning candidate carries a "Winner" badge prefix
# (e.g. "Winner H. Clinton"). Strip it so the names match the colour-scale
# domain used for the map; otherwise no state would map to a candidate colour.
Winner_DEM = pd.DataFrame(
    primary_DEM.groupby(level=1).apply(
        lambda grp: re.sub(
            r'^\s*Winner\s+', '',
            grp.sort_values(by=['votes'], ascending=False).iloc[0]['candidate']).strip()))

# Build a map from state FIPS code to the winner (merge once, reuse the result).
state_winner_DEM = pd.merge(state_codes.set_index('Name'), Winner_DEM,
                            left_index=True, right_index=True)
color_data_DEM = dict(zip(state_winner_DEM['FIPS'].values, state_winner_DEM[0].values))


In [10]:
# Draw the US map, coloured by the Democratic primary winner of each state.
sc_geo_DEM = AlbersUSA()
sc_c1_DEM = OrdinalColorScale(
    domain=['B. Sanders', 'H. Clinton'],
    colors=['#83bc5e', '#5fa0d6'],
)
axis_DEM = ColorAxis(scale=sc_c1_DEM)

# Keyword arguments for the Map mark: per-state colour data, the scales, and a
# default colour (presumably applied to states without an entry in the colour data).
map_styles_DEM = {
    'color': color_data_DEM,
    'scales': {'projection': sc_geo_DEM, 'color': sc_c1_DEM},
    'colors': {'default_color': 'Grey'},
}

states_map_DEM = Map(map_data=topo_load('map_data/USStatesMap.json'), **map_styles_DEM)
map_fig_DEM = Figure(
    marks=[states_map_DEM],
    axes=[axis_DEM],
    fig_margin={'top': 30, 'bottom': 30, 'left': 30, 'right': 30},
    title='Winner from Primary of Democratic - State Wise',
)

In [11]:
def get_state_data_DEM(name):
    """Return the Democratic primary rows of ``df_results`` for one state.

    Parameters
    ----------
    name : state name as it appears in the 'state' index level.

    Returns
    -------
    The matching slice of ``df_results``, or ``None`` when the lookup fails
    (for example, the state does not occur in the index).
    """
    try:
        return df_results.loc[('Primary', name, slice(None), 'DEM'), :]
    except (KeyError, TypeError):
        # Only lookup failures mean "no data". Anything else (a missing
        # df_results, an interrupt, ...) should surface instead of being hidden.
        return None

In [12]:
# Horizontal bar chart of votes per candidate, used as the tooltip of the
# Democratic map. The initial ordinal domain is only a placeholder: the hover
# callback replaces it with the candidates of the hovered state.
vote_sc_y_DEM = LinearScale()
names_sc_DEM = OrdinalScale(domain=['Obama', 'McCain'])

vote_ax_DEM = Axis(scale=vote_sc_y_DEM, label='Votes Received')
names_ax_DEM = Axis(scale=names_sc_DEM, orientation='vertical')

vote_bars_DEM = Bars(
    scales={'x': names_sc_DEM, 'y': vote_sc_y_DEM},
    orientation="horizontal",
)
# Text labels drawn on the same pair of scales as the bars.
labels_DEM = Label(
    scales={"x": vote_sc_y_DEM, "y": names_sc_DEM},
    orientation="horizontal",
    colors=["#000000"],
)

bar_fig_DEM = Figure(
    marks=[vote_bars_DEM, labels_DEM],
    axes=[vote_ax_DEM, names_ax_DEM],
    title='Votes Received',
)

In [13]:
def hover_callback_DEM(name, value):
    """Refresh the Democratic tooltip bar chart for the hovered state.

    ``name`` is not used by this function. ``value['data']['name']`` is read as
    the state name. The bars and labels are cleared first; when the state has
    no rows only the title is updated, otherwise the scale domain, bars and
    labels are filled from that state's candidates and vote counts.
    """
    state_name = value['data']['name']
    polls_DEM = get_state_data_DEM(state_name)

    # Clear whatever the previously hovered state left behind.
    vote_bars_DEM.y = [0.]
    labels_DEM.text = None
    labels_DEM.x = None
    labels_DEM.y = None

    has_rows = polls_DEM is not None and polls_DEM.shape[0] != 0
    if not has_rows:
        bar_fig_DEM.title = ('Votes Received - ' + state_name)
        return

    candidates = polls_DEM['candidate'].values
    votes = polls_DEM['votes'].values

    names_sc_DEM.domain = list(candidates)
    bar_fig_DEM.title = ('Votes Received - ' + state_name)
    vote_bars_DEM.x = list(candidates)
    vote_bars_DEM.y = list(votes)
    # Labels start at zero on the vote scale, one per candidate, showing the count.
    labels_DEM.x = np.zeros(len(polls_DEM))
    labels_DEM.y = vote_bars_DEM.x
    labels_DEM.text = list(votes)

In [14]:
# Use the bar chart as the map's tooltip and refresh it whenever a state is hovered.
states_map_DEM.tooltip = bar_fig_DEM
states_map_DEM.on_hover(hover_callback_DEM)

In [15]:
# Display the Democratic winners map; hovering a state shows its vote bar chart.
map_fig_DEM

Figure(axes=[ColorAxis(scale=OrdinalColorScale(colors=['#83bc5e', '#5fa0d6'], domain=['B. Sanders', 'H. Clinto…

### For GOP primaries

In [16]:
# Winner per state = the candidate with the most votes in that state's
# Republican primary.
primary_GOP = df_results.loc[('Primary', slice(None), slice(None), 'GOP'), :]
# The scraped name of a winning candidate carries a "Winner" badge prefix
# (e.g. "Winner H. Clinton" in the table shown earlier). Strip it so the names
# match the colour-scale domain used for the map.
Winner_GOP = pd.DataFrame(
    primary_GOP.groupby(level=1).apply(
        lambda grp: re.sub(
            r'^\s*Winner\s+', '',
            grp.sort_values(by=['votes'], ascending=False).iloc[0]['candidate']).strip()))

# Build a map from state FIPS code to the winner (merge once, reuse the result).
state_winner_GOP = pd.merge(state_codes.set_index('Name'), Winner_GOP,
                            left_index=True, right_index=True)
color_data_GOP = dict(zip(state_winner_GOP['FIPS'].values, state_winner_GOP[0].values))

In [17]:
# Draw the US map, coloured by the Republican primary winner of each state.
sc_geo_GOP = AlbersUSA()
sc_c1_GOP = OrdinalColorScale(
    domain=["D. Trump", "T. Cruz", "J. Kasich"],
    colors=["#ba3b3f", "#cfa41f", "#1a8888"],
)
axis_GOP = ColorAxis(scale=sc_c1_GOP)

# Keyword arguments for the Map mark: per-state colour data, the scales, and a
# default colour (presumably applied to states without an entry in the colour data).
map_styles_GOP = {
    'color': color_data_GOP,
    'scales': {'projection': sc_geo_GOP, 'color': sc_c1_GOP},
    'colors': {'default_color': 'Grey'},
}

states_map_GOP = Map(map_data=topo_load('map_data/USStatesMap.json'), **map_styles_GOP)
map_fig_GOP = Figure(
    marks=[states_map_GOP],
    axes=[axis_GOP],
    fig_margin={'top': 30, 'bottom': 30, 'left': 30, 'right': 30},
    title='Winner from Primary of Republicans - State Wise',
)

In [18]:
def get_state_data_GOP(name):
    """Return the Republican primary rows of ``df_results`` for one state.

    Parameters
    ----------
    name : state name as it appears in the 'state' index level.

    Returns
    -------
    The matching slice of ``df_results``, or ``None`` when the lookup fails
    (for example, the state does not occur in the index). Returning ``None``
    matches ``get_state_data_DEM`` and lets the hover callback's ``is None``
    check handle states without data instead of raising inside the callback.
    """
    try:
        return df_results.loc[('Primary', name, slice(None), 'GOP'), :]
    except (KeyError, TypeError):
        return None

In [19]:
# Horizontal bar chart of votes per candidate, used as the tooltip of the
# Republican map. The initial ordinal domain is only a placeholder: the hover
# callback replaces it with the candidates of the hovered state.
vote_sc_y_GOP = LinearScale()
names_sc_GOP = OrdinalScale(domain=['Obama', 'McCain'])

vote_ax_GOP = Axis(scale=vote_sc_y_GOP, label='Votes Received')
names_ax_GOP = Axis(scale=names_sc_GOP, orientation='vertical')

vote_bars_GOP = Bars(
    scales={'x': names_sc_GOP, 'y': vote_sc_y_GOP},
    orientation="horizontal",
)
# Text labels drawn on the same pair of scales as the bars.
labels_GOP = Label(
    scales={"x": vote_sc_y_GOP, "y": names_sc_GOP},
    orientation="horizontal",
    colors=["#000000"],
)

bar_fig_GOP = Figure(
    marks=[vote_bars_GOP, labels_GOP],
    axes=[vote_ax_GOP, names_ax_GOP],
    title='Votes Received',
)

In [20]:
def hover_callback_GOP(name, value):
    """Refresh the Republican tooltip bar chart for the hovered state.

    ``name`` is not used by this function. ``value['data']['name']`` is read as
    the state name. The bars and labels are cleared first; when the state has
    no rows only the title is updated, otherwise the scale domain, bars and
    labels are filled from that state's candidates and vote counts.
    """
    state_name = value['data']['name']
    polls_GOP = get_state_data_GOP(state_name)

    # Clear whatever the previously hovered state left behind.
    vote_bars_GOP.y = [0.]
    labels_GOP.text = None
    labels_GOP.x = None
    labels_GOP.y = None

    has_rows = polls_GOP is not None and polls_GOP.shape[0] != 0
    if not has_rows:
        bar_fig_GOP.title = ('Votes Received - ' + state_name)
        return

    candidates = polls_GOP['candidate'].values
    votes = polls_GOP['votes'].values

    names_sc_GOP.domain = list(candidates)
    bar_fig_GOP.title = ('Votes Received - ' + state_name)
    vote_bars_GOP.x = list(candidates)
    vote_bars_GOP.y = list(votes)
    # Labels start at zero on the vote scale, one per candidate, showing the count.
    labels_GOP.x = np.zeros(len(polls_GOP))
    labels_GOP.y = vote_bars_GOP.x
    labels_GOP.text = list(votes)

In [21]:
# Use the bar chart as the map's tooltip and refresh it whenever a state is hovered.
states_map_GOP.tooltip = bar_fig_GOP
states_map_GOP.on_hover(hover_callback_GOP)

In [22]:
# Display the Republican winners map; hovering a state shows its vote bar chart.
map_fig_GOP

Figure(axes=[ColorAxis(scale=OrdinalColorScale(colors=['#ba3b3f', '#cfa41f', '#1a8888'], domain=['D. Trump', '…