# UK Election Results Graphical Analysis

The aim of this project was to build familiarity with the popular web-scraping library BeautifulSoup4 and the graphing library Matplotlib, both of which are commonly used in data science projects. 

For this project I chose to gather data on the last 10 UK general elections and display it graphically in a way that allows the reader to easily see how:
    1) The total number of votes changed over the last 10 elections,
    2) The relative share of the vote for each of the three main parties changed over the last 10 elections.
    
    

In [None]:
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
%matplotlib notebook

In [None]:
#Extracting UK election data for last 10 elections

# Columns kept from every scraped results table.
RESULT_COLUMNS = ['Party', '%', 'Seats', 'Year']

# ukpolitical.info pages in the order they are scraped, with the layout used
# to locate the electorate and turnout figures on each page:
#   'summary' - electorate is the 3rd space-separated token of the 4th <li> in
#               the first 'col-xs-6' div; the turnout percentage is in the
#               1st <li> of the turnout div
#   'space'   - electorate is the 3rd space-separated token of the 1st <li> in
#               the turnout div; the turnout percentage is in its 2nd <li>
#   'dash'    - as 'space', but the electorate is the text after an en dash
UKPOLITICAL_PAGES = [
    (2015, 'summary'),
    (2010, 'summary'),
    (2005, 'space'),
    (2001, 'space'),
    (1997, 'space'),
    (1992, 'space'),
    (1987, 'dash'),
    (1983, 'space'),
    (1979, 'dash'),
]


def _scrape_ukpolitical(year, layout):
    """Scrape one ukpolitical.info election page.

    Parameters
    ----------
    year : int
        Election year; selects the page ``http://www.ukpolitical.info/<year>.htm``.
    layout : str
        One of 'summary', 'space' or 'dash' (see UKPOLITICAL_PAGES).

    Returns
    -------
    tuple
        ``(results, total_votes)`` where ``results`` is the second table on
        the page restricted to RESULT_COLUMNS (with 'Year' filled in) and
        ``total_votes`` is electorate * turnout percentage / 100 as a float.
    """
    url = 'http://www.ukpolitical.info/{}.htm'.format(year)
    results = pd.read_html(url, header=0)[1]
    results['Year'] = year

    soup = BeautifulSoup(requests.get(url).content, 'lxml')
    # The turnout figures live in the 'col-xs-6' div that precedes the link
    # to Turnout45.htm.
    turnout_items = (soup.find('a', {'href': 'Turnout45.htm'})
                     .find_previous('div', {'class': 'col-xs-6'})
                     .find_all('li'))

    if layout == 'summary':
        summary_items = soup.find('div', {'class': 'col-xs-6'}).find_all('li')
        electorate_text = summary_items[3].text.split(' ')[2]
        percent_item = turnout_items[0]
    else:
        separator, position = ('–', 1) if layout == 'dash' else (' ', 2)
        electorate_text = turnout_items[0].text.split(separator)[position]
        percent_item = turnout_items[1]

    electorate = int(electorate_text.replace(',', ''))
    turnout_percent = float(percent_item.text.split(' ')[2].split('%')[0])
    return results[RESULT_COLUMNS], electorate * turnout_percent * 0.01


frames = []
df_total = pd.Series(dtype='float64')

# 2017 comes from Wikipedia, where the '%' column carries a trailing '%' sign.
url = 'https://en.wikipedia.org/wiki/Results_breakdown_of_the_United_Kingdom_general_election,_2017'
df17 = pd.read_html(url, header=0)[3]
df17['Year'] = 2017
df17['%'] = df17['%'].apply(lambda x: float(str(x).rstrip('%')))
frames.append(df17[RESULT_COLUMNS])
# NOTE(review): hard-coded figures, presumably electorate * turnout - confirm the source.
df_total.loc[2017] = 39316335 * 0.68

for year, layout in UKPOLITICAL_PAGES:
    results, total_votes = _scrape_ukpolitical(year, layout)
    frames.append(results)
    df_total.loc[year] = total_votes

# DataFrame.append was removed in pandas 2.0, so the per-year tables are
# concatenated in one go instead.
df = pd.concat(frames).reset_index(drop=True)
df.to_csv('/Users/niral/Documents/ukelectiondata.csv')
# header=False keeps the file a plain two-column (year, total) CSV on every
# pandas version; newer versions would otherwise write a header row.
df_total.to_csv('/Users/niral/Documents/ukelectiontotals.csv', header=False)

In [None]:
#Formatting the scraped data. 

# As work was done on Coursera platform data was scraped and uploaded as csv file.
df = pd.read_csv('ukelectiondata.csv')
df_totals = pd.read_csv('ukelectiontotals.csv', header=None)
df_totals.columns = ['Year', 'Total Votes']
df_totals.set_index('Year', inplace=True)
df.drop('Unnamed: 0', axis=1, inplace=True)

# Relabel three rows (by position) as 'Liberal Democrat' so they survive the
# party filter below. Presumably these rows list the party under a different
# name - verify against the CSV. A single .iloc write is used because chained
# assignment (df['Party'].iloc[n] = ...) may modify a temporary copy, not df.
df.iloc[[78, 88, 98], df.columns.get_loc('Party')] = 'Liberal Democrat'
df['Votes'] = ''
df = df[df['Party'].isin(['Conservative', 'Labour', 'Liberal Democrat'])]

years = df_totals.index.sort_values().values
parties = df['Party'].unique()

# Votes (in millions) for everyone outside the three retained parties:
# the share not accounted for by them, applied to each year's total.
# groupby sorts by year, matching the year-sorted totals.
main_party_share = df.groupby('Year')['%'].agg('sum')
other = ((0.01 * (100 - main_party_share)).values
         * df_totals['Total Votes'].sort_index().values) / 1000000

# Votes (in millions) per party, one entry per year in ascending year order.
votes_by_party = {'Conservative': [], 'Labour': [], 'Liberal Democrat': []}
for y in years:
    total_votes = df_totals.loc[y].iloc[0]
    for p in parties:
        share = df.loc[(df['Year'] == y) & (df['Party'] == p), '%'].iloc[0]
        votes_by_party[p].append((share * total_votes * 0.01) / 1000000)

con = votes_by_party['Conservative']
lab = votes_by_party['Labour']
lib = votes_by_party['Liberal Democrat']



In [6]:
#Creating the visualization from the scraped data.
# Stacked bar chart: one bar per election year, stacked bottom-to-top as
# Other Parties, Labour, Conservative, Liberal Democrats (votes in millions).
fig, ax = plt.subplots()
index = np.arange(len(df_totals))
bar_width = 0.35
opacity = 0.8
labels = [str(x) for x in df_totals.index.sort_values().values]

# Running totals used as the base ("bottom") of each stacked segment.
other_lab = [x + y for x, y in zip(other, lab)]
other_lab_con = [x + y for x, y in zip(other_lab, con)]

# align='center' is passed explicitly so the bars are centred on `index`
# regardless of the matplotlib version's default alignment.
rects1 = ax.bar(index, tuple(other), bar_width, align='center',
                alpha=opacity, color='grey', label='Other Parties')
rects2 = ax.bar(index, tuple(lab), bar_width, bottom=tuple(other), align='center',
                alpha=opacity, color='red', label='Labour')
rects3 = ax.bar(index, tuple(con), bar_width, bottom=tuple(other_lab), align='center',
                alpha=opacity, color='blue', label='Conservative')
rects4 = ax.bar(index, tuple(lib), bar_width, bottom=tuple(other_lab_con), align='center',
                alpha=opacity, color='orange', label='Liberal Democrats')

ax.set_ylim(bottom=0, top=45)
# Ticks sit at the bar centres so each year label lines up with its bar.
ax.set_xticks(index)
ax.set_xticklabels(labels)
ax.set_xlabel('Election Year')
ax.set_ylabel('Votes (Millions)')
ax.legend()
ax.set_title('UK election results split by party over last 10 elections')


<IPython.core.display.Javascript object>

<matplotlib.text.Text at 0x7f812fd14128>