In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the 2015 results (one row per candidate) and give the column that is
# read in as "Unnamed: 1" -- the constituency name -- a usable header.
results = pd.read_excel("2015 Constituency Results.xlsx").rename(
    columns={"Unnamed: 1": "Name"}
)
results.head()

Unnamed: 0,Constituency,Name,Country,Region,Electorate,Turnout,Unnamed: 6,Candidate,Party,Votes,Share,Place
0,1,Aberavon,Wales,Wales,49821,31523,0.633,Stephen Kinnock,Labour,15416,0.489,1
1,1,Aberavon,Wales,Wales,49821,31523,0.633,Peter Bush,UKIP,4971,0.158,2
2,1,Aberavon,Wales,Wales,49821,31523,0.633,Edward Yi He,Conservative,3742,0.119,3
3,1,Aberavon,Wales,Wales,49821,31523,0.633,Duncan Higgitt,Plaid Cymru,3663,0.116,4
4,1,Aberavon,Wales,Wales,49821,31523,0.633,Helen Clarke,Liberal Democrat,1397,0.044,5


In [3]:
# Rows for the first- and second-placed candidate in every constituency.
winners = results.loc[results["Place"] == 1]
seconds = results.loc[results["Place"] == 2]

# One frame per placing, keyed by constituency name.
df1 = pd.DataFrame({
    "Name": winners["Name"],
    "win": winners["Votes"],
    "win party": winners["Party"],
    "Electorate": winners["Electorate"],
})
df2 = pd.DataFrame({
    "Name": seconds["Name"],
    "2nd": seconds["Votes"],
    "2nd party": seconds["Party"],
})

# Pair each winner with the runner-up of the same constituency.  The inner
# join drops any constituency that lacks either a winner or a runner-up row.
margins = df1.join(df2.set_index("Name"), on="Name", how="inner")

# Winning margin in votes, and as a percentage of the electorate
# (not of the votes actually cast).
margins["margin"] = margins["win"].sub(margins["2nd"])
margins["margin %"] = margins["margin"].div(margins["Electorate"]).mul(100)

margins = margins.set_index("Name")
margins.head()

Unnamed: 0_level_0,Electorate,win,win party,2nd,2nd party,margin,margin %
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Aberavon,49821,15416,Labour,4971,UKIP,10445,20.965055
Aberconwy,45540,12513,Conservative,8514,Labour,3999,8.781291
Aberdeen North,67745,24793,Scottish National Party,11397,Labour,13396,19.774153
Aberdeen South,68056,20221,Scottish National Party,12991,Labour,7230,10.623604
Aberdeenshire West & Kincardine,73445,22949,Scottish National Party,15916,Conservative,7033,9.575873


# Wales

In [4]:
# Welsh constituencies won by a margin of at most 5% of the electorate.
wales_names = results[results.Region=="Wales"].Name.unique()
# `.ix` has been removed from pandas; this lookup is purely by label, so
# `.loc` is the direct replacement.  It raises KeyError if a name is missing
# from `margins` instead of silently producing an all-NaN row.
wales = margins.loc[wales_names]
wales[wales["margin %"] <= 5]

Unnamed: 0_level_0,Electorate,win,win party,2nd,2nd party,margin,margin %
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Bridgend,59998,14624,Labour,12697,Conservative,1927,3.211774
Cardiff North,67193,21709,Conservative,19572,Labour,2137,3.180391
Clwyd South,54996,13051,Labour,10649,Conservative,2402,4.36759
Gower,61820,15862,Conservative,15835,Labour,27,0.043675
Vale of Clwyd,56505,13760,Conservative,13523,Labour,237,0.419432
Wrexham,50992,12181,Labour,10350,Conservative,1831,3.590759
Ynys Mon,49944,10871,Labour,10642,Plaid Cymru,229,0.458514


# Scotland

In [5]:
# Scottish constituencies won by a margin of at most 5% of the electorate.
scots_names = results[results.Region=="Scotland"].Name.unique()
# `.ix` has been removed from pandas; this lookup is purely by label, so
# `.loc` is the direct replacement.  It raises KeyError if a name is missing
# from `margins` instead of silently producing an all-NaN row.
scots = margins.loc[scots_names]
scots[scots["margin %"] <= 5]

Unnamed: 0_level_0,Electorate,win,win party,2nd,2nd party,margin,margin %
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"Berwickshire, Roxburgh & Selkirk",74179,20145,Scottish National Party,19817,Conservative,328,0.442174
"Dumfriesshire, Clydesdale & Tweeddale",68483,20759,Conservative,19961,Scottish National Party,798,1.165253
Dunbartonshire East,66966,22093,Scottish National Party,19926,Liberal Democrat,2167,3.23597
Edinburgh South,65846,19293,Labour,16656,Scottish National Party,2637,4.004799
Edinburgh West,71749,21378,Scottish National Party,18168,Liberal Democrat,3210,4.47393
Orkney & Shetland,34551,9407,Liberal Democrat,8590,Scottish National Party,817,2.36462


# Running out of time; make a prediction

- Most of the UKIP vote will swing back, split 2/3 to Labour and 1/3 to the Tories
- In Scotland, a 15% swing away from the SNP to the next-highest party (which doesn't make much difference!!)
- In England, a 10% swing to the LibDems and a 5% swing to the Tories

Quite how I interpret these is documented in code (ahem!)

In [6]:
# Predict the winner of every Scottish constituency.
#
# Seats not won by the SNP keep their winner.  In SNP-held seats, SNP_SWING of
# the SNP's vote is moved to the runner-up; the seat changes hands only if the
# runner-up then has strictly more votes than the SNP.
SNP = "Scottish National Party"
SNP_SWING = 0.15

scots_rows = []
for name, row in scots.iterrows():
    # Plain label indexing replaces `row.ix[...]`, which pandas has removed.
    predicted = row["win party"]
    if predicted == SNP:
        snp_votes = row["win"] * (1 - SNP_SWING)
        runner_up_votes = row["2nd"] + row["win"] * SNP_SWING
        if runner_up_votes > snp_votes:
            predicted = row["2nd party"]
    scots_rows.append((name, predicted))

# One row per Scottish constituency: its name and the predicted winning party.
scots_pred = pd.DataFrame(scots_rows, columns=["Name", "Party"])

In [15]:
# Every constituency outside Scotland (these get the vote-share model below).
other_names = results[results.Region!="Scotland"].Name.unique()
# `.ix` has been removed from pandas; `.loc` is the label-based replacement.
others = margins.loc[other_names]

In [8]:
# All party labels, in order of first appearance in the results.
parties = results["Party"].unique()


def _votes_for(party):
    """Return a two-column frame: constituency name and `party`'s votes there."""
    party_rows = results[results["Party"] == party]
    return pd.DataFrame({"Name": party_rows["Name"], party: party_rows["Votes"]})


# Map each party label to its per-constituency vote table.
out = {party: _votes_for(party) for party in parties}

In [42]:
# Build one wide table: a row per constituency, a column of votes per party.
# Start from the first party's table and outer-join every other party onto it
# by constituency name, so seats a party did not contest hold NaN.
df = out[parties[0]]
for party in parties[1:]:
    party_votes = out[party].set_index("Name")
    df = df.join(party_votes, on="Name", how="outer")

df = df.set_index("Name")
df.head()

Unnamed: 0_level_0,Labour,UKIP,Conservative,Plaid Cymru,Liberal Democrat,Independent,Green Party,Socialist Labour Party,TUSC,Scottish National Party,...,The Eccentric Party of Great Britain,The Realists' Party,The Birthday Party,Wigan Independents,Wessex Regionalists,Reduce VAT in Sport,Give Me Back Elmo,Land Party,Magna Carta Conservation Party Great Britain,The Evolution Party
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Aberavon,15416.0,4971.0,3742.0,3663.0,1397.0,1137.0,711.0,352.0,134.0,,...,,,,,,,,,,
Aberconwy,8514.0,3467.0,12513.0,3536.0,1391.0,,727.0,,,,...,,,,,,,,,,
Aberdeen North,11397.0,,5304.0,,2050.0,,,,206.0,24793.0,...,,,,,,,,,,
Aberdeen South,12991.0,897.0,11087.0,,2252.0,139.0,964.0,,,20221.0,...,,,,,,,,,,
Aberdeenshire West & Kincardine,2487.0,1006.0,15916.0,,11812.0,141.0,885.0,,,22949.0,...,,,,,,,,,,


In [45]:
# This is annoying: two "Independent" candidates stood in Witney, so the joined
# table holds two rows for it.  But I don't think it affects the final result.
# (`.loc` replaces the `.ix` indexer, which pandas has removed.)
df.loc["Witney"]

Unnamed: 0_level_0,Labour,UKIP,Conservative,Plaid Cymru,Liberal Democrat,Independent,Green Party,Socialist Labour Party,TUSC,Scottish National Party,...,The Eccentric Party of Great Britain,The Realists' Party,The Birthday Party,Wigan Independents,Wessex Regionalists,Reduce VAT in Sport,Give Me Back Elmo,Land Party,Magna Carta Conservation Party Great Britain,The Evolution Party
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Witney,10046.0,5352.0,35201.0,,3953.0,94.0,2970.0,,,,...,,,,,110.0,56.0,37.0,35.0,,
Witney,10046.0,5352.0,35201.0,,3953.0,12.0,2970.0,,,,...,,,,,110.0,56.0,37.0,35.0,,


In [46]:
# Apply the assumed swings to the vote table, in place.
# NOTE: this cell is not idempotent -- running it twice compounds every
# adjustment, so rebuild `df` before re-running it.
#
# UKIP keeps 40% of its vote; 20% goes to Labour and 40% to the Conservatives.
# NOTE(review): the prose above says the returning UKIP vote splits 2/3 Labour
# and 1/3 Tories, which is the opposite of these factors -- confirm which was
# intended.
#
# Seats UKIP did not contest hold NaN in the "UKIP" column.  Adding that NaN
# would wipe out the Labour and Conservative totals there, so treat a missing
# UKIP vote as zero when transferring it.
ukip_votes = df["UKIP"].fillna(0)
df["Labour"] += ukip_votes * 0.2
df["Conservative"] += ukip_votes * 0.4
df["UKIP"] = df["UKIP"] * 0.4

# Multiplicative uplifts: +10% Liberal Democrat, +5% Conservative.  The
# Conservative uplift is applied after the UKIP transfer, so it covers those
# votes too.
# NOTE(review): the prose limits these swings to England, but they are applied
# to every row of `df` here -- confirm.
df["Liberal Democrat"] *= 1.1
df["Conservative"] *= 1.05

In [47]:
# Predicted winner = the party column holding the most votes in each row.
# `idxmax` returns the column *label*; `Series.argmax` in current pandas
# returns an integer position instead, which would store numbers rather than
# party names.  Restricting to numeric columns keeps this working if the cell
# is re-run after the string-valued "winner" column already exists.
winner = df.select_dtypes("number").idxmax(axis=1).tolist()
df["winner"] = winner

# Keep only the constituencies outside Scotland.  `.loc` replaces the `.ix`
# indexer, which pandas has removed.
df = df.loc[other_names]
df.head()

Unnamed: 0_level_0,Labour,UKIP,Conservative,Plaid Cymru,Liberal Democrat,Independent,Green Party,Socialist Labour Party,TUSC,Scottish National Party,...,The Realists' Party,The Birthday Party,Wigan Independents,Wessex Regionalists,Reduce VAT in Sport,Give Me Back Elmo,Land Party,Magna Carta Conservation Party Great Britain,The Evolution Party,winner
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Aberavon,16410.2,1988.4,6016.92,3663.0,1536.7,1137.0,711.0,352.0,134.0,,...,,,,,,,,,,Labour
Aberconwy,9207.4,1386.8,14594.79,3536.0,1530.1,,727.0,,,,...,,,,,,,,,,Conservative
Aldershot,10118.6,3301.2,28003.71,,4483.6,,2025.0,,,,...,,,,,,,,,,Conservative
Aldridge-Brownhills,10385.2,3100.4,24841.32,,1463.0,,826.0,,,,...,,,,,,,,,,Conservative
Altrincham & Sale West,14290.4,1618.8,29809.29,,4658.5,,1983.0,,,,...,,,,,,,,,,Conservative


In [52]:
# Non-Scottish predictions as a (Name, Party) table that keeps the
# constituency-name index of `df`.
others_pred = df["winner"].rename("Party").to_frame()
others_pred.insert(0, "Name", others_pred.index)

# Stack the Scottish and non-Scottish predictions into one table.
pred = pd.concat([scots_pred, others_pred])
pred.tail()

Unnamed: 0,Name,Party
Yeovil,Yeovil,Conservative
Ynys Mon,Ynys Mon,Labour
York Central,York Central,Labour
York Outer,York Outer,Conservative
Yorkshire East,Yorkshire East,Conservative


In [79]:
def get_con_number(name):
    """Return the constituency number recorded in `results` for `name`.

    Raises AssertionError unless the rows for `name` carry exactly one
    distinct constituency number (so also when `name` is not present).
    """
    numbers = results.loc[results["Name"] == name, "Constituency"]
    distinct = set(numbers)
    assert len(distinct) == 1
    return distinct.pop()

# Constituency names in the order they first appear in the results.
names = results["Name"].unique()

# For each constituency take the first prediction recorded for it; a
# constituency with several rows in `pred` contributes only its first.
winner = [
    next(iter(pred.loc[pred["Name"] == name, "Party"]))
    for name in names
]

# Final table: constituency number, constituency name, predicted winner.
output = pd.DataFrame({"Name": names, "Winner": winner})
output.insert(0, "Constituency", output["Name"].map(get_con_number))
output.head()

Unnamed: 0,Constituency,Name,Winner
0,1,Aberavon,Labour
1,2,Aberconwy,Conservative
2,3,Aberdeen North,Scottish National Party
3,4,Aberdeen South,Scottish National Party
4,5,Aberdeenshire West & Kincardine,Scottish National Party


In [80]:
# Save the final prediction table in both CSV and Excel form.
# No `index` argument is passed, so pandas' default index handling applies.
output.to_csv("mdaws_prediction.csv")
output.to_excel("mdaws_prediction.xlsx")