In [58]:
import pandas as pd
import shapely
import os

import plotly.express as px
import plotly.graph_objects as go

from scipy.spatial import Voronoi

# Explore data

Terminology I need to define
* Division
* Booth
* AEC
* 2019 election information: why (booths change each election, diviosn borders change frequently as well)
* PPVC pre polling voting centre

In this document I want to:
* Explore current divisions and polling booths
* Plot them using plotly: have 2019 division as one region, and the booths shown inside. 
* Create a voronoi tesselation for the booths
* Plot the voronoi tesselation


## Get data
Data is available from the Australian Electoral Commision 

In [2]:
booth_info_loc = 'data/20190518/GeneralPollingPlacesDownload-24310.csv'

try:
    booths = pd.read_csv(booth_info_loc,
                         skiprows=1)
except IOError:
    booths = pd.read_csv('https://results.aec.gov.au/24310/Website/Downloads/GeneralPollingPlacesDownload-24310.csv')
    booths.to_csv(booth_info_loc)
    booths = pd.read_csv(booth_info_loc,
                         skiprows=1)

booths.head()

Unnamed: 0,State,DivisionID,DivisionNm,PollingPlaceID,PollingPlaceTypeID,PollingPlaceNm,PremisesNm,PremisesAddress1,PremisesAddress2,PremisesAddress3,PremisesSuburb,PremisesStateAb,PremisesPostCode,Latitude,Longitude
0,ACT,318,Bean,93925,5,Belconnen BEAN PPVC,Belconnen Community Centre,26 Chandler St,,,BELCONNEN,ACT,2900.0,-35.23893,149.069655
1,ACT,318,Bean,93927,5,BLV Bean PPVC,BLV Canberra,50 Marcus Clarke St,,,CANBERRA CITY,ACT,2601.0,-35.277334,149.126869
2,ACT,318,Bean,11877,1,Bonython,Bonython Primary School,64 Hurtle Ave,,,BONYTHON,ACT,2905.0,-35.4318,149.083
3,ACT,318,Bean,11452,1,Calwell,Calwell High School,111 Casey Cres,,,CALWELL,ACT,2905.0,-35.44067,149.1176
4,ACT,318,Bean,8761,1,Chapman,Chapman Primary School,46-50 Perry Dr,,,CHAPMAN,ACT,2611.0,-35.3564,149.042


## First just going to look at booths that I am familiar with

Testing with a small region to get experience with the data.

In [5]:

booths2500 = booths[booths['PremisesPostCode']==2500]
booths2500

Unnamed: 0,State,DivisionID,DivisionNm,PollingPlaceID,PollingPlaceTypeID,PollingPlaceNm,PremisesNm,PremisesAddress1,PremisesAddress2,PremisesAddress3,PremisesSuburb,PremisesStateAb,PremisesPostCode,Latitude,Longitude
724,NSW,114,Cunningham,65460,5,BLV Cunningham PPVC,BLV Cunningham,Corporate Square,Level 4,43 Burelli St,WOLLONGONG,NSW,2500.0,-34.427135,150.898067
729,NSW,114,Cunningham,539,1,Coniston,Coniston Public School,123 Auburn St,,,CONISTON,NSW,2500.0,-34.4386,150.887
735,NSW,114,Cunningham,30105,5,Divisional Office (PREPOLL),Divisional Office,Corporate Square,Level 4,43 Burelli St,WOLLONGONG,NSW,2500.0,-34.427011,150.898107
739,NSW,114,Cunningham,544,1,Gwynneville,Gwynneville Primary School,10A Acacia Ave,,,GWYNNEVILLE,NSW,2500.0,-34.418079,150.879339
742,NSW,114,Cunningham,546,1,Keiraville,Keiraville Public School,286 Gipps Rd,,,KEIRAVILLE,NSW,2500.0,-34.4147,150.873
744,NSW,114,Cunningham,555,1,Mangerton,Mount St Thomas Public School,12-14 Taronga Ave,,,MANGERTON,NSW,2500.0,-34.437281,150.870466
745,NSW,114,Cunningham,553,1,Mount Keira,Edmund Rice College,112 Mount Keira Rd,,,WEST WOLLONGONG,NSW,2500.0,-34.41998,150.863017
765,NSW,114,Cunningham,565,1,Wollongong,Wollongong Town Hall,93 Crown St,,,WOLLONGONG,NSW,2500.0,-34.425718,150.89771
766,NSW,114,Cunningham,34033,5,Wollongong CUNNINGHAM PPVC,3/51 Crown St,,,,WOLLONGONG,NSW,2500.0,-34.426057,150.899519
767,NSW,114,Cunningham,83535,1,Wollongong East,Wollongong Public School,67a Church St,,,WOLLONGONG,NSW,2500.0,-34.422548,150.896217


In [6]:
fig = px.scatter_mapbox(booths2500, lat="Latitude", lon="Longitude", hover_name="PremisesNm", 
                        hover_data=["DivisionNm", "PollingPlaceNm"],
                        zoom=11, height=300)
fig.update_layout(mapbox_style="open-street-map")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

First problem I can see: what is a booth doing in Nowra. That is definitely not in the 2500 postcode. The address for this booth is actually in central Wollongong and there are 5 others with the same address, but all with different coordinates. 

In [24]:
booths2500[booths2500['PremisesAddress3']=='43 Burelli St']

Unnamed: 0,State,DivisionID,DivisionNm,PollingPlaceID,PollingPlaceTypeID,PollingPlaceNm,PremisesNm,PremisesAddress1,PremisesAddress2,PremisesAddress3,PremisesSuburb,PremisesStateAb,PremisesPostCode,Latitude,Longitude
724,NSW,114,Cunningham,65460,5,BLV Cunningham PPVC,BLV Cunningham,Corporate Square,Level 4,43 Burelli St,WOLLONGONG,NSW,2500.0,-34.427135,150.898067
735,NSW,114,Cunningham,30105,5,Divisional Office (PREPOLL),Divisional Office,Corporate Square,Level 4,43 Burelli St,WOLLONGONG,NSW,2500.0,-34.427011,150.898107
1069,NSW,120,Gilmore,65612,5,BLV Gilmore PPVC,BLV Gilmore,Corporate Square,Level 4,43 Burelli St,WOLLONGONG,NSW,2500.0,-34.875295,150.603992
1079,NSW,120,Gilmore,30111,5,Divisional Office (PREPOLL),Divisional Office,Corporate Square,Level 4,43 Burelli St,WOLLONGONG,NSW,2500.0,-34.427011,150.898107
3029,NSW,150,Whitlam,65458,5,BLV Whitlam PPVC,BVL Whitlam,Corporate Square,Level 4,43 Burelli St,WOLLONGONG,NSW,2500.0,-34.427135,150.898067
3042,NSW,150,Whitlam,30141,5,Divisional Office (PREPOLL),Divisional Office,Corporate Square,Level 4,43 Burelli St,WOLLONGONG,NSW,2500.0,-34.427135,150.898067


These are all `PollingPlaceTypeID` = 5. I suspect that these might be for prepolling or some sort of admin that is based out of the Burelli street office, but represents different real locations. For now I am going to remove them. I will come back to it later when I have some voting data to see if I can piece together what this actually is and how important it is.

First, just checking the other `PollingPlaceTypeID`:

In [16]:
booths.groupby('PollingPlaceTypeID')['DivisionID'].count()

PollingPlaceTypeID
1    7169
2     489
3      42
4      26
5    1149
Name: DivisionID, dtype: int64

In [27]:
booths.groupby('PollingPlaceTypeID').head(5).sort_values('PollingPlaceTypeID')

Unnamed: 0,State,DivisionID,DivisionNm,PollingPlaceID,PollingPlaceTypeID,PollingPlaceNm,PremisesNm,PremisesAddress1,PremisesAddress2,PremisesAddress3,PremisesSuburb,PremisesStateAb,PremisesPostCode,Latitude,Longitude
2,ACT,318,Bean,11877,1,Bonython,Bonython Primary School,64 Hurtle Ave,,,BONYTHON,ACT,2905.0,-35.4318,149.083
3,ACT,318,Bean,11452,1,Calwell,Calwell High School,111 Casey Cres,,,CALWELL,ACT,2905.0,-35.44067,149.1176
4,ACT,318,Bean,8761,1,Chapman,Chapman Primary School,46-50 Perry Dr,,,CHAPMAN,ACT,2611.0,-35.3564,149.042
5,ACT,318,Bean,8763,1,Chisholm,Caroline Chisholm School,108 Hambidge Cres,,,CHISHOLM,ACT,2905.0,-35.419522,149.122539
6,ACT,318,Bean,93916,1,City (Bean),Pilgrim House,69 Northbourne Ave,,,CANBERRA CITY,ACT,2601.0,-35.276702,149.129081
40,ACT,318,Bean,93923,2,Special Hospital Team 2,Multiple sites,,,,,ACT,,,
39,ACT,318,Bean,93921,2,Special Hospital Team 1,Multiple sites,,,,,ACT,,,
41,ACT,318,Bean,93924,2,Special Hospital Team 3,Multiple sites,,,,,ACT,2611.0,,
85,ACT,101,Canberra,32712,2,Special Hospital Team 1,Multiple sites,,,,,ACT,,,
86,ACT,101,Canberra,58810,2,Special Hospital Team 2,Multiple sites,,,,,ACT,,,


It appears that the values for `PollingPlaceTypeID`
* 1: a normal on the day polling booth
* 2: a mobile team that visits hospitals
* 3: a mobile team that visits remote locations
* 4: other mobile team 
* 5: Prepolling voting centre? 

For now I'll exclude everything other than 1. My goal right now is a proof of concept and not to overcomplicate things. I'll need to come back and double check the remote locations as it could be quite easy to have large parts of Australia underrepresented. 


In [8]:
booths_normal = booths[booths['PollingPlaceTypeID']==1]
booths2500 = booths_normal[booths_normal['PremisesPostCode']==2500]

In [9]:
fig = px.scatter_mapbox(booths2500, lat="Latitude", lon="Longitude", hover_name="PremisesNm", 
                        hover_data=["DivisionNm", "PollingPlaceNm"],
                        zoom=11, height=300)
fig.update_layout(mapbox_style="open-street-map")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

## Making a voronoi tesselation
I want to define a region around each booth to capture the area it represents. This is a simplification as it assumes that voters travel to their nearest booth. More could be done with ABS data, but for now this will be fine


In [46]:
booths2500['coordinates'] = booths2500.apply(lambda x: (x['Longitude'], x['Latitude']), axis=1)
vor = Voronoi(booths2500[['Longitude', 'Latitude']].values)
vor

In [54]:
regions = [[vor.vertices[point][0] for point in region] for region in vor.regions if -1 not in region]

In [24]:
import numpy as np
points = np.array([[0, 0], [0, 1], [0, 2], [1, 0], [1, 1], [1, 2],
                   [2, 0], [2, 1], [2, 2]])
from scipy.spatial import Voronoi, voronoi_plot_2d
vor = Voronoi(points)

In [56]:
regions = [[vor.vertices[point] for point in region] for region in vor.regions]
regions[0]

[array([150.87182066, -34.42454715]),
 array([150.87897238, -34.4349364 ]),
 array([150.87200987, -34.42617212])]

In [68]:
data = [go.Scattermapbox(lat=booths2500['Latitude'],  lon=booths2500["Longitude"])]


fig = px.scatter_mapbox(booths2500, lat="Latitude", lon="Longitude", hover_name="PremisesNm", 
                        hover_data=["DivisionNm", "PollingPlaceNm"],
                        zoom=11, height=300)

fig.update_layout(mapbox_style="open-street-map")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})

# Adding the voronoi tesselation
fig.update_layout(
    mapbox_style="white-bg",
    mapbox_layers=[
        {
            go.Scattermapbox(fill = "toself",
                             lon = [r[0] for r in region], lat = [r[1] for r in region],
                            marker = { 'size': 10, 'color': "orange" })
        }
                    for region in regions]
)
# for region in regions:
#     print([r[0] for r in region])
#     fig2 = go.Figure(go.Scattermapbox(fill = "toself",
#                                       lon = [r[0] for r in region], lat = [r[1] for r in region],
#                                       marker = { 'size': 10, 'color': "orange" }))
fig.show()

TypeError: unhashable type: 'Scattermapbox'

In [34]:
vor.points

array([[0., 0.],
       [0., 1.],
       [0., 2.],
       [1., 0.],
       [1., 1.],
       [1., 2.],
       [2., 0.],
       [2., 1.],
       [2., 2.]])

In [41]:
booths_normal.groupby('PremisesStateAb').count()

Unnamed: 0_level_0,State,DivisionID,DivisionNm,PollingPlaceID,PollingPlaceTypeID,PollingPlaceNm,PremisesNm,PremisesAddress1,PremisesAddress2,PremisesAddress3,PremisesSuburb,PremisesPostCode,Latitude,Longitude,coordinates
PremisesStateAb,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
ACT,102,102,102,102,102,102,102,102,0,0,102,102,102,102,102
NSW,2435,2435,2435,2435,2435,2435,2435,2435,18,0,2435,2435,2435,2435,2435
NT,39,39,39,39,39,39,39,39,0,0,39,39,39,39,39
QLD,1299,1299,1299,1299,1299,1299,1299,1297,35,0,1299,1299,1299,1299,1299
SA,630,630,630,630,630,630,630,619,1,0,630,630,630,630,630
TAS,277,277,277,277,277,277,277,276,1,0,277,277,277,277,277
VIC,1666,1666,1666,1666,1666,1666,1666,1654,1,0,1666,1666,1666,1666,1666
WA,721,721,721,721,721,721,721,721,3,0,721,721,721,721,721


10186838987765714516831972232749936484449942329028078508518550046239459951436535048217193743869982210648908638073771326147731196638758894409111397754362489378247923951126615241852562203855152361272254734758028315995418876530994938227735318191607092586558943392462303314757888204428532729022982636318088852451487698130179115088051299915231823140302047142684937861537751601623764112939399218425809686284244938574175140564194417863591390494835292689290288274923657296205762756012097330980899194052820104273772613029309221758615289185940432539873554593102860026792566863227210761085296033989541332602876441676597865847422561512377050877660952324048444438981313421465190131059714908564013476001309682729812657683150548152931893908359610368