Install and import neccessary libraries. This implementation uses Pandas (and GeoPandas) as well as geopy for geocoding.

In [67]:
!pip install pandas
!pip install folium matplotlib mapclassify geopandas
!pip install geopy

import pandas as pd
import geopandas as gpd
import geopy as gp

import os
import time



In [56]:
# Setup geocoding provider

from geopy.geocoders import GoogleV3
API_KEY = "<insert api key>"

geolocator = GoogleV3(api_key=API_KEY)

Configure variables used to define input data format. Change these if changing the format of the election data.

In [74]:
COUNTRY = "Hrvatska"

ADDRESS_COLUMN = "Adresa BM"
CITY_COLUMN = "Grad/općina/država"

POINT_COLUMN = "geometry"

Firstly, load and concatenate all election voting spot locations and convert their addresses to coordinates.

In [70]:
dataset = gpd.GeoDataFrame()

for (path, dirnames, filenames) in os.walk("data"):
  for filename in filenames:
    frame = pd.read_excel(os.sep.join([path, filename]))

    dataset = pd.concat([dataset, frame])

print(dataset)

     Rbr IJ Naziv izborne jedinice  Rbr.županije             Županija  \
0         1    I. IZBORNA JEDINICA           1.0  ZAGREBAČKA ŽUPANIJA   
1         1    I. IZBORNA JEDINICA           1.0  ZAGREBAČKA ŽUPANIJA   
2         1    I. IZBORNA JEDINICA           1.0  ZAGREBAČKA ŽUPANIJA   
3         1    I. IZBORNA JEDINICA           1.0  ZAGREBAČKA ŽUPANIJA   
4         1    I. IZBORNA JEDINICA           1.0  ZAGREBAČKA ŽUPANIJA   
..      ...                    ...           ...                  ...   
820       7  VII. IZBORNA JEDINICA           4.0  KARLOVAČKA ŽUPANIJA   
821       7  VII. IZBORNA JEDINICA           4.0  KARLOVAČKA ŽUPANIJA   
822       7  VII. IZBORNA JEDINICA           4.0  KARLOVAČKA ŽUPANIJA   
823       7  VII. IZBORNA JEDINICA           4.0  KARLOVAČKA ŽUPANIJA   
824       7  VII. IZBORNA JEDINICA           4.0  KARLOVAČKA ŽUPANIJA   

    Oznaka Gr/Op/Dr Grad/općina/država  Rbr BM  \
0              grad      VELIKA GORICA       1   
1              grad    

In [71]:
relevant_dataset = dataset.loc[:, [ADDRESS_COLUMN, CITY_COLUMN]] # Keep only the index and two location columns

def make_complete_address_column(row):
  return str(row[ADDRESS_COLUMN] + ", " + row[CITY_COLUMN] + ", " + COUNTRY)

relevant_dataset[ADDRESS_COLUMN] = relevant_dataset.apply(make_complete_address_column, axis=1)
relevant_dataset = relevant_dataset.drop(CITY_COLUMN, axis=1) # CITY_COLUMN data is now a part of ADDRESS_COLUMN

print(relevant_dataset)

                                             Adresa BM
0    VEL.GORICA, TRG S.RADIĆA 5, VELIKA GORICA, Hrv...
1    VEL.GORICA, TRG S. RADIĆA 5, VELIKA GORICA, Hr...
2    VEL.GORICA, S. KOLARA 39, VELIKA GORICA, Hrvatska
3    VEL.GORICA, V. VIDRIĆA 2, VELIKA GORICA, Hrvatska
4    VEL.GORICA, CVJETNO NASELJE 18 A, VELIKA GORIC...
..                                                 ...
820                         ŽAKANJE, ŽAKANJE, Hrvatska
821                        KOHANJAC, ŽAKANJE, Hrvatska
822                 ZALUKA LIPNIČKA, ŽAKANJE, Hrvatska
823                       PRAVUTINA, ŽAKANJE, Hrvatska
824                     VELIKA PAKA, ŽAKANJE, Hrvatska

[46000 rows x 1 columns]


The remaining column should be enough to geocode the address into the coordinates of the voting place

In [90]:
point_column_data = []

#relevant_dataset = relevant_dataset.drop(POINT_COLUMN, axis=1)

for index, row in relevant_dataset.iterrows():
  result = geolocator.geocode(row[ADDRESS_COLUMN], exactly_one=True)
  point_column_data.append(result.point)

  print(result)

point_frame = gpd.GeoDataFrame({ POINT_COLUMN: point_column_data })
relevant_dataset = relevant_dataset.join(point_frame)

print(relevant_dataset)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Mandićeva ul. 30, 21311, Split, Croatia
Mandićeva ul. 34, 21311, Split, Croatia
Josipova 2, 21311, Split, Croatia
Vrh Visoke 8, 21000, Split, Croatia
Cesta Mira 3, 21000, Split, Croatia
Vrh Visoke 8, 21000, Split, Croatia
Pazdigradska ul. 1, 21000, Split, Croatia
Pazdigradska ul. 1, 21000, Split, Croatia
Pazdigradska ul. 1, 21000, Split, Croatia
Pazdigradska ul. 1, 21000, Split, Croatia
Hrvatskih dragovoljaca 48, 21251, Donje Sitno, Croatia
Gornje Sitno, Croatia
Gornje Sitno, Croatia
Ul. Svetog Mihovila 52, 21000, Kamen, Croatia
Srinjine, Croatia
Srinjine, Croatia
Ivankova ul. 13, 21311, Stobreč, Croatia
Ivankova ul. 13, 21311, Stobreč, Croatia
KOREŠNICA 2, 21251, Žrnovnica, Croatia
Žrnovnica, Croatia
Put Duboke Garme 2, 21224, Slatine, Croatia
Obala dr. Franje Tuđmana 1, 21460, Stari Grad, Croatia
Ul. Novo Riva 2, 21460, Stari Grad, Croatia
Dol, Croatia
Vrbanj, Croatia
Ul. Hrvatskih Velikana 1, 21400, Supetar, Croatia
Su

In [119]:
out_format = pd.DataFrame(relevant_dataset);
out_format["longitude"] = out_format[POINT_COLUMN].apply(lambda x: x.longitude)
out_format["latitude"] = out_format[POINT_COLUMN].apply(lambda x: x.latitude)

out_format = out_format.drop(POINT_COLUMN, axis=1)

print(out_format)

out_format.to_csv("out_data.csv")

                                             Adresa BM  longitude   latitude
0    VEL.GORICA, TRG S.RADIĆA 5, VELIKA GORICA, Hrv...  16.067043  45.711685
1    VEL.GORICA, TRG S. RADIĆA 5, VELIKA GORICA, Hr...  16.067043  45.711685
2    VEL.GORICA, S. KOLARA 39, VELIKA GORICA, Hrvatska  16.068541  45.710432
3    VEL.GORICA, V. VIDRIĆA 2, VELIKA GORICA, Hrvatska  16.066803  45.710931
4    VEL.GORICA, CVJETNO NASELJE 18 A, VELIKA GORIC...  16.069713  45.718287
..                                                 ...        ...        ...
820                         ŽAKANJE, ŽAKANJE, Hrvatska  16.134066  45.816710
821                        KOHANJAC, ŽAKANJE, Hrvatska  16.060649  45.821618
822                 ZALUKA LIPNIČKA, ŽAKANJE, Hrvatska  16.060649  45.821618
823                       PRAVUTINA, ŽAKANJE, Hrvatska  15.926018  45.812410
824                     VELIKA PAKA, ŽAKANJE, Hrvatska  15.926018  45.812410

[46000 rows x 3 columns]
