# Create distance matrix (direct distance in km) between municipalities
* Gemeinde = municipality

In [2]:
import pandas as pd
import numpy as np
import geopy
from geopy.geocoders import Nominatim
from geopy import distance

### for progress bar in loops
from ipywidgets import IntProgress
from IPython.display import display
import time

## Read in file including geolocation of municipalities

In [3]:
# Load the table of municipalities with their geolocations (UTF-8 encoded CSV).
geoloc_csv = 'data/municipality_geolocations.csv'
HCP = pd.read_csv(geoloc_csv, encoding='utf-8')

In [4]:
# Read the municipality list from Statistik Austria: skip the first 3 rows,
# read 2117 rows and keep only the municipality code, name and postal code.
GEM = pd.read_excel(
    'data/gemliste_knz.xls',
    usecols=['Gemeinde kennziffer', 'Gemeindename', 'PLZ des Gem.Amtes'],
    skiprows=3,
    nrows=2117,
    dtype={'Gemeinde kennziffer': int, 'Gemeindename': str, 'PLZ des Gem.Amtes': int},
)
GEM = GEM.rename(columns={'Gemeinde kennziffer': 'GKZ', 'PLZ des Gem.Amtes': 'PLZ'})

# Attach GKZ and Gemeindename to every geolocated municipality via its postal code.
# Rows of HCP whose PLZ has no match in GEM end up with GKZ = NaN.
HCP = HCP.join(GEM.set_index('PLZ'), on='PLZ').reset_index(drop=True)

# Drop every unmatched row. The previous `np.where(...)[0].item()` only worked
# when exactly one row was unmatched and raised for zero or several.
HCP = HCP.dropna(subset=['GKZ'])
HCP['GKZ'] = HCP['GKZ'].astype(int)

# Joining on PLZ can yield the same (GKZ, Gemeindename) pair more than once;
# keep the first occurrence and renumber the rows.
HCP = HCP.drop_duplicates(['GKZ', 'Gemeindename'], keep='first').reset_index(drop=True)

## Add municipality to distance matrix
* Some municipalities are missing from the input files – add them manually (they were identified after a SIM error)

# Reload the municipality table and the distance matrix from CSV, using the
# first column as index and the first row as header in both files.
# Note: this replaces any HCP frame built earlier in the notebook.
# NOTE(review): the matrix is read from 'DistanceMatrix.csv' here but saved to
# 'data/DistanceMatrix.csv' at the end of the notebook - confirm the intended path.
HCP = pd.read_csv('Gemeinden_geoloc.csv', header=0, index_col=0)
distmatrix = pd.read_csv('DistanceMatrix.csv', header=0, index_col=0)

In [5]:
### new municipality (Gemeinde): Vorderweißenbach
gkz = 41625
new_name = 'Vorderweißenbach'
new_lat = 48.55
new_long = 14.2166667
new_PLZ = 4191

# DataFrame.append was removed in pandas 2.0; pd.concat with a one-row frame
# adds the same row and renumbers the index.
new_row = pd.DataFrame([{'Gemeinde name': new_name, 'Gem. lat': new_lat, 'Gem. lng': new_long,
                         'PLZ': new_PLZ, 'GKZ': gkz, 'Gemeindename': new_name}])
HCP = pd.concat([HCP, new_row], ignore_index=True)

In [6]:
### new municipality (Gemeinde): Peuerbach
gkz = 40819
new_name = 'Peuerbach'
new_lat = 48.345556
new_long = 13.774167
new_PLZ = 4722

# DataFrame.append was removed in pandas 2.0; pd.concat with a one-row frame
# adds the same row and renumbers the index.
new_row = pd.DataFrame([{'Gemeinde name': new_name, 'Gem. lat': new_lat, 'Gem. lng': new_long,
                         'PLZ': new_PLZ, 'GKZ': gkz, 'Gemeindename': new_name}])
HCP = pd.concat([HCP, new_row], ignore_index=True)

In [7]:
### new municipality (Gemeinde): Vogau
gkz = 61058
new_name = 'Vogau'
new_lat = 46.731944
new_long = 15.600278
new_PLZ = 8472

# DataFrame.append was removed in pandas 2.0; pd.concat with a one-row frame
# adds the same row and renumbers the index.
new_row = pd.DataFrame([{'Gemeinde name': new_name, 'Gem. lat': new_lat, 'Gem. lng': new_long,
                         'PLZ': new_PLZ, 'GKZ': gkz, 'Gemeindename': new_name}])
HCP = pd.concat([HCP, new_row], ignore_index=True)

In [8]:
### new municipality (Gemeinde): Murfeld
gkz = 62347
new_name = 'Murfeld'
new_lat = 46.712869
new_long = 15.692617
new_PLZ = 8423

# DataFrame.append was removed in pandas 2.0; pd.concat with a one-row frame
# adds the same row and renumbers the index.
new_row = pd.DataFrame([{'Gemeinde name': new_name, 'Gem. lat': new_lat, 'Gem. lng': new_long,
                         'PLZ': new_PLZ, 'GKZ': gkz, 'Gemeindename': new_name}])
HCP = pd.concat([HCP, new_row], ignore_index=True)

In [9]:
### new municipality (Gemeinde): Helfenberg
gkz = 41310
new_name = 'Helfenberg'
new_lat = 48.533333
new_long = 14.133333
new_PLZ = 4184

# DataFrame.append was removed in pandas 2.0; pd.concat with a one-row frame
# adds the same row and renumbers the index.
new_row = pd.DataFrame([{'Gemeinde name': new_name, 'Gem. lat': new_lat, 'Gem. lng': new_long,
                         'PLZ': new_PLZ, 'GKZ': gkz, 'Gemeindename': new_name}])
HCP = pd.concat([HCP, new_row], ignore_index=True)

In [10]:
### new municipality (Gemeinde): Reigersdorf
# NOTE(review): GKZ 61056 shares its leading digits with Vogau (61058, PLZ 8472),
# while the PLZ here is 9064 - presumably a different region; confirm GKZ/PLZ.
gkz = 61056
new_name = 'Reigersdorf'
new_lat = 46.649600
new_long = 14.403000
new_PLZ = 9064

# DataFrame.append was removed in pandas 2.0; pd.concat with a one-row frame
# adds the same row and renumbers the index.
new_row = pd.DataFrame([{'Gemeinde name': new_name, 'Gem. lat': new_lat, 'Gem. lng': new_long,
                         'PLZ': new_PLZ, 'GKZ': gkz, 'Gemeindename': new_name}])
HCP = pd.concat([HCP, new_row], ignore_index=True)

In [11]:
# Show the last ten rows to verify that the manually added municipalities are present.
HCP.iloc[-10:]

Unnamed: 0,Gemeinde name,Gem. lat,Gem. lng,PLZ,GKZ,Gemeindename
2113,Adnet 5421 Gemeindeamt,47.696628,13.131849,5421,50202,Adnet
2114,Namlos 6623 Gemeindeamt,47.364252,10.671598,6623,70823,Namlos
2115,Groß-Siegharts 3812 Gemeindeamt,48.7915,15.406551,3812,32207,Groß-Siegharts
2116,Bischofstetten 3232 Gemeindeamt,48.123478,15.470588,3232,31504,Bischofstetten
2117,Vorderweißenbach,48.55,14.216667,4191,41625,Vorderweißenbach
2118,Peuerbach,48.345556,13.774167,4722,40819,Peuerbach
2119,Vogau,46.731944,15.600278,8472,61058,Vogau
2120,Murfeld,46.712869,15.692617,8423,62347,Murfeld
2121,Helfenberg,48.533333,14.133333,4184,41310,Helfenberg
2122,Reigersdorf,46.6496,14.403,9064,61056,Reigersdorf


## Only run if the whole distance matrix should be recalculated

In [13]:
### get all combinations of municipalities
from itertools import combinations
combinations = list(combinations(HCP.GKZ,2))

In [22]:
# Build the symmetric municipality-by-municipality distance matrix (km, rounded
# to 2 decimals), indexed by GKZ on both axes. Diagonal entries are never
# written and therefore stay NaN.

# Each GKZ must identify exactly one row, otherwise the coordinate lookup is ambiguous.
if not HCP.GKZ.is_unique:
    raise ValueError('HCP contains duplicate GKZ values: '
                     + str(sorted(HCP.GKZ[HCP.GKZ.duplicated()].unique())))

# Look up the (lat, lng) of every municipality once, instead of filtering HCP
# with a boolean mask four times per pair inside the loop.
coords_by_gkz = dict(zip(HCP.GKZ, zip(HCP['Gem. lat'], HCP['Gem. lng'])))

distmatrix = pd.DataFrame(columns=HCP.GKZ, index=HCP.GKZ)

max_count = len(combinations)
f = IntProgress(min=0, max=max_count)  # instantiate the bar
display(f)  # display the bar

### loop over all pairs of municipalities
for i, (gkz_a, gkz_b) in enumerate(combinations, start=1):
    dist = np.round(distance.distance(coords_by_gkz[gkz_a], coords_by_gkz[gkz_b]).km, 2)

    # the matrix is symmetric: store the value for (a, b) and (b, a)
    distmatrix.loc[gkz_a, gkz_b] = dist
    distmatrix.loc[gkz_b, gkz_a] = dist

    # Refresh the widget only every 1000 pairs and on the final pair, so the
    # bar reaches `max` (it previously stopped one short) without slowing the loop.
    if i % 1000 == 0 or i == max_count:
        f.value = i

IntProgress(value=0, max=2252503)

In [23]:
from datetime import datetime

# Print the wall-clock time (HH:MM:SS) at which this cell ran.
now = datetime.now()
current_time = f"{now:%H:%M:%S}"
print("Current Time =", current_time)

Current Time = 15:33:20


# Check before saving!

In [None]:
# Inspect the last five rows of the matrix before it is written to disk.
distmatrix.iloc[-5:]

In [None]:
# Write the distance matrix (including its GKZ index and header) to CSV.
distmatrix.to_csv(path_or_buf='data/DistanceMatrix.csv')