# Create distance matrix (direct distance in km) between municipalities
* Gemeinde = municipality

In [None]:
import pandas as pd
import numpy as np
import geopy
from geopy.geocoders import Nominatim
from geopy import distance

### for progress bar in loops
from ipywidgets import IntProgress
from IPython.display import display
import time

## Read in the file containing the geolocations of the municipalities

In [None]:
# Load the municipality geolocation table (UTF-8 encoded CSV).
HCP = pd.read_csv(
    'data/municipality_geolocations.csv',
    encoding='utf-8',
)

In [None]:
# Read the municipality list from Statistik Austria: only the municipality
# code, the municipality name and the postal code of the municipal office are
# kept; the first 3 rows are skipped and 2117 data rows are read.
GEM = pd.read_excel(
    'data/gemliste_knz.xls',
    usecols=['Gemeinde kennziffer', 'Gemeindename', 'PLZ des Gem.Amtes'],
    skiprows=3,
    nrows=2117,
    dtype={'Gemeinde kennziffer': int, 'Gemeindename': str, 'PLZ des Gem.Amtes': int},
)
GEM.rename(columns={'Gemeinde kennziffer': 'GKZ', 'PLZ des Gem.Amtes': 'PLZ'}, inplace=True)

# Attach GKZ and Gemeindename to every geolocated entry via its postal code.
HCP = HCP.join(GEM.set_index('PLZ'), on='PLZ')

# Drop every row whose PLZ found no match in GEM (GKZ is NaN after the join).
# The previous np.where(...)[0].item() only worked when exactly one such row
# existed; dropna handles zero, one or many unmatched rows.
HCP.dropna(subset=['GKZ'], inplace=True)
# The join turned GKZ into float because of the NaNs; restore integer codes.
HCP['GKZ'] = HCP['GKZ'].astype(int)

# Keep one row per municipality (first occurrence) and renumber the index.
HCP.drop_duplicates(['GKZ', 'Gemeindename'], keep='first', inplace=True)
HCP.reset_index(drop=True, inplace=True)

## Add municipality to distance matrix
* Some municipalities are not in the files - add them manually after a SIM error

# Reload the stored distance matrix and the municipality table; in both files
# the first column is used as the index and the first row as the header.
# NOTE(review): the matrix is written to 'data/DistanceMatrix.csv' further
# down in this notebook but read here without the 'data/' prefix - confirm
# which path is intended.
distmatrix = pd.read_csv('DistanceMatrix.csv', header=0, index_col=0)
HCP = pd.read_csv('Gemeinden_geoloc.csv', header=0, index_col=0)

In [None]:
### new municipality (Gemeinde)
gkz = 41625
new_name = 'Vorderweißenbach'
new_lat = 48.55
new_long = 14.2166667
new_PLZ = 4191

# DataFrame.append was removed in pandas 2.0; concatenating a one-row frame
# adds the same row, and ignore_index=True renumbers the index as before.
new_row = pd.DataFrame([{'Gemeinde name': new_name, 'Gem. lat': new_lat, 'Gem. lng': new_long,
                         'PLZ': new_PLZ, 'GKZ': gkz, 'Gemeindename': new_name}])
HCP = pd.concat([HCP, new_row], ignore_index=True)

In [None]:
### new municipality (Gemeinde)
gkz = 40819
new_name = 'Peuerbach'
new_lat = 48.345556
new_long = 13.774167
new_PLZ = 4722

# DataFrame.append was removed in pandas 2.0; concatenating a one-row frame
# adds the same row, and ignore_index=True renumbers the index as before.
new_row = pd.DataFrame([{'Gemeinde name': new_name, 'Gem. lat': new_lat, 'Gem. lng': new_long,
                         'PLZ': new_PLZ, 'GKZ': gkz, 'Gemeindename': new_name}])
HCP = pd.concat([HCP, new_row], ignore_index=True)

In [None]:
### new municipality (Gemeinde)
gkz = 61058
new_name = 'Vogau'
new_lat = 46.731944
new_long = 15.600278
new_PLZ = 8472

# DataFrame.append was removed in pandas 2.0; concatenating a one-row frame
# adds the same row, and ignore_index=True renumbers the index as before.
new_row = pd.DataFrame([{'Gemeinde name': new_name, 'Gem. lat': new_lat, 'Gem. lng': new_long,
                         'PLZ': new_PLZ, 'GKZ': gkz, 'Gemeindename': new_name}])
HCP = pd.concat([HCP, new_row], ignore_index=True)

In [None]:
### new municipality (Gemeinde)
gkz = 62347
new_name = 'Murfeld'
new_lat = 46.712869
new_long = 15.692617
new_PLZ = 8423

# DataFrame.append was removed in pandas 2.0; concatenating a one-row frame
# adds the same row, and ignore_index=True renumbers the index as before.
new_row = pd.DataFrame([{'Gemeinde name': new_name, 'Gem. lat': new_lat, 'Gem. lng': new_long,
                         'PLZ': new_PLZ, 'GKZ': gkz, 'Gemeindename': new_name}])
HCP = pd.concat([HCP, new_row], ignore_index=True)

In [None]:
### new municipality (Gemeinde)
gkz = 41310
new_name = 'Helfenberg'
new_lat = 48.533333
new_long = 14.133333
new_PLZ = 4184

# DataFrame.append was removed in pandas 2.0; concatenating a one-row frame
# adds the same row, and ignore_index=True renumbers the index as before.
new_row = pd.DataFrame([{'Gemeinde name': new_name, 'Gem. lat': new_lat, 'Gem. lng': new_long,
                         'PLZ': new_PLZ, 'GKZ': gkz, 'Gemeindename': new_name}])
HCP = pd.concat([HCP, new_row], ignore_index=True)

In [None]:
### new municipality (Gemeinde)
# NOTE(review): a GKZ starting with 6 would normally belong to Styria, while
# PLZ 9064 and these coordinates look like Carinthia - confirm that the name,
# coordinates and postal code really belong to GKZ 61056.
gkz = 61056
new_name = 'Reigersdorf'
new_lat = 46.649600
new_long = 14.403000
new_PLZ = 9064

# DataFrame.append was removed in pandas 2.0; concatenating a one-row frame
# adds the same row, and ignore_index=True renumbers the index as before.
new_row = pd.DataFrame([{'Gemeinde name': new_name, 'Gem. lat': new_lat, 'Gem. lng': new_long,
                         'PLZ': new_PLZ, 'GKZ': gkz, 'Gemeindename': new_name}])
HCP = pd.concat([HCP, new_row], ignore_index=True)

In [None]:
# Show the last ten rows to verify the manually added municipalities.
HCP.tail(n=10)

## Only run if the whole distance matrix should be re-calculated

In [None]:
# Recompute the full pairwise distance matrix (km, rounded to 2 decimals)
# between all municipalities in HCP; rows and columns are labelled by GKZ.

# A matrix labelled by GKZ needs unique codes (the per-code lookup used
# previously also failed on duplicates, but with an opaque .item() error).
if HCP['GKZ'].duplicated().any():
    raise ValueError('HCP contains duplicate GKZ codes; cannot build the distance matrix')

# Extract the coordinates once instead of filtering HCP with boolean masks
# for every single pair inside the loops.
coords = list(zip(HCP['Gem. lat'].tolist(), HCP['Gem. lng'].tolist()))
n_gem = len(coords)

f = IntProgress(min=0, max=n_gem) # instantiate the bar (one step per row)
display(f) # display the bar

# The distance is symmetric and zero on the diagonal, so only the upper
# triangle is computed and mirrored into the lower one.
dist_km = np.zeros((n_gem, n_gem))
for row, p1_coor in enumerate(coords):
    for col in range(row + 1, n_gem):
        dist = np.round(distance.distance(p1_coor, coords[col]).km, 2)
        dist_km[row, col] = dist
        dist_km[col, row] = dist
    f.value = row + 1 # update the progress bar; reaches max after the last row

distmatrix = pd.DataFrame(dist_km, index=HCP.GKZ, columns=HCP.GKZ)

# Check before saving!

In [None]:
# Inspect the last five rows of the matrix before it is written to disk.
distmatrix.tail(n=5)

In [None]:
# Write the distance matrix (including its index and header) to CSV.
distmatrix.to_csv(path_or_buf='data/DistanceMatrix.csv')