# Create node list containing geolocation
* nodes (ID, speciality, Gemeinde, num_initial_patients), extended with district ID, state ID and the Gemeinde's name, latitude and longitude

In [None]:
import pandas as pd
from os.path import join
import numpy as np
from scipy import sparse 

In [None]:
# Load the matched doctor table and keep only the columns that describe a node,
# renamed to the identifiers used in the node list.
MF = pd.read_csv('data/matched_and_imputed_doctors_with_capacity_threshold0.9.csv')

# source column -> node-list column (insertion order fixes the column order of NL)
node_columns = {
    'adj_index': 'docid',
    'specialization': 'fg',
    'N_total_quarterly_patients': 'initial_patients',
    'gemeinde': 'gemeinde_id',
}
NL = MF[list(node_columns)].rename(columns=node_columns)

In [None]:
### add info on latitude, longitude and gemeinde name
# Geolocation table; it is matched to the official municipality list via 'PLZ'.
HCP = pd.read_csv('data/municipality_geolocations.csv', encoding='utf-8')

# file from statistik austria
GEM = pd.read_excel('data/gemliste_knz.xls',usecols=['Gemeinde kennziffer', 'Gemeindename','PLZ des Gem.Amtes'],
            skiprows=3, nrows=2117, dtype={'Gemeinde kennziffer':int,'Gemeindename':str,'PLZ des Gem.Amtes':int})
GEM.rename(columns={'Gemeinde kennziffer':'GKZ','PLZ des Gem.Amtes':'PLZ'},inplace=True)

# Attach GKZ and Gemeindename to every geolocation row by postal code.
HCP = HCP.join(GEM.set_index('PLZ'),on='PLZ')
HCP.reset_index(drop=True,inplace=True)

# Rows whose PLZ has no match in GEM carry a NaN GKZ after the join. Drop all of
# them with a boolean mask: the earlier `np.where(...)[0].item()` raised unless
# exactly one such row existed (zero or several unmatched rows crashed the cell).
HCP = HCP[np.isfinite(HCP.GKZ)].copy()
HCP.GKZ = HCP.GKZ.astype(int)

# Keep one row per (GKZ, Gemeindename) pair and renumber the index.
HCP.drop_duplicates(['GKZ','Gemeindename'],keep='first',inplace=True)
HCP.reset_index(drop=True,inplace=True)

In [None]:
### new gemeinden
# Municipalities added to HCP by hand.
# One tuple per municipality: (GKZ, name, latitude, longitude, PLZ).
NEW_GEMEINDEN = [
    (41625, 'Vorderweißenbach', 48.55, 14.2166667, 4191),
    (40819, 'Peuerbach', 48.345556, 13.774167, 4722),
    (61058, 'Vogau', 46.731944, 15.600278, 8472),
    (62347, 'Murfeld', 46.712869, 15.692617, 8423),
    (41310, 'Helfenberg', 48.533333, 14.133333, 4184),
    # NOTE(review): this GKZ starts with 6 while the PLZ starts with 9; the other
    # rows pair leading digits 4/4 and 6/8 -- confirm the GKZ of this entry.
    (61056, 'Reigersdorf', 46.649600, 14.403000, 9064),
]

# The loop variables reuse the names of the former per-entry assignments, so they
# stay bound (to the last entry) after this cell exactly as before.
new_rows = []
for gkz, new_name, new_lat, new_long, new_PLZ in NEW_GEMEINDEN:
    new_rows.append({'Gemeinde name': new_name, 'Gem. lat': new_lat, 'Gem. lng': new_long,
                     'PLZ': new_PLZ, 'GKZ': gkz, 'Gemeindename': new_name})

# DataFrame.append was removed in pandas 2.0; a single concat adds all rows at once
# and leaves any HCP column not listed above as NaN for the new rows.
HCP = pd.concat([HCP, pd.DataFrame(new_rows)], ignore_index=True)

In [None]:
### add district and federal state IDs
# The IDs are prefixes of the municipality code: first three characters -> district,
# first character -> federal state.
gkz_text = [str(code) for code in NL.gemeinde_id]
NL['district_ID'] = [int(text[:3]) for text in gkz_text]
NL['state_ID'] = [int(text[:1]) for text in gkz_text]

# Per-municipality lookup (coordinates and name), keyed by the municipality code and
# already carrying the column names used in the node list.
geo_lookup = (
    HCP[['Gem. lat', 'Gem. lng', 'GKZ', 'Gemeindename']]
    .rename(columns={'GKZ': 'gemeinde_id',
                     'Gem. lat': 'gemeinde_lat',
                     'Gem. lng': 'gemeinde_lng',
                     'Gemeindename': 'gemeinde_name'})
    .set_index('gemeinde_id')
)

# Left join: nodes whose gemeinde_id is absent from the lookup get NaN in the new columns.
NL = NL.join(geo_lookup, on='gemeinde_id')

In [None]:
# Write the finished node list as a comma-separated file without the row index.
NL.to_csv(
    'results/PSNW_NodeList.csv',
    sep=',',
    index=False,
    encoding='ISO-8859-15',
)