In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import os

%matplotlib inline

In [None]:
# Directory one level above the current working directory; all data paths hang off it
parent_dir = os.path.dirname(os.getcwd())

In [None]:
# Load the three inputs from the project's Data folder:
#   lms_pc4 - Excel table linking LMS zone numbers to PC4 postal codes
#   lms_du  - shapefile of LMS zones (used later for its ZONE_ID and deg_urba columns)
#   ovin    - merged OViN table
# Paths are assembled with os.path.join instead of hand-written backslashes: the
# original literal contained the invalid escape sequence '\L', and backslash
# separators only resolve correctly on Windows.
lms_dir = os.path.join(parent_dir, 'Data', 'LMS', 'Zone data')
new_data_dir = os.path.join(parent_dir, 'Data', 'New')

lms_pc4 = pd.read_excel(os.path.join(lms_dir, 'LMS Zonenummering 2018 met PC4.xlsx'), index_col=0)
lms_du = gpd.read_file(os.path.join(new_data_dir, 'lms_zone_du_new.shp'))
ovin = pd.read_csv(os.path.join(new_data_dir, 'merged_ovin.csv'), index_col=0)

Match LMS zones with PC4 postal codes using an existing lookup file. Intermediate results are printed along the way to check that everything goes right.

In [None]:
# Preview the first rows of the LMS zone table to check that it loaded as expected
lms_du.head()

In [None]:
# Table shape when only the first row of every PC4 value is kept
lms_pc4.drop_duplicates(subset='PC4').shape

In [None]:
# Spot check on LMS zone 10: rows that repeat an (LMS, PC4) pair already seen
zone_10 = lms_pc4[lms_pc4.LMS == 10]
zone_10[zone_10.duplicated(subset=['LMS', 'PC4'])]

In [None]:
# Keep one row per (LMS, PC4) pair, then discard rows that contain missing values
lms_pc4_v2 = lms_pc4.drop_duplicates(subset=['LMS', 'PC4']).dropna()

# Rows whose PC4 still occurs more than once; keep=False flags every occurrence,
# so all LMS zones competing for the same PC4 are listed
pc4_is_repeated = lms_pc4_v2['PC4'].duplicated(keep=False)
duplicated_pc4 = lms_pc4_v2[pc4_is_repeated]

duplicated_pc4

In [None]:
# Distinct PC4 codes that occur in duplicated_pc4 (set order, i.e. not sorted)
ambiguous_codes = set(duplicated_pc4['PC4'])
unclear_pc4 = list(ambiguous_codes)

In [None]:

# Reduce the lookup table to one row per PC4:
#   1. drop repeated (LMS, PC4) pairs,
#   2. drop rows with missing values,
#   3. where a PC4 is still listed more than once, keep only its first row.
lms_pc4 = (
    lms_pc4
    .drop_duplicates(subset=['LMS', 'PC4'])
    .dropna()
    .drop_duplicates(subset='PC4')
)

In [None]:
# Preview the lookup table after deduplication
lms_pc4.head()

Not every PC4 zone has a matching LMS zone!

In [None]:
# Set of PC4 codes present in the lms_pc4 lookup table
s = set(lms_pc4['PC4'])

In [None]:
# Postal codes found in the VertPC and AankPC columns of the OViN table
o = set(ovin['VertPC'])
a = set(ovin['AankPC'])

# Union of both sets: from here on `o` holds every postal code seen in either column
o = a | o

In [None]:
# Postal codes that occur in OViN (`o`) but not in the LMS lookup table (`s`)
unlinked_pc = o - s
print(f'There are {len(unlinked_pc)} postal codes that are not linked to a LMS zone')
np.array(list(unlinked_pc))

Add degree of urbanisation to postal code

In [None]:
# Lookup series, built once: PC4 -> LMS zone and LMS zone -> degree of urbanisation
pc4_to_zone = lms_pc4.set_index('PC4')['LMS']
zone_to_du = lms_du.set_index('ZONE_ID')['deg_urba']

# Step 1: postal code -> LMS zone (values without a match become NaN)
for pc_col, zone_col in (('VertPC', 'VertZone'), ('AankPC', 'AankZone')):
    ovin.loc[:, zone_col] = ovin[pc_col].map(pc4_to_zone)

# Step 2: LMS zone -> degree of urbanisation
for zone_col, du_col in (('VertZone', 'VertDU'), ('AankZone', 'AankDU')):
    ovin.loc[:, du_col] = ovin[zone_col].map(zone_to_du)


In [None]:
# Preview the OViN table with the newly added zone and DU columns
ovin.head()

In [None]:
# Rows for which at least one of the two DU columns could not be filled
du_is_missing = ovin['VertDU'].isnull() | ovin['AankDU'].isnull()
report_columns = ['OPID', 'VertPC', 'AankPC', 'VertDU', 'AankDU', 'VertZone', 'AankZone']
missing_DU = ovin[du_is_missing][report_columns]

# Percentage of OViN rows affected
len(missing_DU) / len(ovin) * 100

In [None]:
# Postal codes behind the missing DU values, collected separately per column and then merged
vert_pc_without_du = set(ovin[ovin['VertDU'].isnull()].VertPC)
aank_pc_without_du = set(ovin[ovin['AankDU'].isnull()].AankPC)
missing_DU_lst = list(vert_pc_without_du | aank_pc_without_du)

In [None]:
# Display the rows that lack a degree of urbanisation
missing_DU

These postal codes need extra attention when assigning an LMS zone: some PC4 zones are mapped to multiple LMS zones, and some PC4 zones are not mapped to any LMS zone.

This will be solved in Notebook 4

In [None]:
# Write the two lists of problematic postal codes to text files.
# Paths are built with os.path.join so they resolve on any operating system
# (the hard-coded backslash separators only worked on Windows).
unclearpc4 = np.array(unclear_pc4)
missingpc4 = np.array(missing_DU_lst)

np.savetxt(os.path.join(parent_dir, 'Data', 'New', 'missingPC4.csv'), missingpc4)
np.savetxt(os.path.join(parent_dir, 'Data', 'New', 'unclearPC4.csv'), unclearpc4)


In [None]:
# Save the table of PC4 codes with more than one LMS zone.
# os.path.join keeps the path valid on non-Windows systems as well.
duplicated_pc4.to_csv(os.path.join(parent_dir, 'Data', 'New', 'duplicatedPC4.csv'))

In [None]:
# Read the file back to confirm it was written correctly (OS-independent path)
np.loadtxt(os.path.join(parent_dir, 'Data', 'New', 'missingPC4.csv'))

In [None]:
# Read the file back to confirm it was written correctly (OS-independent path)
np.loadtxt(os.path.join(parent_dir, 'Data', 'New', 'unclearPC4.csv'))

In [None]:
# AankGem values of the rows whose AankPC equals 3868
ovin.loc[ovin['AankPC'] == 3868, ['AankGem']]

### Basic plots

Making some basic plots using the DU to test if everything works.

In [None]:
# Calculate the number of trips for each mode and each DU.
# Result arrays have one row per DU level and one column per mode code (KHvm 1..8);
# each cell is the sum of FactorV_final (presumably the trip weight - confirm).
modes = range(1, 9)

# DU levels that actually occur in the data, in ascending order
du_levels = np.sort(pd.concat([ovin['VertDU'], ovin['AankDU']]).dropna().unique())

mode_vert_DU = np.zeros((len(du_levels), len(modes)))
mode_aank_DU = np.zeros((len(du_levels), len(modes)))

for col, mode in enumerate(modes):
    mode_trips = ovin[ovin.KHvm == mode]
    # reindex aligns each groupby result with du_levels and fills absent levels with 0.
    # Without it, a mode with no trips in some DU level yields a shorter Series and the
    # column assignment fails with a shape mismatch.
    mode_vert_DU[:, col] = (
        mode_trips.groupby('VertDU').FactorV_final.sum().reindex(du_levels, fill_value=0)
    )
    mode_aank_DU[:, col] = (
        mode_trips.groupby('AankDU').FactorV_final.sum().reindex(du_levels, fill_value=0)
    )


In [None]:
# Row totals of mode_vert_DU: the sum over all modes for every DU row
tot_trip_DU = mode_vert_DU.sum(axis=1)
tot_trip_DU

In [None]:
# Display names for the eight travel modes
# (presumably in KHvm code order 1..8 - confirm against the OViN codebook)
labels = ['Car driver', 'Car passenger', 'Train', 
          'BTM', 'Moped', '(e-)Bike', 'Walking', 'Other']

# Degree-of-urbanisation values 1..6, used as x positions of the bars in the plots below
du = np.arange(1, 7)

In [None]:
# Stacked bars with the share of car-driver and car-passenger trips per degree of
# urbanisation: left panel for departures, right panel for arrivals.
f, ax = plt.subplots(1, 2)
f.set_figwidth(10)

direction = [mode_vert_DU, mode_aank_DU]
direction_label = ['Departing', 'Arrival']

for i in range(2):
    # Normalise each panel by its own row totals. Previously both panels were divided
    # by the departure totals, so the arrival fractions were not true shares.
    totals = direction[i].sum(axis=1)
    driver_share = direction[i][:, 0] / totals
    passenger_share = direction[i][:, 1] / totals

    ax[i].bar(du, driver_share, label='Car driver', color='firebrick')
    ax[i].bar(du, passenger_share, label='Car passenger',
              bottom=driver_share, color='salmon')
    ax[i].legend()

    ax[i].set_xlabel('Degree of urbanisation')
    ax[i].set_ylabel('Fraction of trips by car')
    ax[i].set_title(f'{direction_label[i]} degree of urbanisation')

    ax[i].set_ylim(0, 0.55)
    ax[i].set_yticks(np.arange(0, 0.6, 0.05))
    ax[i].set_axisbelow(True)  # draw the grid behind the bars
    ax[i].grid(axis='y')

f.suptitle('Car trips by degree of urbanisation');

In [None]:
# Stacked bars with the share of train and BTM trips per degree of urbanisation:
# left panel for departures, right panel for arrivals.
f, ax = plt.subplots(1, 2)
f.set_figwidth(10)

direction = [mode_vert_DU, mode_aank_DU]
direction_label = ['Departing', 'Arrival']

for i in range(2):
    # Normalise each panel by its own row totals. Previously both panels were divided
    # by the departure totals, so the arrival fractions were not true shares.
    totals = direction[i].sum(axis=1)
    train_share = direction[i][:, 2] / totals
    btm_share = direction[i][:, 3] / totals

    ax[i].bar(du, train_share, label='Train', color='firebrick')
    ax[i].bar(du, btm_share, label='BTM',
              bottom=train_share, color='salmon')
    ax[i].legend()

    ax[i].set_xlabel('Degree of urbanisation')
    ax[i].set_ylabel('Fraction of trips by public transport')
    ax[i].set_title(f'{direction_label[i]} degree of urbanisation')

    ax[i].set_ylim(0, 0.2)
    ax[i].set_yticks(np.arange(0, 0.25, 0.025))
    ax[i].set_axisbelow(True)  # draw the grid behind the bars
    ax[i].grid(axis='y')

f.suptitle('Public transport trips by degree of urbanisation');

In [None]:
# Stacked bars with the share of (e-)bike and walking trips per degree of urbanisation:
# left panel for departures, right panel for arrivals.
f, ax = plt.subplots(1, 2)
f.set_figwidth(10)

direction = [mode_vert_DU, mode_aank_DU]
direction_label = ['Departing', 'Arrival']

for i in range(2):
    # Normalise each panel by its own row totals. Previously both panels were divided
    # by the departure totals, so the arrival fractions were not true shares.
    totals = direction[i].sum(axis=1)
    bike_share = direction[i][:, 5] / totals
    walk_share = direction[i][:, 6] / totals

    ax[i].bar(du, bike_share, label='(e-)Bike', color='firebrick')
    ax[i].bar(du, walk_share, label='Walking',
              bottom=bike_share, color='salmon')
    ax[i].legend()

    ax[i].set_xlabel('Degree of urbanisation')
    ax[i].set_ylabel('Fraction of trips by active modes')
    ax[i].set_title(f'{direction_label[i]} degree of urbanisation')

    ax[i].set_ylim(0, 0.55)
    ax[i].set_yticks(np.arange(0, 0.6, 0.05))
    ax[i].set_axisbelow(True)  # draw the grid behind the bars
    ax[i].grid(axis='y')

f.suptitle('Active mode trips by degree of urbanisation');

In [None]:
# Three panels (car, public transport, active modes), each a stacked bar chart of two
# modes per degree of urbanisation. Only the first entry of `direction` is plotted.
f, ax = plt.subplots(1, 3)
f.set_figwidth(10)

direction = [mode_vert_DU, mode_aank_DU]
direction_label = ['Car', 'Public transport', 'Active modes']
mode_labels = ['Car driver', 'Car passenger', 'Train', 'BTM', '(e-)Bike', 'Walking']
mode_lst = [0, 2, 5]  # column of the lower bar in each panel; the upper bar is the next column

plotted = direction[0]
for i, (panel, first_col) in enumerate(zip(ax, mode_lst)):
    lower_share = plotted[:, first_col] / tot_trip_DU
    upper_share = plotted[:, first_col + 1] / tot_trip_DU

    panel.bar(du, lower_share, label=mode_labels[i * 2], color='firebrick')
    panel.bar(du, upper_share, label=mode_labels[i * 2 + 1],
              bottom=lower_share, color='salmon')
    panel.legend()

    panel.set_xlabel('Degree of urbanisation')
    panel.set_title(direction_label[i])

    panel.set_ylim(0, 0.55)
    panel.set_yticks(np.arange(0, 0.6, 0.05))
    panel.set_xticks(np.arange(1, 7))
    panel.set_axisbelow(True)  # draw the grid behind the bars
    panel.grid(axis='y')

# Only the left-most panel carries the y-axis label
ax[0].set_ylabel('Fraction of trips')

f.suptitle('Ratio of trips for different modes by degree of urbanisation');

## Save to csv

In [None]:
# ovin.to_csv(parent_dir + '\\Data\\New\\Ovin_DU_new.csv')

In [None]:
# lms_pc4.to_csv(parent_dir + '\\Data\\New\\lms_pc4_match_new.csv')