In this notebook, all PC4 zones that could not be matched easily are matched to LMS zones.

In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import os

%matplotlib inline

Load all necessary files

In [None]:
# Directory one level above the current working directory
parent_dir = os.path.dirname(os.getcwd())

In [None]:
# Build every path with os.path.join: the separators are then correct on any
# operating system and no string literal relies on unescaped backslashes.
data_dir = os.path.join(parent_dir, 'Data')
new_data_dir = os.path.join(data_dir, 'New')

lms_zones = gpd.read_file(os.path.join(new_data_dir, 'lms_zone_du.shp'))  # load shapefile lms zones with DU
pc4 = gpd.read_file(os.path.join(data_dir, 'PC4 2022', 'cbs_pc4_2019_vol.gpkg'))  # load PC4 zones according to the CBS

# Lists of PC4 zones without match
missingPC4 = np.loadtxt(os.path.join(new_data_dir, 'missingPC4.csv'))
doublePC4 = pd.read_csv(os.path.join(new_data_dir, 'duplicatedPC4.csv'))

In [None]:
# os.path.join keeps the paths valid on every operating system
# (the original strings hard-coded Windows backslash separators).
lms_du = gpd.read_file(os.path.join(parent_dir, 'Data', 'New', 'lms_zone_du_new.shp'))
lms_pc4 = pd.read_csv(os.path.join(parent_dir, 'Data', 'New', 'lms_pc4_match_new.csv'))

In [None]:
# First CSV column is used as the index; os.path.join keeps the path portable.
ovin = pd.read_csv(os.path.join(parent_dir, 'Data', 'New', 'Ovin_final.csv'), index_col=0)

Get centroids of PC4 zones

In [None]:
# Work on an explicit copy: assigning into a column selection of `pc4` is
# ambiguous (view vs. copy) and triggers pandas' SettingWithCopyWarning.
pc4_cent = pc4[['postcode4', 'geometry']].copy()

# Replace each PC4 polygon by its centroid
pc4_cent['geometry'] = pc4.geometry.centroid

In [None]:
# Give the LMS zones the right coordinate system. set_crs only (re)labels the
# CRS, it does not reproject; allow_override=True keeps the original behaviour
# of replacing whatever CRS was read from the shapefile.
lms_zones = lms_zones.set_crs('EPSG:28992', allow_override=True)

These plots can be used to show a PC4 zone and the corresponding LMS zones, for the PC4 zones that are matched to more than one LMS zone.

In [None]:
# Distinct PC4 codes listed in doublePC4, as an array
unique_double_pc4 = set(doublePC4['PC4'])
double_arr = np.array(list(unique_double_pc4))

# PC4 code to inspect, picked by position
# NOTE(review): the original comment listed "16 25" — presumably other positions of interest; confirm
x = double_arr[36]

In [None]:
# LMS zones listed in doublePC4 for the selected PC4 code
is_selected_pc4 = doublePC4['PC4'] == x
lms_arr = np.array(doublePC4.loc[is_selected_pc4, 'LMS'])

# From here on `x` is the PC4 code as a 4-character string (used to filter 'postcode4')
x = str(x)[:4]

In [None]:
# Draw the selected PC4 zone, its centroid and every LMS zone linked to it.
f, ax = plt.subplots(1, 1)

# All LMS zones linked to the PC4 zone, selected in one go
linked_lms = lms_zones[lms_zones.ZONE_ID.isin(lms_arr)]
linked_lms.plot(ax=ax, facecolor='none', edgecolor='lightgrey', linewidth=4)

pc4[pc4['postcode4'] == x].plot(ax=ax, facecolor='none', edgecolor='red', linestyle='--', linewidth=2)
pc4_cent[pc4_cent['postcode4'] == x].plot(ax=ax, label='Centroid PC4 zone')

# Empty lines that only provide legend entries for the two outline styles
# (the LMS entry is drawn in 'grey', the outlines themselves in 'lightgrey')
ax.plot([], [], label='LMS zones', color='grey')
ax.plot([], [], label='PC4 zones', color='red', linestyle='--')
ax.set_xticks([])
ax.set_yticks([])

# Derive the count from the data instead of hard-coding it in the title
n_lms_zones = np.unique(lms_arr).size
ax.set_title(f'{n_lms_zones} LMS zones within 1 PC4 zone')
ax.legend();


Next, look at the PC4 zones without any LMS zone match. These are matched manually.

In [None]:
# Attach to every LMS zone its nearest PC4 centroid; the distance is stored in 'dist_zone'
match_gdf = gpd.sjoin_nearest(
    lms_zones,
    pc4_cent,
    how='left',
    distance_col='dist_zone',
)

In [None]:
# Preview: second entry of missingPC4 turned into a 4-character PC4 string
second_missing = missingPC4[1]
x = str(second_missing)[:4]
x

In [None]:
# For every PC4 code in missingPC4: look up the LMS zone(s) whose nearest PC4
# centroid is that code, plot them together with the PC4 zone, and record the
# pair when exactly one LMS zone is found.
empty_pc = []  # PC4 codes for which match_gdf holds no LMS zone
match_arr = np.zeros((len(missingPC4), 2))  # row i = (PC4, LMS) for a unique match; other rows stay 0

for i, pc in enumerate(missingPC4):

    x = str(pc)[:4]  # first four characters of the value, used as the PC4 code string

    match = match_gdf[match_gdf['postcode4'] == x]
    zone_ids = match.ZONE_ID

    f, ax = plt.subplots(1, 1)

    match.plot(ax=ax, facecolor='none', edgecolor='lightgrey', linewidth=4)
    pc4[pc4['postcode4'] == x].plot(ax=ax, facecolor='none', edgecolor='red', linestyle='--', linewidth=2)
    pc4_cent[pc4_cent['postcode4'] == x].plot(ax=ax, label='Centroid PC4 zone')

    # Empty lines that only provide legend entries for the two outline styles
    ax.plot([], [], label='LMS zones', color='grey')
    ax.plot([], [], label='PC4 zones', color='red', linestyle='--')
    ax.set_xticks([])
    ax.set_yticks([])

    if len(zone_ids) == 1:
        # Unique match: store it
        zone_id = int(zone_ids.iloc[0])
        ax.set_title(f'Postal code {x} and LMS zone {zone_id}')
        match_arr[i, :] = [int(x), zone_id]

    elif len(zone_ids) == 0:
        ax.set_title(f'No match could be found for: {x}')
        empty_pc.append(x)
    else:
        # Several candidates: only reported in the title, nothing is stored
        ax.set_title(f'Postal code {x} has been matched with {np.array(zone_ids)}')
    ax.legend()

    # Render and release this figure now, so figures do not accumulate
    # (and trigger matplotlib's open-figure warning) while the loop runs.
    plt.show()
    plt.close(f)

In [None]:
# Keep only the rows that were filled in (PC4 column is non-zero). Filtering by
# row avoids the hard-coded row count of the former reshape(33, 2) and keeps
# each (PC4, LMS) pair together.
match_arr = match_arr[match_arr[:, 0] != 0]

In [None]:
# Manual matches as a two-column table: PC4 code and LMS zone
match_columns = ['PC4', 'LMS']
match_df = pd.DataFrame(data=match_arr, columns=match_columns)

In [None]:
# Union of the existing PC4-LMS matches and the manual matches
match_df = pd.merge(
    lms_pc4,
    match_df,
    how='outer',
    on=['PC4', 'LMS'],
)

Add final matches to the OViN combined dataframe.

In [None]:
# Lookup series: PC4 code -> LMS zone, and LMS zone -> 'deg_urba' value
pc4_to_lms = match_df.set_index('PC4')['LMS']
zone_to_du = lms_du.set_index('ZONE_ID')['deg_urba']

# LMS zone for the 'VertPC' and 'AankPC' postal codes of every trip
ovin.loc[:, 'VertZone'] = ovin['VertPC'].map(pc4_to_lms)
ovin.loc[:, 'AankZone'] = ovin['AankPC'].map(pc4_to_lms)

# 'deg_urba' value of those zones
ovin.loc[:, 'VertDU'] = ovin['VertZone'].map(zone_to_du)
ovin.loc[:, 'AankDU'] = ovin['AankZone'].map(zone_to_du)


There are still 152 trips with a missing zone. These will also be removed.

In [None]:
# Keep only the trips for which both zones are known
has_both_zones = ovin.VertZone.notnull() & ovin.AankZone.notnull()
ovin = ovin[has_both_zones]

In [None]:
# Show the rows where OP equals 1
ovin.loc[ovin['OP'] == 1]

## Save to csv

In [None]:
# ovin.to_csv(parent_dir + '\\Data\\New\\Ovin_DU_newv2.csv')