In this notebook the neighbourhood zones are matched with the LMS zones.

In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import os

%matplotlib inline

Load files

In [None]:
# Folder that contains the current working directory; the data paths in the
# following cells are built relative to it.
parent_dir = os.path.dirname(os.getcwd())

In [None]:
# Load the LMS zones and the 2017 'buurt' and 'wijk' layers.
# Paths are assembled with os.path.join so the cell does not depend on
# Windows-style backslash separators.
wijk_buurt_dir = os.path.join(parent_dir, 'Data', 'Wijk buurt', 'WijkBuurtkaart_2017_v3')

lms_du = gpd.read_file(os.path.join(parent_dir, 'Data', 'New', 'lms_zone_du_new.shp'))
buurt = gpd.read_file(os.path.join(wijk_buurt_dir, 'buurt_2017_v3.shp'))
wijk = gpd.read_file(os.path.join(wijk_buurt_dir, 'wijk_2017_v3.shp'))

In [None]:
# Land-use table for the wijken/buurten (semicolon-separated CSV).
# The path is built with os.path.join instead of hard-coded backslashes so it
# resolves on any operating system.
landuse = pd.read_csv(
    os.path.join(parent_dir, 'Data', 'Wijk buurt', '85217NED_UntypedDataSet_28032024_162743.csv'),
    delimiter=';',
)

In [None]:
# Load the PC4 zones according to the CBS.
# The original literal used single, unescaped backslashes ('\D', '\P', '\c'),
# which are invalid escape sequences; os.path.join avoids separators entirely.
pc4 = gpd.read_file(os.path.join(parent_dir, 'Data', 'PC4 2022', 'cbs_pc4_2019_vol.gpkg'))

In [None]:
# Drop the zones that consist of water only: keep rows whose WATER flag is 'NEE'.
buurt = buurt.loc[buurt['WATER'] == 'NEE']
wijk = wijk.loc[wijk['WATER'] == 'NEE']

Make plots of the 'wijken' and 'buurten'

In [None]:
# Overlay the neighbourhood ('buurt') outlines and the LMS zone outlines for a
# sample window of the map.
lms_alpha = 0.2  # one value for the LMS layer and its legend entry, so both look the same

f, ax = plt.subplots(1, 1)

buurt.plot(ax=ax, facecolor='none')
# wijk.plot(ax=ax, facecolor='none', edgecolor='blue')
lms_du.plot(ax=ax, facecolor='none', edgecolor='red', linewidth=4, alpha=lms_alpha)
# pc4.plot(ax=ax, facecolor='none', edgecolor='green')

# Restrict the view to the sample area.
ax.set_xlim(200000, 240000)
ax.set_ylim(500000, 525000)

# Empty proxy lines provide the legend entries for the two layers.
ax.plot([], [], color='red', linewidth=4, alpha=lms_alpha, label='LMS')
ax.plot([], [], color='black', label='Neighbourhood')

ax.set_xticks([])
ax.set_yticks([])
ax.set_title('Example of neighbourhood zones and LMS zones')
ax.legend()

In [None]:
# Same sample window as before, now comparing the 'wijk' outlines (blue)
# with the LMS zone outlines (red).
f, ax = plt.subplots()

# buurt.plot(ax=ax, facecolor='none', label='Buurt')
wijk.plot(ax=ax, facecolor='none', edgecolor='blue')
lms_du.plot(ax=ax, facecolor='none', edgecolor='red', linewidth=4, alpha=0.3)
# pc4.plot(ax=ax, facecolor='none', edgecolor='green')

ax.set(xlim=(200000, 240000), ylim=(500000, 525000))

# Empty proxy lines that only serve as legend entries.
ax.plot([], [], label='LMS', color='red', linewidth=4, alpha=0.3)
ax.plot([], [], label='Wijk', color='blue')

ax.legend()

Looking at this sample, the LMS zones follow the buurt zones relatively closely. The wijk level is also fine, but a bit less exact. The analysis therefore continues with the buurten (= neighbourhoods).

In [None]:
# Get centroids of the neighbourhood zones.
# Work on an explicit copy: writing into a bare column selection of `buurt`
# triggers pandas' SettingWithCopyWarning and is not guaranteed to modify the
# frame that is used afterwards.
buurt_cent = buurt[['BU_CODE', 'BU_NAAM', 'geometry']].copy()
buurt_cent['geometry'] = buurt_cent.geometry.centroid

In [None]:
# Give the LMS zones the right coordinate system (EPSG:28992).
# set_crs only labels the geometries with this CRS, it does not transform the
# coordinates; allow_override=True keeps the cell working when the shapefile
# already carries a (different) CRS definition.
lms_du = lms_du.set_crs('EPSG:28992', allow_override=True)

In [None]:
# Match each LMS zone with the neighbourhood whose centroid is nearest.
# The left join keeps the LMS zones as the base table; the distance to the
# matched centroid is stored in the 'dist_zone' column.
new_gdf = gpd.sjoin_nearest(lms_du, buurt_cent, how='left', distance_col='dist_zone')

In [None]:
# Preview the first five matched rows.
new_gdf.head(5)

In [None]:
# All rows whose BU_CODE occurs more than once, i.e. neighbourhoods that were
# matched to several LMS zones (keep=False flags every occurrence).
duplicates = new_gdf.loc[new_gdf['BU_CODE'].duplicated(keep=False)]

In [None]:
# BU_CODE of every duplicated match as a plain array, for positional lookup.
# (The former `x = dup[12]` was a leftover dead store: `x` is reassigned before
# use in the inspection loop, and the fixed index fails for shorter arrays.)
dup = np.array(duplicates.BU_CODE)

Again, there are some duplicates.


For the indices 0, 2 and 18 it is obvious to which LMS zone the buurt belongs.
The other duplicated buurten are part of both LMS zones; for those, the assumption is made that the buurt lies fully in each of the two LMS zones.

In [None]:
# Inspect the duplicated matches one figure at a time.
# To look at every pair instead of the selected ones, use:
# for i in range(0, len(dup), 2):
for i in [0, 2, 18]:

    x = dup[i]

    # LMS zones matched to this buurt; filter once and reuse for both outlines.
    lms_matches = new_gdf[new_gdf['BU_CODE'] == x]

    f, ax = plt.subplots(1, 1)

    # First and second matched LMS zone.
    lms_matches.iloc[0:1].plot(ax=ax, facecolor='none', edgecolor='lightgrey', linewidth=4)
    lms_matches.iloc[1:2].plot(ax=ax, facecolor='none', edgecolor='lightgreen', linewidth=4)

    # The buurt outline and its centroid.
    buurt[buurt['BU_CODE'] == x].plot(ax=ax, facecolor='none', edgecolor='red', linestyle='--', linewidth=2)
    buurt_cent[buurt_cent['BU_CODE'] == x].plot(ax=ax, label='Centroid buurt zone')

    # Proxy lines for the legend; the colours match the outlines drawn above.
    ax.plot([], [], label=f'LMS zone {i}', color='lightgrey')
    ax.plot([], [], label=f'LMS zone {i + 1}', color='lightgreen')
    ax.plot([], [], label='Buurt zones', color='red', linestyle='--')

    ax.legend()

## 0, 2, 16, 18

Remove rows 0, 3 and 18

In [None]:
# Positions (within `duplicates`) of the matches that are dropped; keep both
# their BU_CODE values and their index labels for the filter in the next cell.
drop_positions = [0, 3, 18]
bu_list = duplicates['BU_CODE'].iloc[drop_positions]
index_list = duplicates.index[drop_positions]

In [None]:
# Drop a row only when both its index label and its BU_CODE were selected
# for removal; every other row is kept.
bu_selected = new_gdf['BU_CODE'].isin(bu_list)
index_selected = new_gdf.index.isin(index_list)
new_gdf = new_gdf.loc[~bu_selected | ~index_selected]

In [None]:
# Preview the first five rows after removing the selected duplicates.
new_gdf.head(5)

### Check if there are LMS zones not included

In [None]:
# Number of distinct LMS zone ids that still have a matched buurt.
len(set(new_gdf['ZONE_ID']))

In [None]:
# Zone ids in 1..1406 that do not occur in the matched table.
set(np.arange(1, 1407)).difference(new_gdf['ZONE_ID'])

Zones 916 and 1377 do not have land-use data currently

There are clearly 'buurten' near enough, so now find the nearest buurt to add to that LMS zone, so there is at least some information about land-use.

### LMS zone 916

In [None]:
# Plot LMS zones 916 and 922 together.
lms_du.loc[lms_du['ZONE_ID'].isin([916, 922])].plot()

In [None]:
# Zoom in on a small window of the map with the buurt outlines and the LMS
# zone outlines overlaid.
lms_alpha = 0.2  # one value for the LMS layer and its legend entry, so both look the same

f, ax = plt.subplots(1, 1)

buurt.plot(ax=ax, facecolor='none')
# wijk.plot(ax=ax, facecolor='none', edgecolor='blue')
lms_du.plot(ax=ax, facecolor='none', edgecolor='red', linewidth=4, alpha=lms_alpha)
# pc4.plot(ax=ax, facecolor='none', edgecolor='green')

# Restrict the view to the area of interest.
ax.set_xlim(92000, 94000)
ax.set_ylim(438000, 440000)

# Empty proxy lines provide the legend entries for the two layers.
ax.plot([], [], color='red', linewidth=4, alpha=lms_alpha, label='LMS')
ax.plot([], [], color='black', label='Buurt')

ax.set_xticks([])
ax.set_yticks([])
ax.set_title('2 LMS zones within 1 neighbourhood zone')

ax.legend()

After some zooming around the map, it turns out that LMS zone 916 shares its buurt with LMS zone 922.

In [None]:
# BU_CODE of the (first) buurt matched to LMS zone 922; reused for zone 916.
bu_code_916 = new_gdf.loc[new_gdf['ZONE_ID'] == 922, 'BU_CODE'].iloc[0]
bu_code_916

In [None]:
# Plot the buurt with the code found for zone 922.
buurt.loc[buurt['BU_CODE'] == bu_code_916].plot()

In [None]:
# Nearest buurt centroid for LMS zone 916 only, using the same kind of join
# (left join, distance stored in 'dist_zone') as for the full table.
lms_du_916 = gpd.sjoin_nearest(
    lms_du[lms_du['ZONE_ID'].isin([916])],
    buurt_cent,
    how='left',
    distance_col='dist_zone',
)
lms_du_916

New Buurt code is the same as the one manually found

### LMS zone 1377

In [None]:
# Plot LMS zone 1377 on its own.
lms_du.loc[lms_du['ZONE_ID'].isin([1377])].plot()

In [None]:
# Zoom in on a window of the map with the buurt outlines and the LMS zone
# outlines overlaid.
lms_alpha = 0.2  # one value for the LMS layer and its legend entry, so both look the same

f, ax = plt.subplots(1, 1)

buurt.plot(ax=ax, facecolor='none')
# wijk.plot(ax=ax, facecolor='none', edgecolor='blue')
lms_du.plot(ax=ax, facecolor='none', edgecolor='red', linewidth=4, alpha=lms_alpha)
# pc4.plot(ax=ax, facecolor='none', edgecolor='green')

# Restrict the view to the area of interest.
ax.set_xlim(135000, 145000)
ax.set_ylim(483000, 488000)

# Empty proxy lines provide the legend entries for the two layers.
ax.plot([], [], color='red', linewidth=4, alpha=lms_alpha, label='LMS')
ax.plot([], [], color='black', label='Buurt')

ax.legend()

Here it is less clear which 'buurt' belongs to the zone. So first merge and then check whether the result is logical.

In [None]:
# Nearest buurt centroid for LMS zone 1377 only (left join, distance stored
# in 'dist_zone').
lms_du_1377 = gpd.sjoin_nearest(
    lms_du[lms_du['ZONE_ID'].isin([1377])],
    buurt_cent,
    how='left',
    distance_col='dist_zone',
)
lms_du_1377

This is not the most logical buurt... Try again

In [None]:
# Draw the first buurt matched to LMS zone 1377 (black) together with the
# zone itself (red) to judge the match visually.
fig, ax = plt.subplots()

first_match_code = lms_du_1377['BU_CODE'].iloc[0]
buurt.loc[buurt['BU_CODE'] == first_match_code].plot(ax=ax, facecolor='none', edgecolor='black')
lms_du.loc[lms_du['ZONE_ID'].isin([1377])].plot(ax=ax, facecolor='none', edgecolor='red')

# Empty proxy lines that only serve as legend entries.
ax.plot([], [], label='LMS', color='red')
ax.plot([], [], label='Buurt', color='black')
ax.legend()


Now try again with the buurt polygons instead of their centroids; this can return several 'buurten'.

In [None]:
# Repeat the nearest join for LMS zone 1377, this time against the buurt
# geometries themselves rather than their centroids.
buurt_shapes = buurt[['BU_CODE', 'BU_NAAM', 'geometry']]
lms_du_1377 = gpd.sjoin_nearest(
    lms_du[lms_du['ZONE_ID'].isin([1377])],
    buurt_shapes,
    how='left',
    distance_col='dist_zone',
)
lms_du_1377

In [None]:
# Visual check of the first candidate: its buurt outline in black, LMS zone
# 1377 in red.
candidate_code = lms_du_1377['BU_CODE'].iloc[0]

fig, ax = plt.subplots()
buurt.loc[buurt['BU_CODE'] == candidate_code].plot(ax=ax, facecolor='none', edgecolor='black')
lms_du.loc[lms_du['ZONE_ID'].isin([1377])].plot(ax=ax, facecolor='none', edgecolor='red')

# Empty proxy lines that only serve as legend entries.
ax.plot([], [], label='LMS', color='red')
ax.plot([], [], label='Buurt', color='black')
ax.legend()


In [None]:
# Keep only the first candidate row (still as a one-row frame).
lms_du_1377 = lms_du_1377.head(1)

When looping through the 4 possible buurten, the first seems to be the most logical option.

Now finally, merge the 2 new dataframes with the full one.

In [None]:
# Write the zone-916 match into the full table under its own index label
# (this adds the row when the label is not present yet).
new_gdf.loc[lms_du_916.index[0]] = lms_du_916.iloc[0]

In [None]:
# Write the zone-1377 match into the full table under its own index label
# (this adds the row when the label is not present yet).
new_gdf.loc[lms_du_1377.index[0]] = lms_du_1377.iloc[0]

In [None]:
# Show the last five rows to check the newly written entries.
new_gdf.tail(5)

New rows are added

In [None]:
# Re-check which zone ids in 1..1406 are still absent from the matched table.
set(np.arange(1, 1407)).difference(new_gdf['ZONE_ID'])

And all LMS zones are now part of the geodataframe!

### Calculate land-use percentage for each LMS zone

In [None]:
# Distinct LMS zone ids present in the matched table.
zone_id_list = list(set(new_gdf['ZONE_ID']))

In [None]:
# Land-use share per LMS zone: for every zone, sum the land-use columns of its
# matched buurten and divide by the summed total area of those buurten.
# The result has one row per zone id and one column per land-use column.
landuse_df = pd.DataFrame(columns=landuse.columns[4:])
landuse_df = landuse_df.rename(columns={'Codering_3':'ZONE_ID'})
landuse_df = landuse_df.set_index('ZONE_ID')

for z in zone_id_list:

    bu = new_gdf[new_gdf.ZONE_ID == z].BU_CODE # Get buurten from LMS zone
    lms_bu = landuse[landuse.WijkenEnBuurten.isin(bu)] # Get landuses from buurten

    # Sum all land-use columns (position 5 onwards) at once instead of looping
    # over a hard-coded number of columns.
    zone_sums = lms_bu.iloc[:, 5:].sum()
    tot_opp = zone_sums.iloc[0] # Total area of the buurten (column at position 5)

    # A zone without matching land-use rows has tot_opp == 0 and yields NaN shares.
    landuse_df.loc[z] = np.round(zone_sums / tot_opp, 3).to_numpy(dtype=float)


Also calculate total areas for each zone

In [None]:
# Absolute land-use totals per LMS zone: for every zone, sum the land-use
# columns of its matched buurten (no normalisation by total area).
landuse_df_area = pd.DataFrame(columns=landuse.columns[4:])
landuse_df_area = landuse_df_area.rename(columns={'Codering_3':'ZONE_ID'})
landuse_df_area = landuse_df_area.set_index('ZONE_ID')

for z in zone_id_list:

    bu = new_gdf[new_gdf.ZONE_ID == z].BU_CODE # Get buurten from LMS zone
    lms_bu = landuse[landuse.WijkenEnBuurten.isin(bu)] # Get landuses from buurten

    # Sum all land-use columns (position 5 onwards) at once instead of looping
    # over a hard-coded number of columns; the total area is not needed here.
    landuse_df_area.loc[z] = np.round(lms_bu.iloc[:, 5:].sum(), 3).to_numpy(dtype=float)


In [None]:
# Display the per-zone land-use table that holds the shares (sum divided by total area).
landuse_df

In [None]:
# Display the per-zone land-use table that holds the summed areas.
landuse_df_area

## Save to csv

In [None]:
# landuse_df.to_csv(parent_dir + '\\Data\\New\\landuse_lms.csv')

In [None]:
# landuse_df_area.to_csv(parent_dir + '\\Data\\New\\landuse_lms_area.csv')

In [None]:
# new_gdf.to_file(parent_dir + '\\Data\\New\\buurt_lms_match.shp')