In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
from pylab import cm
import os
from scipy.spatial.distance import pdist, squareform


%matplotlib inline

In [None]:
# Directory one level above the current working directory; all data paths are built from it
parent_dir = os.path.dirname(os.getcwd())

In [None]:
# Paths are assembled with os.path.join so the notebook is not tied to Windows separators
zones = gpd.read_file(os.path.join(parent_dir, 'Data', 'New', 'lms_zone_du_new.shp'))  # LMS Zone data
# Density table; the first CSV column becomes the index
dens = pd.read_csv(os.path.join(parent_dir, 'Data', 'New', 'lms_zone_density.csv'), index_col=0)

In [None]:
# PC4 data. os.path.join avoids backslash escape sequences inside the path literal
# and keeps the path valid on non-Windows systems.
pc4 = gpd.read_file(os.path.join(parent_dir, 'Data', 'PC4 2022', 'cbs_pc4_2019_vol.gpkg'))

In [None]:
# Table matching PC4 postcodes with LMS zones (columns LMS and PC4 are used later on)
lms_pc4_match = pd.read_csv(os.path.join(parent_dir, 'Data', 'New', 'lms_pc4_match_v2.csv'))

In [None]:
# Replace the column outright. `pc4.loc[:, col] = ...` writes into the existing column and,
# from pandas 2.0 on, can keep that column's old dtype, so the integer cast would not stick.
pc4['postcode4'] = pc4['postcode4'].astype(int)

# Blank out negative entries in the numeric columns
# NOTE(review): negatives are presumably sentinel codes for missing values — confirm against the data source
pc4_numeric = pc4.select_dtypes(include=['int16', 'int32', 'int64', 'float16', 'float32', 'float64'])
pc4[pc4_numeric < 0] = np.nan

In [None]:
# Output table, one row per zone. Take an explicit copy: columns are added to it later,
# and writing into a slice of `zones` raises SettingWithCopyWarning.
dest_data = zones[['ZONE_ID']].copy()

## Distance to city centre - determine the centre locations

In [None]:
# Combined density: job density plus population density.
# NOTE(review): the result is aligned on index labels (dens index vs. zones index) — confirm they match.
zones['double_dens'] = dens['Job_dens'].add(dens['Pop_dens'])

In [None]:
# Map of the combined density; the colour scale is capped at vmax=30
f, ax = plt.subplots(1, 1, figsize=(13, 6))

# plt.get_cmap is used because matplotlib.cm.get_cmap was removed in Matplotlib 3.9
cmap = plt.get_cmap('YlOrRd')

zones.plot(ax=ax, column='double_dens', cmap=cmap, vmax=30,
           legend=True, legend_kwds={"label": "Job and population density [jobs + people/ ha]"})

# Hide coordinate ticks
ax.set_xticks([])
ax.set_yticks([])
ax.set_facecolor('lightskyblue')
ax.set_title('Job and population density; capped at 30 jobs + people / ha');

In [None]:
# Initialise both flag columns to 0 ('Dens30plus' is narrowed down later,
# 'Dens30plust' keeps the full thresholded selection)
for flag_col in ('Dens30plus', 'Dens30plust'):
    zones[flag_col] = 0


In [None]:
# Flag every zone whose combined density is at least 20.
# NOTE(review): the column names say "30plus" but the threshold applied here is 20.
is_dense = zones.double_dens >= 20
for flag_col in ('Dens30plus', 'Dens30plust'):
    zones.loc[is_dense, flag_col] = 1

In [None]:
# Map of all zones flagged by the density threshold
f, ax = plt.subplots(1, 1, figsize=(13, 6))

# plt.get_cmap is used because matplotlib.cm.get_cmap was removed in Matplotlib 3.9
cmap = plt.get_cmap('YlOrRd')

zones.plot(ax=ax, column='Dens30plus', cmap=cmap,
           legend=True, legend_kwds={"label": "Center zones"})

ax.set_facecolor('lightskyblue')
ax.set_title('Zones marked with combined density >= 20')
# Hide coordinate ticks; the trailing ';' suppresses the echoed return value
ax.set_xticks([])
ax.set_yticks([]);

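City centres should lie some distance apart, so neighbouring high-density zones should not each count as a separate centre. As a simple rule, keep only one zone per municipality: the one with the highest combined density.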

In [None]:
# Highest combined density among the flagged zones of each municipality (GEM_NAAM)
flagged = zones[zones.Dens30plus == 1]
peak_by_gem = flagged.groupby('GEM_NAAM').double_dens.max()

# Look that peak up for every zone; municipalities without a flagged zone map to NaN,
# so the comparison below is False for them and they are left untouched
gem_peak = zones.GEM_NAAM.map(peak_by_gem)

# Clear the flag of every zone that is less dense than its municipality's peak
zones.loc[zones.double_dens < gem_peak, 'Dens30plus'] = 0

In [None]:
# Map of the remaining centre zones after the per-municipality selection
f, ax = plt.subplots(1, 1, figsize=(13, 6))

# plt.get_cmap is used because matplotlib.cm.get_cmap was removed in Matplotlib 3.9
cmap = plt.get_cmap('YlOrRd')

zones.plot(ax=ax, column='Dens30plus', cmap=cmap,
           legend=True, legend_kwds={"label": "Center zones"})

ax.set_facecolor('lightskyblue')
ax.set_title('City center zones (max 1 per municipality)')
# Hide coordinate ticks; the trailing ';' suppresses the echoed return value
ax.set_xticks([])
ax.set_yticks([]);

With this definition and a threshold of at least 20 units/ha, the centre zones are not too large: only one is bigger than 2000 ha.

In [None]:
# Centre zones whose OPP value exceeds 2000 (presumably area in ha — confirm)
zones.loc[zones.Dens30plus.eq(1) & zones.OPP.gt(2000)]

In total there are 131 center zones.

In [None]:
# Number of zones currently flagged as centre zone
int((zones.Dens30plus == 1).sum())

## Calculate distance to city centre

Set the LMS zone ID as the index to make the following calculations clearer.

In [None]:
# zones_id = zones.set_index(zones['ZONE_ID']).copy()

In [None]:
# Coordinates of every zone (XCOORD / YCOORD columns)
points = zones[['XCOORD', 'YCOORD']]

# Square matrix of pairwise Euclidean distances, labelled with the zones index on both axes
pairwise = squareform(pdist(points))
distances = pd.DataFrame(pairwise, index=points.index, columns=points.index)

# Keep only the rows of centre zones: each column then holds one zone's distance to every centre
is_centre = zones.Dens30plus == 1
distances = distances.loc[is_centre]

# # All neigbour zones within a distance of 3 km
# close_zones = distances[(distances <= 3000) & (distances != 0)]

In [None]:
# Inspect the distance matrix (rows: centre zones, columns: all zones)
distances

In [None]:
# Column-wise minimum: for every zone, the distance to its nearest centre zone
dist_to_center = distances.min(axis=0)

In [None]:
# Nearest-centre distance per zone, divided by 1000 and rounded to 2 decimals.
# It is recomputed from `distances` rather than rescaling `dist_to_center` in place,
# so re-running this cell cannot divide by 1000 a second time.
# NOTE(review): assumes the coordinates are in metres, so the result is in km — confirm.
dist_to_center = (distances.min(axis=0) / 1000).round(2)

In [None]:
# Inspect the per-zone distance to the nearest centre zone (scaled by 1/1000, 2 decimals)
dist_to_center

In [None]:
# Store the distance to the nearest centre in the output table (aligned on the row index)
dest_data['Dist_to_center'] = dist_to_center

In [None]:
# Inspect the output table built so far
dest_data

In [None]:
# Attach the same distances to the zones frame so they can be mapped
zones.loc[:, 'Dist_center'] = dist_to_center

In [None]:
# Map of the distance to the nearest centre zone; the colour scale is capped at vmax=30
f, ax = plt.subplots(1, 1, figsize=(13, 6))

# plt.get_cmap is used because matplotlib.cm.get_cmap was removed in Matplotlib 3.9
cmap = plt.get_cmap('YlOrRd')

zones.plot(ax=ax, column='Dist_center', cmap=cmap, vmax=30,
           legend=True, legend_kwds={"label": "Distance to city centre [km]"})

# Hide coordinate ticks
ax.set_xticks([])
ax.set_yticks([])
ax.set_facecolor('lightskyblue')
ax.set_title('Distance to a city centre');

In [None]:
# 2x2 overview: density, thresholded zones, selected centre zones, distance to centre
f, ax = plt.subplots(2, 2, figsize=(13, 13))

# plt.get_cmap is used because matplotlib.cm.get_cmap was removed in Matplotlib 3.9
cmap = plt.get_cmap('YlOrRd')

zones.plot(ax=ax[0, 0], column='double_dens', cmap=cmap, vmax=30,
           legend=True, legend_kwds={"label": "Job and population density [jobs + people/ ha]"})

# 'Dens30plust' still holds every zone above the threshold
zones.plot(ax=ax[0, 1], column='Dens30plust', cmap=cmap,
           legend=True, legend_kwds={"label": "Center zones"})

# 'Dens30plus' holds the selection after the per-municipality filter
zones.plot(ax=ax[1, 0], column='Dens30plus', cmap=cmap,
           legend=True, legend_kwds={"label": "Center zones"})

zones.plot(ax=ax[1, 1], column='Dist_center', cmap=cmap, vmax=30,
           legend=True, legend_kwds={"label": "Distance to city centre [km]"})

# Same cosmetic settings on all four panels
for panel in ax.flat:
    panel.set_xticks([])
    panel.set_yticks([])
    panel.set_facecolor('lightskyblue')

ax[0, 0].set_title('a. Combined density (capped at 30 units/ha)')
ax[0, 1].set_title('b. All zones with density >= 20 units/ha')
ax[1, 0].set_title('c. All selected centre zones')
ax[1, 1].set_title('d. Distance to city centre');



## Average distance to points of interest

In [None]:
# Positional column selections from pc4, one frame per point-of-interest category.
# Every frame starts with columns 0 and 1; the remaining positions are the category's
# distance columns (the later loop averages columns 2+ and weights by column 1).
# NOTE(review): positions are hard-coded — verify them against the pc4 column order.
food = pc4.iloc[:, [0, 1, 40, 52, 56, 64]]
commercial = pc4.iloc[:, [0, 1, 44, 48]]
health = pc4.iloc[:, [0, 1, 118, 122, 130, 131]]
recreation = pc4.iloc[:, [0, 1, 80, 84, 88, 92, 96, 97, 98, 101]]
education = pc4.iloc[:, [0, 1, 102, 106, 110, 68, 72]]

# Category order defines the row order of the result array computed from this list
PoI = [food, commercial, health, recreation, education]


In [None]:
# Weighted mean distance to points of interest, per category (rows) and LMS zone (columns).
# NOTE(review): zone IDs are assumed to run 1..N_ZONES and to match the row order of `zones` — confirm.
N_ZONES = 1406

# Start from NaN: zones without usable data simply stay NaN
PoI_arr = np.full((len(PoI), N_ZONES), np.nan)

for z in range(1, N_ZONES + 1):
    # Postal codes belonging to this LMS zone; looked up once per zone, not once per category
    postal_codes = lms_pc4_match.loc[lms_pc4_match.LMS == z, 'PC4']

    for c, category in enumerate(PoI):
        x = category[category.postcode4.isin(postal_codes)]

        # Mean distance over all points of interest in the category, per postal code
        a = x.iloc[:, 2:].mean(axis=1)

        # Drop postal codes without any distance value so they do not enter the weights
        valid = a.notnull()
        weights = x.iloc[:, 1][valid]  # column 1 is the weight (inhabitants, per the original comment)
        total_weight = weights.sum()

        # Weighted average over the zone's postal codes; needs a positive total weight
        if total_weight > 0:
            PoI_arr[c, z - 1] = (a[valid] * weights).sum() / total_weight


In [None]:
# Round all distances to 2 decimals
PoI_arr = np.around(PoI_arr, decimals=2)

In [None]:
# Inspect the rounded (category x zone) array of weighted mean distances
PoI_arr

In [None]:
# One output column per category; rows of PoI_arr follow the order of the PoI list
poi_columns = ['Dist_food', 'Dist_commercial', 'Dist_health', 'Dist_recreation', 'Dist_education']
for col_name, category_dist in zip(poi_columns, PoI_arr):
    dest_data.loc[:, col_name] = category_dist


In [None]:
# Average over the categories per zone, ignoring NaN entries, rounded to 2 decimals
mean_poi_dist = np.nanmean(PoI_arr, axis=0)
dest_data.loc[:, 'Dist_point_of_interest'] = mean_poi_dist.round(2)

In [None]:
# Inspect the output table including the point-of-interest distance columns
dest_data

In [None]:
# Copy the point-of-interest distances onto the zones frame for mapping:
# first the NaN-ignoring mean over the categories, then one column per category
zones['Dist_PoI'] = np.nanmean(PoI_arr, axis=0).round(2)

poi_columns = ['Dist_food', 'Dist_commercial', 'Dist_health', 'Dist_recreation', 'Dist_education']
for col_name, category_dist in zip(poi_columns, PoI_arr):
    zones.loc[:, col_name] = category_dist

In [None]:
# Map of the average distance to points of interest per zone
f, ax = plt.subplots(1, 1, figsize=(13, 6))

# plt.get_cmap is used because matplotlib.cm.get_cmap was removed in Matplotlib 3.9
cmap = plt.get_cmap('YlOrRd')

zones.plot(ax=ax, column='Dist_PoI', cmap=cmap,
           legend=True, legend_kwds={"label": "Average distance to points of interest [km]"})

# Hide coordinate ticks
ax.set_xticks([])
ax.set_yticks([])
ax.set_facecolor('lightskyblue')
ax.set_title('Distance to points of interest');

## Save to csv

In [None]:
# Write the output table (the row index is written as the first CSV column);
# os.path.join keeps the path valid on non-Windows systems
dest_data.to_csv(os.path.join(parent_dir, 'Data', 'New', 'lms_zone_dest_access.csv'))

In [None]:
# Read the file back to check what was written; os.path.join keeps the path portable
pd.read_csv(os.path.join(parent_dir, 'Data', 'New', 'lms_zone_dest_access.csv'))