In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
from pylab import cm
import os
from scipy import spatial

%matplotlib inline

In [None]:
# Project root: the directory one level above the notebook's working directory.
parent_dir = os.path.dirname(os.getcwd())

In [None]:
# LMS zone data. The path is built with os.path.join so it also resolves on
# non-Windows systems (the hard-coded '\\' separators only worked on Windows).
zones = gpd.read_file(os.path.join(parent_dir, 'Data', 'New', 'lms_zone_du_new.shp'))

In [None]:
# Public transport stop layer. The path is built with os.path.join so it is not
# tied to Windows path separators.
ov = gpd.read_file(os.path.join(parent_dir, 'Data', 'ov', 'Openbaar_Vervoer_Nederland.shp'))

In [None]:
# Tab-separated train station table (ISO-8859-1 encoded). The path is built with
# os.path.join so it is not tied to Windows path separators.
stations = pd.read_csv(
    os.path.join(parent_dir, 'Data', 'LMS', 'Zone data', 'Stations2018.txt'),
    delimiter='\t',
    encoding="ISO-8859-1",
)

In [None]:
# Result table, one row per zone. Take an explicit copy: columns are added to this
# frame throughout the notebook, and writing into a bare column subset of `zones`
# triggers pandas' SettingWithCopyWarning.
transit_data = zones[['ZONE_ID']].copy()

## Parking fare

In [None]:
# Copy the zone column PT_KP into the result table as the parking fare.
transit_data['Parking_fare'] = zones['PT_KP']

In [None]:
# Inspect the result table after adding the parking fare column.
transit_data

## Distance to train station

The LMS data lists many train stations. For each zone we only determine the closest station and the closest intercity station.

In [None]:
# Inspect the raw station table loaded from Stations2018.txt.
stations

In [None]:
# Station coordinates keyed by StationID; rows whose X or Y coordinate is not
# strictly positive are treated as "no coordinates" and dropped.
coord = (
    stations.loc[:, ['StationID', 'Xcoord', 'Ycoord']]
    .set_index('StationID')
    .loc[lambda xy: (xy['Xcoord'] > 0) & (xy['Ycoord'] > 0)]
)

In [None]:
# Centroid coordinates (XCOORD, YCOORD) of every zone.
points = zones.loc[:, ['XCOORD', 'YCOORD']]

In [None]:
# Station coordinates as a plain (n_stations, 2) array for the KD-tree.
A = coord.to_numpy()

In [None]:
# Zone centroid coordinates as a plain (n_zones, 2) array; these are the query points.
p = points.to_numpy()


In [None]:
# Nearest station per zone centroid: `dist` is the distance to it and `index` its
# row position within `A` (and therefore within `coord`).
station_tree = spatial.KDTree(A)
dist, index = station_tree.query(p)

In [None]:
# `index` holds row positions in `coord`, i.e. in the stations that passed the
# coordinate filter. The same filter therefore has to be applied before the
# positional lookup; indexing the unfiltered `stations` picks the wrong rows as
# soon as any station without coordinates precedes the match.
# The mask must stay identical to the one used to build `coord`.
stations_with_coord = stations[(stations.Xcoord > 0) & (stations.Ycoord > 0)]
closest_station = stations_with_coord.iloc[index]
closest_station = closest_station.reset_index()

In [None]:
# Attach the zone id and the distance (divided by 1000, rounded to 2 decimals)
# to each zone's closest-station row.
closest_station['ZONE_ID'] = transit_data['ZONE_ID']
closest_station['Distance'] = (dist / 1000).round(2)

In [None]:
closest_station # One row per LMS zone: the attributes of its closest train station, plus ZONE_ID and Distance.

In [None]:
# Store the nearest-station distance on the zone layer, scaled by 1/1000
# (the map below labels this column in km).
dist_km = dist / 1000
zones['Distance_TS'] = dist_km

In [None]:
# Choropleth of the distance from each zone centroid to its nearest train station.
f, ax = plt.subplots(1, 1, figsize=(13, 6))

# `cm.get_cmap` was removed in Matplotlib 3.9; `plt.get_cmap` works on old and new versions.
cmap = plt.get_cmap('YlOrRd')

zones.plot(ax=ax, column='Distance_TS', cmap=cmap,
           legend=True, legend_kwds={"label": "Distance to closest train station [km]"})

# Map axes carry no useful tick information; the blue face colour stands in for water.
ax.set_xticks([])
ax.set_yticks([])
ax.set_facecolor('lightskyblue')
ax.set_title('Distance to closest train station');

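Now do the same for intercity stations, i.e. stations with a non-zero intercity frequency in at least one period (`AMICFrequency`, `PMICFrequency` or `OPICFrequency`).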

In [None]:
# Intercity stations: any of the three intercity frequency columns is positive.
ic_freq_cols = ['AMICFrequency', 'PMICFrequency', 'OPICFrequency']
ic_stations = stations[stations[ic_freq_cols].gt(0).any(axis=1)]

In [None]:
# Intercity station coordinates keyed by StationID; rows whose X or Y coordinate
# is not strictly positive are treated as "no coordinates" and dropped.
ic_coord = (
    ic_stations.loc[:, ['StationID', 'Xcoord', 'Ycoord']]
    .set_index('StationID')
    .loc[lambda xy: (xy['Xcoord'] > 0) & (xy['Ycoord'] > 0)]
)

In [None]:
# Intercity station coordinates as a plain (n_stations, 2) array for the KD-tree.
A_ic = ic_coord.to_numpy()

In [None]:
# Nearest intercity station per zone centroid: `ic_dist` is the distance to it and
# `ic_index` its row position within `A_ic` (and therefore within `ic_coord`).
ic_station_tree = spatial.KDTree(A_ic)
ic_dist, ic_index = ic_station_tree.query(p)

In [None]:
# `ic_index` holds row positions in `ic_coord`, i.e. in the intercity stations that
# passed the coordinate filter. Apply the same filter before the positional lookup;
# indexing the unfiltered `ic_stations` picks the wrong rows as soon as any
# intercity station without coordinates precedes the match.
# The mask must stay identical to the one used to build `ic_coord`.
ic_stations_with_coord = ic_stations[(ic_stations.Xcoord > 0) & (ic_stations.Ycoord > 0)]
ic_closest_station = ic_stations_with_coord.iloc[ic_index]
ic_closest_station = ic_closest_station.reset_index()

In [None]:
# Attach the zone id and the distance (divided by 1000, rounded to 2 decimals)
# to each zone's closest intercity-station row.
ic_closest_station['ZONE_ID'] = transit_data['ZONE_ID']
ic_closest_station['Distance'] = (ic_dist / 1000).round(2)

In [None]:
# Store the nearest-intercity-station distance on the zone layer, scaled by 1/1000
# (the map below labels this column in km).
ic_dist_km = ic_dist / 1000
zones['Distance_TS_ic'] = ic_dist_km

In [None]:
# Choropleth of the distance from each zone centroid to its nearest intercity station.
f, ax = plt.subplots(1, 1, figsize=(13, 6))

# `cm.get_cmap` was removed in Matplotlib 3.9; `plt.get_cmap` works on old and new versions.
cmap = plt.get_cmap('YlOrRd')

zones.plot(ax=ax, column='Distance_TS_ic', cmap=cmap,
           legend=True, legend_kwds={"label": "Distance to closest intercity train station [km]"})

# Map axes carry no useful tick information; the blue face colour stands in for water.
ax.set_xticks([])
ax.set_yticks([])
ax.set_facecolor('lightskyblue')
ax.set_title('Distance to closest intercity train station');

In [None]:
# Add both station distances (divided by 1000, rounded to 2 decimals) to the result table.
transit_data['Distance_station'] = (dist / 1000).round(2)
transit_data['Distance_ic_station'] = (ic_dist / 1000).round(2)

In [None]:
# Inspect the result table after adding the two station distance columns.
transit_data

## Frequency of trains

In [None]:
# Per zone: mean of the AM, OP and PM total train frequencies of its closest station.
total_freq_cols = ['AMTotalFrequency', 'OPTotalFrequency', 'PMTotalFrequency']
freq = closest_station.loc[:, total_freq_cols].mean(axis=1)
freq

In [None]:
# Per zone: mean of the AM, OP and PM total train frequencies of its closest intercity station.
total_freq_cols = ['AMTotalFrequency', 'OPTotalFrequency', 'PMTotalFrequency']
ic_freq = ic_closest_station.loc[:, total_freq_cols].mean(axis=1)
ic_freq

In [None]:
# Add the mean train frequencies (rounded to 1 decimal) to the result table.
transit_data['Freq_station'] = freq.round(1)
transit_data['Freq_ic_station'] = ic_freq.round(1)

In [None]:
# Inspect the result table after adding the train frequency columns.
transit_data

## Other Public transport stops

In [None]:
# Distinct values of the `modaliteit` column in the stop layer.
set(ov['modaliteit'])

In [None]:
# Keep only bus, metro and tram stops.
btm_modes = ['Bus', 'Metro', 'Tram']
ov = ov.loc[ov['modaliteit'].isin(btm_modes)]

In [None]:
# Reproject the stops to EPSG:28992.
# NOTE(review): presumably the CRS of the zone centroid coordinates — confirm.
ov = ov.to_crs(epsg=28992)

In [None]:
# Preview the first rows of the reprojected stop layer.
ov.head()

In [None]:
# Keep the first stop for every (stop_name, modaliteit) combination and drop the repeats.
ov = ov.drop_duplicates(subset=['stop_name', 'modaliteit'])

In [None]:
# Number of stops left after de-duplication.
len(ov)

In [None]:
# Inspect the de-duplicated bus/tram/metro stop layer.
ov

In [None]:
# (n_stops, 2) array of stop x/y coordinates for the KD-tree.
ov_arr = np.column_stack((ov.geometry.x, ov.geometry.y))

In [None]:
# Quick visual check of the stop locations (tiny markers because there are many points).
ov.plot(markersize=0.1)

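To get a better idea of the public transport supply around each zone, look up all stops (at most 1000) within a radius of 2500 (in coordinate units) around the zone centroid and count them.
(An earlier version averaged the distance to the 6 nearest stops within 5 km; 6 because stops often come in sets of 2.)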

In [None]:
# For every zone centroid, look up at most 1000 bus/tram/metro stops within a
# radius of 2500 (coordinate units). Both results have one row per zone and one
# column per neighbour slot.
btm_tree = spatial.KDTree(ov_arr)
dist_ov, index_ov = btm_tree.query(p, k=1000, distance_upper_bound=2500)

# Slots with no stop inside the radius come back with an infinite distance;
# mark them with index -1 and distance NaN.
btm_unfilled = dist_ov > 2500
index_ov[btm_unfilled] = -1
dist_ov[btm_unfilled] = np.nan

In [None]:
# Inspect the neighbour index matrix (-1 marks an empty neighbour slot).
index_ov

In [None]:
# Number of filled neighbour slots per zone, i.e. stops found within the search radius.
btm_stops = (index_ov > -1).sum(axis=1)

In [None]:
# Inspect the per-zone count of bus/tram/metro stops.
btm_stops

Now do the same for bus, tram and metro individually

In [None]:
# Bus stops only: for every zone centroid, look up at most 1000 bus stops within a
# radius of 2500 (coordinate units) and count them.
bus = ov[ov.modaliteit == 'Bus']
bus_arr = np.vstack([bus.geometry.x, bus.geometry.y]).T
dist_bus, index_bus = spatial.KDTree(bus_arr).query(p, k=1000, distance_upper_bound=2500)

# Slots with no stop inside the radius come back with an infinite distance; mark
# them with index -1 and distance NaN. The cut-off now matches the query radius
# (it used to be 5000, which selected the same infinite entries but contradicted
# the 2500 used everywhere else).
bus_unfilled = dist_bus > 2500
index_bus[bus_unfilled] = -1
dist_bus[bus_unfilled] = np.nan

# Number of bus stops found within the radius, per zone.
bus_stops = np.sum(index_bus > -1, axis=1)

In [None]:
# Metro stops only: for every zone centroid, look up at most 1000 metro stops
# within a radius of 2500 (coordinate units) and count them.
metro = ov.loc[ov['modaliteit'] == 'Metro']
metro_arr = np.column_stack((metro.geometry.x, metro.geometry.y))
metro_tree = spatial.KDTree(metro_arr)
dist_metro, index_metro = metro_tree.query(p, k=1000, distance_upper_bound=2500)

# Slots with no stop inside the radius come back with an infinite distance;
# mark them with index -1 and distance NaN.
metro_unfilled = dist_metro > 2500
index_metro[metro_unfilled] = -1
dist_metro[metro_unfilled] = np.nan

# Number of metro stops found within the radius, per zone.
metro_stops = (index_metro > -1).sum(axis=1)

In [None]:
# Tram stops only: for every zone centroid, look up at most 1000 tram stops within
# a radius of 2500 (coordinate units) and count them.
tram = ov[ov.modaliteit == 'Tram']
tram_arr = np.vstack([tram.geometry.x, tram.geometry.y]).T
dist_tram, index_tram = spatial.KDTree(tram_arr).query(p, k=1000, distance_upper_bound=2500)

# Slots with no stop inside the radius come back with an infinite distance; mark
# them with index -1 and distance NaN. The cut-off now matches the query radius
# (it used to be 5000, which selected the same infinite entries but contradicted
# the 2500 used everywhere else).
tram_unfilled = dist_tram > 2500
index_tram[tram_unfilled] = -1
dist_tram[tram_unfilled] = np.nan

# Number of tram stops found within the radius, per zone.
tram_stops = np.sum(index_tram > -1, axis=1)

In [None]:
# zones['OV_dist'] = dist_ov / 1000
# zones['Bus_dist'] = dist_bus / 1000
# zones['Metro_dist'] = dist_metro / 1000
# zones['Tram_dist'] = dist_tram / 1000

In [None]:
# Store the per-zone stop counts on the zone layer so they can be mapped.
stop_count_columns = (
    ('OV_stops', btm_stops),
    ('Bus_stops', bus_stops),
    ('Metro_stops', metro_stops),
    ('Tram_stops', tram_stops),
)
for column_name, stop_counts in stop_count_columns:
    zones[column_name] = stop_counts

In [None]:
# Inspect the zone layer with the stop count columns added.
zones

All distances to metro and tram stops greater than 5 km are set to NaN: when you are that far away, it should not matter whether the nearest metro stop is 50 km or 100 km away, because neither is reachable.

In [None]:
# index_metro[dist_metro > 5000] = -1
# index_tram[dist_tram > 5000] = -1

# dist_metro[dist_metro > 5000] = np.nan
# dist_tram[dist_tram > 5000] = np.nan

In [None]:
# zones['Metro_dist'] = dist_metro / 1000
# zones['Tram_dist'] = dist_tram / 1000

In [None]:
# Choropleth of the number of bus/tram/metro stops found near each zone centroid.
f, ax = plt.subplots(1, 1, figsize=(13, 6))

# `cm.get_cmap` was removed in Matplotlib 3.9; `plt.get_cmap` works on old and new versions.
cmap = plt.get_cmap('YlOrRd')

# The plotted column is a stop count, so the legend and title say so (they used to
# describe a distance in km, which this column is not).
zones.plot(ax=ax, column='OV_stops', cmap=cmap,
           legend=True, legend_kwds={"label": "Number of ov stops within range"})

# Map axes carry no useful tick information; the blue face colour stands in for water.
ax.set_xticks([])
ax.set_yticks([])
ax.set_facecolor('lightskyblue')
ax.set_title('Number of nearby ov stops');

Add values to dataframe

In [None]:
def _nearest_km(neighbour_dist):
    """Return the nearest-neighbour distance per zone, divided by 1000 and rounded to 2 decimals.

    The k-nearest-neighbour queries return one row per zone and one column per
    neighbour slot, sorted nearest first, so column 0 is the closest stop (NaN when
    no stop lies within the search radius). A 1-D input is taken as already being
    one distance per zone. Assigning the full 2-D matrix to a single DataFrame
    column raises a ValueError, which is why the reduction is needed.
    """
    neighbour_dist = np.asarray(neighbour_dist, dtype=float)
    nearest = neighbour_dist if neighbour_dist.ndim == 1 else neighbour_dist[:, 0]
    return np.round(nearest / 1000, 2)


transit_data.loc[:, 'Distance_btm'] = _nearest_km(dist_ov)
transit_data.loc[:, 'Distance_bus'] = _nearest_km(dist_bus)
transit_data.loc[:, 'Distance_metro'] = _nearest_km(dist_metro)
transit_data.loc[:, 'Distance_tram'] = _nearest_km(dist_tram)

In [None]:
# Inspect the result table after adding the bus/tram/metro distance columns.
transit_data

## Quality OV stops

For each zone we count the number of lines serving each nearby stop and take the mean over those stops. The more lines, the better the connection with the rest of the network.

In [None]:
def _mean_lines_per_stop(stops, neighbour_idx):
    """Return the mean number of lines per stop among one zone's nearby stops.

    stops: stop table with a `lijnen` column describing the lines at each stop.
    neighbour_idx: 1-D array of row positions into `stops`; negative entries mark
        empty neighbour slots and are ignored.
    Returns NaN when the zone has no nearby stop.
    """
    valid_idx = neighbour_idx[neighbour_idx >= 0]
    if len(valid_idx) == 0:
        return np.nan

    line_text = np.array(stops.iloc[valid_idx].lijnen).astype(str)
    # Lines are counted by occurrences of the mode keyword in the `lijnen` text.
    # NOTE(review): 'tram' is lowercase while 'Bus' and 'Metro' are capitalised —
    # confirm this matches the format of the `lijnen` column.
    lines_per_stop = (
        np.char.count(line_text, 'Bus')
        + np.char.count(line_text, 'Metro')
        + np.char.count(line_text, 'tram')
    )
    return np.mean(lines_per_stop)


ov_df = [ov, bus, metro, tram]
indices = [index_ov, index_bus, index_metro, index_tram]

# One row per stop table (all, bus, metro, tram) and one column per zone. The zone
# count is taken from the query results instead of a hard-coded 1406, and the loop
# no longer overwrites the global `index` used by the train-station lookup.
n_zones = index_ov.shape[0]
ov_lines = np.full((len(ov_df), n_zones), np.nan)

for i, (stop_table, neighbour_matrix) in enumerate(zip(ov_df, indices)):
    for z in range(n_zones):
        ov_lines[i, z] = _mean_lines_per_stop(stop_table, neighbour_matrix[z])


    

In [None]:
# Add the mean line counts (rounded to 2 decimals) to the result table; the rows of
# `ov_lines` are ordered all stops, bus, metro, tram.
line_columns = ('Btm_lines', 'Bus_lines', 'Metro_lines', 'Tram_lines')
for row_number, column_name in enumerate(line_columns):
    transit_data.loc[:, column_name] = np.round(ov_lines[row_number], 2)


In [None]:
# Add the per-zone stop counts to the result table.
stop_counts_by_column = {
    'btm_stops': btm_stops,
    'Bus_stops': bus_stops,
    'Metro_stops': metro_stops,
    'Tram_stops': tram_stops,
}
for column_name, stop_counts in stop_counts_by_column.items():
    transit_data.loc[:, column_name] = stop_counts

In [None]:
# Inspect the complete result table.
transit_data

## Save to csv

In [None]:
# transit_data.to_csv(parent_dir + '\\Data\\New\\lms_zone_transit.csv')

In [None]:
# Reload the previously saved transit table, using its first CSV column as the index.
# The path is built with os.path.join so it is not tied to Windows path separators.
new_transit_data = pd.read_csv(
    os.path.join(parent_dir, 'Data', 'New', 'lms_zone_transit.csv'),
    index_col=0,
)

In [None]:
# Drop the first remaining column of the reloaded table (the CSV's first column was
# already consumed as the index by `index_col=0` when the file was read).
# NOTE(review): the drop is positional and the table is written back to the same
# file at the end of the notebook, so each further run removes one more column —
# confirm which column is meant and consider dropping it by name.
new_transit_data = new_transit_data.iloc[:, 1:].copy()

In [None]:
# Overwrite/add the per-zone stop counts in the reloaded table.
reloaded_stop_counts = {
    'btm_stops': btm_stops,
    'Bus_stops': bus_stops,
    'Metro_stops': metro_stops,
    'Tram_stops': tram_stops,
}
for column_name, stop_counts in reloaded_stop_counts.items():
    new_transit_data.loc[:, column_name] = stop_counts

In [None]:
# Inspect the reloaded table with the refreshed stop counts before saving it.
new_transit_data

In [None]:
# Write the updated table back to the same CSV it was read from (the index is saved
# as the first column). The path is built with os.path.join so it is not tied to
# Windows path separators.
new_transit_data.to_csv(os.path.join(parent_dir, 'Data', 'New', 'lms_zone_transit.csv'))