# Validation and exploratory data analysis 
# on accessible sidewalk widths

In [None]:
import set_path

import numpy as np
import pandas as pd

import shapely.geometry as sg
import shapely.ops as so
import geopandas as gpd
from geopandas import GeoDataFrame

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib

import contextily as cx

import upc_sw.poly_utils as poly_utils

from tqdm.notebook import tqdm_notebook
tqdm_notebook.pandas()

In [None]:
import warnings  # temporary, to suppress deprecation warnings from shapely
# NOTE: without a category or module argument this hides ALL warnings, from every library
warnings.filterwarnings('ignore')

## Settings

In [None]:
# Paths
# Legend table (CSV) written by this notebook
output_legend = '../datasets/output/new/final_output_legend.csv'

# BGT layer, read below and drawn as the silver background in the plots
bgt_road_file = '../datasets/output/new/bgt_voetpad.gpkg' 

# Width results per segment (read below as df_raw) and image output locations
output_file = '../datasets/output/new/final_output_segments_all_vf.geojson'
output_image = '../datasets/output/new/EDA_segments_all_vf.png'
output_image_no = '../datasets/output/new/EDA_segments_all_no_vf.png'

# Sidewalk segments with a 'min_width' column, used for the minor-paths validation image
segments_file = '../datasets/output/new/sidewalk_segments.gpkg'

In [None]:
# A CRS tells Python how those coordinates relate to places on the Earth. Rijksdriehoek = epsg:28992
CRS = 'epsg:28992'

# Segments whose min_width is not above this value (in meters) are filtered out
min_path_width = 0.4 

# Boundaries between the width classes red/orange/lightgreen/green (in meters)
width_lower = 0.9
width_upper = 1.8
width_top = 2.9

## Create and store legend

In [None]:
# Legend table: one row per width class holding the color stored in the data,
# the color to display, and the Dutch / English label.
label_top = f'>{width_top}m'
label_upper = f'{width_upper}-{width_top}m'
label_lower = f'{width_lower}-{width_upper}m'
label_bottom = f'<{width_lower}m'

legend_rows = [
    ['green', 'green', label_top, label_top],
    ['lightgreen', 'lightgreen', label_upper, label_upper],
    ['orange', 'yellow', label_lower, label_lower],
    ['red', 'red', label_bottom, label_bottom],
    ['grey', 'grey', 'onbekend', 'unknown'],
]
df_legend = pd.DataFrame(np.array(legend_rows),
                         columns=['final_color', 'color_displayed', 'label_NL', 'label_ENG'])

In [None]:
# Display the legend table for a visual check
df_legend

In [None]:
# Store the legend as CSV, without the row index
df_legend.to_csv(output_legend, index=False)

## Import data

In [None]:
# Read BGT data
df_bgt = gpd.read_file(bgt_road_file)
df_bgt = df_bgt.set_crs(CRS, allow_override=True)

In [None]:
# Read width output data (from notebook 6)
df_raw = gpd.read_file(output_file)

### First inspection

In [None]:
# Column names, dtypes and non-null counts
df_raw.info()

In [None]:
# First two rows as a sample
df_raw.head(2)

In [None]:
# (number of rows, number of columns) before de-duplication
df_raw.shape

## Process for analysis

### Check and remove duplicates

In [None]:
# Drop the identifier columns so that otherwise identical rows compare as duplicates
df = df_raw.drop(columns=['id', 'object_id'])

In [None]:
# Remove rows that are identical in every remaining column
df = df.drop_duplicates()

In [None]:
# Shape after de-duplication (compare with df_raw.shape above)
df.shape

In [None]:
# Number of distinct geometries; a lower row count than df.shape means that
# some geometries occur more than once with different attribute values
df[['geometry']].drop_duplicates().shape # check amount of different geometries

### CRS

In [None]:
# Reproject to the project CRS so that lengths computed below are in that CRS's units
df = df.to_crs(CRS)

### Add columns

In [None]:
# Length of every segment in CRS units, rounded to two decimals
df['length'] = df['geometry'].length.round(2)

### Rename columns

In [None]:
# Human-readable column names used in the tables further down
column_labels = {
    'full_width': 'full width',
    'obstacle_free_width': 'obstacle-free width',
}
df = df.rename(columns=column_labels)

### Order categories

In [None]:
# Turn the label column into a categorical so that the classes are listed from
# narrow to wide (with 'unknown' last); labels not in this list become NaN.
# NOTE(review): the labels are hard-coded and must match the width_* settings above.
df['obstacle-free width'] = pd.Categorical(df['obstacle-free width'], 
                                           categories=['<0.9m', '0.9-1.8m', '1.8-2.9m', '>2.9m', 'unknown'])

In [None]:
# Same category order for the full width; labels not in this list become NaN
df['full width'] = pd.Categorical(df['full width'], categories=['<0.9m', '0.9-1.8m', '1.8-2.9m', '>2.9m', 'unknown'])

## 1. Images for physical validation

### Get segments data for minor lines image

In [None]:
# Read the sidewalk segments and declare them to be in the project CRS.
# `crs` is not a read_file parameter (it is only forwarded to the I/O engine
# and does not set the CRS), so the CRS is set explicitly, as for the BGT data.
df_segments = gpd.read_file(segments_file).set_crs(CRS, allow_override=True)

### Select area

In [None]:
# Centre of the validation area as latitude / longitude in degrees (interpreted
# as EPSG:4326 below). Keep exactly one pair active; the label behind each
# latitude names the validation site.
#my_lat = 52.3712801 # Iva 1
#my_lon = 4.8524367
#my_lat = 52.3811564 # Iva 2
#my_lon = 4.8688583
#my_lat = 52.3630876091457 # Weesperstraat 113
#my_lon = 4.907223655265446
#my_lat = 52.37330899988559 # Shayla 1 
#my_lon = 4.941617895847236
#my_lat = 52.311846 # Daan 1
#my_lon = 4.973736
#my_lat = 52.315221 # Daan 2
#my_lon = 4.976473
#my_lat = 52.369324 # Iva 3
#my_lon = 4.855894
#my_lat = 52.362991703136586 # Claudia 1
#my_lon = 4.892166054326174
#my_lat = 52.383559 # Iva 4
#my_lon = 4.873875
#my_lat = 52.317213 # Daan 3
#my_lon = 4.968181
#my_lat = 52.35927331379236   # Blog 1
#my_lon = 4.991672146550689
#my_lat = 52.3615579891934 # Blog 2
#my_lon = 4.979760215260225
#my_lat = 52.40526076459767 # Blog 3
#my_lon = 4.913909138960928 
#my_lat = 52.354584356367624 # Blog 4
#my_lon = 4.794089043031844
my_lat = 52.361207 # Claudia 2
my_lon = 4.908029 
#my_lat = 52.372116 # Claudia 3
#my_lon = 4.900810 

In [None]:
# Output images for the validation plots; activate the pair that belongs to the
# location selected above. output_image_val receives the plot with all width
# classes, output_image_val_red the plot restricted to red segments.
#output_image_val = '../datasets/output/amsterdam/Sidewalk_validation_Iva1.png'
#output_image_val = '../datasets/output/amsterdam/Sidewalk_validation_Iva2.png'
#output_image_val = '../datasets/output/amsterdam/Sidewalk_validation_WPS113.png'
#output_image_val = '../datasets/output/amsterdam/Sidewalk_validation_Shayla1.png'
#output_image_val = '../datasets/output/amsterdam/Sidewalk_validation_Daan1.png'
#output_image_val = '../datasets/output/amsterdam/Sidewalk_validation_Daan2.png'
#output_image_val = '../datasets/output/amsterdam/Sidewalk_validation_Iva3.png'
#output_image_val = '../datasets/output/amsterdam/Sidewalk_validation_Claudia1.png'
#output_image_val = '../datasets/output/amsterdam/Sidewalk_validation_Iva4.png'
#output_image_val = '../datasets/output/amsterdam/Sidewalk_validation_Daan3.png'
#output_image_val = '../datasets/output/amsterdam/Sidewalk_validation_Blog1.png'
#output_image_val = '../datasets/output/amsterdam/Sidewalk_validation_Blog2.png'
#output_image_val_red = '../datasets/output/amsterdam/Sidewalk_validation_Blog2_red.png'
#output_image_val = '../datasets/output/amsterdam/Sidewalk_validation_Blog3.png'
#output_image_val_red = '../datasets/output/amsterdam/Sidewalk_validation_Blog3_red.png'
#output_image_val = '../datasets/output/amsterdam/Sidewalk_validation_Blog4.png'
#output_image_val_red = '../datasets/output/amsterdam/Sidewalk_validation_Blog4_red.png'
output_image_val = '../datasets/output/amsterdam/Sidewalk_validation_Claudia2.png'
output_image_val_red = '../datasets/output/amsterdam/Sidewalk_validation_Claudia2_red.png'
#output_image_val = '../datasets/output/amsterdam/Sidewalk_validation_Claudia3.png'
#output_image_val_red = '../datasets/output/amsterdam/Sidewalk_validation_Claudia3_red.png'

In [None]:
# Set size of square you want to validate: my_rad is the buffer distance around
# the centre point (in CRS units), so the square's sides are 2 * my_rad long
my_rad = 250

In [None]:
# Validation centre as a lon/lat point, reprojected to the project CRS
centre_wgs84 = sg.Point(my_lon, my_lat)
df_loc = GeoDataFrame({'geometry': [centre_wgs84]}, crs="EPSG:4326")
df_loc = df_loc.to_crs(CRS)

# cap_style=3 (square) makes the buffer a square instead of a circle
df_loc['buffer'] = df_loc['geometry'].buffer(my_rad, cap_style=3)

### Do calculation for minor lines image

In [None]:
# Keep the segments that spatially match the validation square. Both frames
# carry a 'geometry' column, so the join result names them 'geometry_left'
# (segment) and 'geometry_right' (centre point) — the next cell relies on that.
df_segments_val = gpd.sjoin(df_segments, df_loc.set_geometry('buffer'), how='inner')

In [None]:
# Make the segment geometry the active one again, drop the columns added by the
# spatial join and restore the conventional column name 'geometry'
df_segments_val = (
    df_segments_val
    .set_geometry('geometry_left')
    .drop(columns=['index_right', 'geometry_right'])
    .rename(columns={'geometry_left': 'geometry'})
)

In [None]:
# Keep only segments wider than the minimal path width and report how many
# rows remain (shape before, then shape after)
is_wide_enough = df_segments_val['min_width'] > min_path_width
df_segments_wide = df_segments_val.loc[is_wide_enough].reset_index(drop=True)
for frame in (df_segments_val, df_segments_wide):
    print(frame.shape)

In [None]:
# Combine the segments per sidewalk and strip the short lines from every
# combined geometry (with a progress bar)
mls_per_id = poly_utils.create_mls_per_sidewalk(df_segments_wide, crs=CRS)
cleaned_geometry = mls_per_id['geometry'].progress_apply(poly_utils.remove_short_lines)
mls_per_id['geometry'] = cleaned_geometry

In [None]:
# Apply selection of longer lines to original dataframe
# explode() splits every multi-part geometry into one row per part
long_segments_df = gpd.GeoDataFrame(mls_per_id.geometry.explode())
# No `on=` is given, so the inner merge joins on the columns both frames share
# (presumably only 'geometry' — verify); segments removed above drop out here
df_segments_wide = df_segments_wide.merge(long_segments_df, how='inner')
df_segments_wide.shape

In [None]:
# Width classes on 'min_width', ordered from narrow to wide; `values` holds the
# color of each class at the same position
segment_widths = df_segments_wide['min_width']

conditions = [
    segment_widths < width_lower,
    (segment_widths >= width_lower) & (segment_widths < width_upper),
    (segment_widths >= width_upper) & (segment_widths < width_top),
    segment_widths >= width_top,
]

values = ['red', 'orange', 'lightgreen', 'green']

In [None]:
# Color every segment by the width class of its 'min_width'.
# np.select uses `default` for rows that match no condition. The implicit
# default is the integer 0, which has no common dtype with the string colors
# (newer NumPy raises on this, older versions produce the color '0'), so
# unmatched rows (e.g. a missing width) are explicitly colored 'grey' (unknown).
df_segments_wide['min_width_color'] = np.select(conditions, values, default='grey')
df_segments_wide['min_width_color'].value_counts()

In [None]:
# Re-wrap as a GeoDataFrame in the project CRS so it can be reprojected and plotted below
gdf_segments_wide = GeoDataFrame(df_segments_wide, crs=CRS)

### Get next two images

In [None]:
# Width results that spatially match the validation square; the join renames
# the segment geometry to 'geometry_left', which is made the active geometry
validation_square = df_loc.set_geometry('buffer')
df_val = gpd.sjoin(df, validation_square, how='inner').set_geometry('geometry_left')

### Get BGT Background

In [None]:
# BGT features that spatially match the validation square, used as plot
# background; 'geometry_left' (the BGT geometry) is made the active geometry
validation_square = df_loc.set_geometry('buffer')
df_bgt_val = gpd.sjoin(df_bgt, validation_square, how='inner').set_geometry('geometry_left')

### Plot

In [None]:
# Plot extent: bounding box of the validation square in EPSG:3857, the CRS the
# layers are reprojected to for plotting
df_loc = df_loc.set_geometry('buffer').to_crs(epsg=3857)
x_min, y_min, x_max, y_max = sg.Polygon(df_loc['buffer'][0]).bounds

In [None]:
# Validation image: three stacked panels on satellite imagery showing the minor
# paths, the obstacle-free width and the full width inside the validation square
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(14,14), dpi=600, 
                                    frameon=False, constrained_layout=True)

# Reproject every layer once to EPSG:3857 before drawing
bgt_web = df_bgt_val.to_crs(epsg=3857)
segments_web = gdf_segments_wide.to_crs(epsg=3857)
widths_web = df_val.to_crs(epsg=3857)

# (axes, layer, line width, line colors, title) per panel
panels = [
    (ax1, segments_web, 1, gdf_segments_wide.min_width_color,
     'minor paths (input for obstacle-free width)'),
    (ax2, widths_web, 2, df_val.final_color, 'obstacle-free width'),
    (ax3, widths_web, 2, df_val.full_width_color, 'full width'),
]

# Background, colored lines and basemap; the basemap is added before the axis
# limits are fixed, as in the rest of this notebook
for panel_ax, layer, line_width, line_colors, _ in panels:
    bgt_web.plot(ax=panel_ax, color='silver', alpha=0.5)
    layer.plot(ax=panel_ax, linewidth=line_width, color=line_colors)
    cx.add_basemap(panel_ax, source=cx.providers.Esri.WorldImagery)
    panel_ax.axis('off')

# Legend with one patch per width class, attached to the current axes
legend_entries = [
    ('green', f'>{width_top}m'),
    ('lightgreen', f'{width_upper}-{width_top}m'),
    ('orange', f'{width_lower}-{width_upper}m'),
    ('red', f'<{width_lower}m'),
    ('grey', 'unknown'),
]
legend_handles = [mpatches.Patch(color=patch_color, label=patch_label)
                  for patch_color, patch_label in legend_entries]
plt.legend(handles=legend_handles,
           bbox_to_anchor=(1, 0.5, 0.5, 0.5))

# Titles, then zoom every panel to the validation square
for panel_ax, _, _, _, panel_title in panels:
    panel_ax.set_title(panel_title)

for panel_ax in (ax1, ax2, ax3):
    panel_ax.set_xlim([x_min, x_max])
    panel_ax.set_ylim([y_min, y_max])

plt.savefig(output_image_val, bbox_inches='tight')

plt.show()

In [None]:
# Segments that are red for the obstacle-free width AND red for the full width
red_in_both = (df_val['final_color'] == 'red') & (df_val['full_width_color'] == 'red')
df_val2 = df_val[red_in_both]

In [None]:
# Same validation image as above, but the two width panels only show the
# segments that are red in both the obstacle-free and the full width
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(14,14), dpi=600, 
                                    frameon=False, constrained_layout=True)

# Reproject every layer once to EPSG:3857 before drawing
bgt_web = df_bgt_val.to_crs(epsg=3857)
segments_web = gdf_segments_wide.to_crs(epsg=3857)
red_web = df_val2.to_crs(epsg=3857)

# (axes, layer, line width, line colors, title) per panel
panels = [
    (ax1, segments_web, 1, gdf_segments_wide.min_width_color,
     'minor paths (input for obstacle-free width)'),
    (ax2, red_web, 2, df_val2.final_color, 'obstacle-free width'),
    (ax3, red_web, 2, df_val2.full_width_color, 'full width'),
]

# Background, colored lines and basemap; the basemap is added before the axis
# limits are fixed, as in the rest of this notebook
for panel_ax, layer, line_width, line_colors, _ in panels:
    bgt_web.plot(ax=panel_ax, color='silver', alpha=0.5)
    layer.plot(ax=panel_ax, linewidth=line_width, color=line_colors)
    cx.add_basemap(panel_ax, source=cx.providers.Esri.WorldImagery)
    panel_ax.axis('off')

# Legend with one patch per width class, attached to the current axes
legend_entries = [
    ('green', f'>{width_top}m'),
    ('lightgreen', f'{width_upper}-{width_top}m'),
    ('orange', f'{width_lower}-{width_upper}m'),
    ('red', f'<{width_lower}m'),
    ('grey', 'unknown'),
]
legend_handles = [mpatches.Patch(color=patch_color, label=patch_label)
                  for patch_color, patch_label in legend_entries]
plt.legend(handles=legend_handles,
           bbox_to_anchor=(1, 0.5, 0.5, 0.5))

# Titles, then zoom every panel to the validation square
for panel_ax, _, _, _, panel_title in panels:
    panel_ax.set_title(panel_title)

for panel_ax in (ax1, ax2, ax3):
    panel_ax.set_xlim([x_min, x_max])
    panel_ax.set_ylim([y_min, y_max])

plt.savefig(output_image_val_red, bbox_inches='tight')

plt.show()

## 2. Summary - entire city

### Obstacle free width

In [None]:
# City-wide overview: two maps side by side, both drawn on the BGT layer
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10,5), dpi=600, 
                                    frameon=False, constrained_layout=True)

# Left panel: segments colored by final_color (the obstacle-free width color)
df_bgt.plot(ax=ax1, color='silver')
df.plot(ax=ax1, linewidth=1, color=df.final_color)  
ax1.axis('off')

# Right panel: segments colored by full_width_color
df_bgt.plot(ax=ax2, color='silver')
df.plot(ax=ax2, linewidth=1, color=df.full_width_color) 
ax2.axis('off')

# Create legends
wide = mpatches.Patch(color='green', label='>' + str(width_top) + 'm')
acc = mpatches.Patch(color='lightgreen', label= str(width_upper) + '-' + str(width_top) + 'm')
narrow = mpatches.Patch(color='orange', label= str(width_lower) + '-' + str(width_upper) + 'm')
notacc = mpatches.Patch(color='red', label='<' + str(width_lower) + 'm')
runk = mpatches.Patch(color='grey', label='unknown')

# An axes holds only one legend: the second plt.legend call replaces the first,
# so the first legend is added back as a separate artist afterwards.
# The full-width legend has no 'unknown' entry.
legend1 = plt.legend(handles=[wide,acc,narrow,notacc,runk], 
           bbox_to_anchor=(-0.5, -0.5, 0.5, 0.5), title = "obstacle free width")
plt.legend(handles=[wide,acc,narrow,notacc], 
           bbox_to_anchor=(0.5, -0.5, 0.5, 0.5), title = "full width")
plt.gca().add_artist(legend1)

# NOTE(review): the figure is written to output_image_no; output_image is not used here — confirm
plt.savefig(output_image_no, bbox_inches='tight')
#plt.show()

In [None]:
# Number of segments (rows, not meters) per obstacle-free width color
df['final_color'].value_counts() 

In [None]:
# Share of the total segment length per obstacle-free width class.
# Only 'length' is aggregated: summing every column would also try to add up
# the geometry and text columns, which fails or yields meaningless values.
df_group = df.groupby('obstacle-free width')[['length']].sum()
df_group['%'] = round(100 * df_group['length'] / df_group['length'].sum(), 1)
df_group.reset_index()[['obstacle-free width', '%']].sort_values('obstacle-free width')

#### Without unknown

In [None]:
# "ng" = no grey: drop the segments whose obstacle-free width color is grey (unknown)
df_ng = df[df['final_color'] != 'grey'] 

In [None]:
# Share of the segment length per obstacle-free width class, unknown excluded.
# Only 'length' is aggregated: summing every column would also try to add up
# the geometry and text columns, which fails or yields meaningless values.
df_group = df_ng.groupby('obstacle-free width')[['length']].sum()
df_group['%'] = round(100 * df_group['length'] / df_group['length'].sum(), 1)
df_group.reset_index()[['obstacle-free width', '%']]

### Full width

In [None]:
# Share of the total segment length per full width class.
# Only 'length' is aggregated: summing every column would also try to add up
# the geometry and text columns, which fails or yields meaningless values.
df_group = df.groupby('full width')[['length']].sum()
df_group['%'] = round(100 * df_group['length'] / df_group['length'].sum(), 1)
df_group.reset_index()[['full width', '%']]

### Full width vs. obstacle free width

In [None]:
# Length per obstacle-free width class, and per combination of obstacle-free
# and full width class (unknown obstacle-free widths excluded).
# Only 'length' is aggregated: summing every column would also try to add up
# the geometry and text columns, which fails or yields meaningless values.
df_group = df_ng.groupby('obstacle-free width')[['length']].sum()
df_group_mix = df_ng.groupby(['obstacle-free width', 'full width'])[['length']].sum()

In [None]:
# Percentage of the total length (all combinations together add up to 100)
df_group_mix['%'] = round(100 * df_group_mix['length'] / sum(df_group_mix['length']), 1)

In [None]:
# Show the percentages per combination, leaving out rows with an unknown full width
df_group_mix_disp = df_group_mix[['%']].reset_index()
df_group_mix_disp[df_group_mix_disp['full width'] != 'unknown'].set_index(['obstacle-free width', 'full width'])

In [None]:
# Attach the total length of each obstacle-free width class; columns occurring
# in both frames get the suffix '_total' on the class-total side ('length_total')
df_group_mix = df_group_mix.reset_index().merge(df_group.reset_index(), on='obstacle-free width', suffixes=('', '_total'))

In [None]:
# Overwrite '%': now the share within each obstacle-free width class, rounded to whole percents
df_group_mix['%'] = round(100 * df_group_mix['length'] / df_group_mix['length_total'])

In [None]:
# Show the within-class percentages, leaving out rows with an unknown full width
df_group_mix[['obstacle-free width', 'full width', '%']][df_group_mix['full width'] != 'unknown'].set_index(['obstacle-free width', 'full width'])

## 3. Summary - aggregated per stadsdeel etc.

### Prepare

In [None]:
# Definitions of areas (downloaded from Amsterdam website).
# Relative to the notebook location, like every other path in this notebook,
# instead of an absolute path into one user's home directory.
area_files_path = '../datasets/areas/'

In [None]:
# Aggregation level for the summaries below; selects the area file and name column
area_level = 'stadsdeel' # buurt, wijk, gebied, stadsdeel

In [None]:
# Per aggregation level: (file with the area polygons, column holding the area name).
# 'gebied' previously sat behind a second, unreachable `area_level == 'wijk'` test.
area_sources = {
    'buurt': ('neighborhoods.json', 'Buurtnaam'),
    'wijk': ('wijken.json', 'Wijk'),
    'gebied': ('gebieden.json', 'Gebied'),
    'stadsdeel': ('stadsdelen_water.json', 'Stadsdeel'),
}

# Fail loudly on a typo instead of leaving df_area undefined (or stale)
if area_level not in area_sources:
    raise ValueError(
        f"Unknown area_level {area_level!r}; expected one of {sorted(area_sources)}"
    )

area_file_name, area_name_column = area_sources[area_level]
area_file = area_files_path + area_file_name

# Read the polygons in the project CRS and expose the area name as column 'area'
df_area = gpd.read_file(area_file).to_crs(CRS)
df_area = df_area.rename(columns={area_name_column: 'area'})

In [None]:
# First three areas as a sample
df_area.head(3)

In [None]:
# Remove Weesp (rows with Stadsdeelcode 'S')
# NOTE(review): requires a 'Stadsdeelcode' column — confirm it exists in the file of every area_level
df_area = df_area[df_area['Stadsdeelcode'] != 'S'] 

In [None]:
# Add areas to final dataframe 
# predicate='within' only matches segments lying entirely inside one area; with
# how='left' all other segments are kept, but without an area.
# Open question from the author: use how='right' or predicate='intersects' instead?
df_merge = df.sjoin(df_area[['area', 'geometry']], how='left', predicate='within') # right, intersects?

### Obstacle-free width

In [None]:
# Aggregate by area and color: length per obstacle-free width class and area.
# Only 'length' is aggregated: summing every column would also try to add up
# the geometry and text columns, which fails or yields meaningless values.
df_group_sd_color = df_merge.groupby(['obstacle-free width', 'area'])[['length']].sum().reset_index()

In [None]:
# Add length per area: the total per area is merged in as 'length_per_area'.
# Only 'length' is aggregated: summing every column would also try to add up
# the geometry and text columns, which fails or yields meaningless values.
df_group_sd = df_merge.groupby(['area'])[['length']].sum().reset_index()
df_group_sd_color = df_group_sd_color.merge(df_group_sd, on='area', suffixes=('', '_per_area'))

In [None]:
# Calculate percentage length (share of each class within its area, whole percents)
# NOTE: astype(int) raises when a ratio is NaN, e.g. for an area with zero total length
df_group_sd_color['%'] = round(100 * df_group_sd_color['length'] / df_group_sd_color['length_per_area']).astype(int)
df_group_sd_color[['area', 'obstacle-free width', '%']]

#### Without unknown

In [None]:
# "ng" = no grey: segments with an unknown obstacle-free width are left out below
df_merge_ng = df_merge[df_merge['final_color'] != 'grey'] # remove unknown widths

In [None]:
# Aggregate by area and color (unknown widths excluded).
# Only 'length' is aggregated: summing every column would also try to add up
# the geometry and text columns, which fails or yields meaningless values.
df_group_sd_color = df_merge_ng.groupby(['obstacle-free width', 'area'])[['length']].sum().reset_index()

In [None]:
# Add length per area (unknown widths excluded), merged in as 'length_per_area'.
# Only 'length' is aggregated: summing every column would also try to add up
# the geometry and text columns, which fails or yields meaningless values.
df_group_sd = df_merge_ng.groupby(['area'])[['length']].sum().reset_index()
df_group_sd_color = df_group_sd_color.merge(df_group_sd, on='area', suffixes=('', '_per_area'))

In [None]:
# Calculate percentage length (share of each class within its area, whole percents)
# NOTE: astype(int) raises when a ratio is NaN, e.g. for an area with zero total length
df_group_sd_color['%'] = round(100 * df_group_sd_color['length'] / df_group_sd_color['length_per_area']).astype(int)
df_group_sd_color[['area', 'obstacle-free width', '%']]

#### Plot

In [None]:
# Add geometry of area back to the dataframe (df_area on the left, so its columns come first)
df_group_sd_color = df_area.merge(df_group_sd_color, on='area')

In [None]:
# Get subset of data: only the rows of the narrowest class
# NOTE(review): the label '<0.9m' is hard-coded and must match width_lower
df_plot = df_group_sd_color[df_group_sd_color['obstacle-free width'] == '<0.9m']

In [None]:
# Choropleth of the percentage of <0.9m obstacle-free width per area
fig, ax = plt.subplots(figsize = (20, 20), dpi = 200, frameon = False)

# plot all areas (grey stays visible for areas that have no row in df_plot)
df_area.plot(color='grey', ax=ax) 

# plot variable; the color scale spans the observed range (the fixed 0-100 norm is commented out)
df_plot.plot(column ='%', cmap="YlOrRd", ax=ax, legend=True, legend_kwds={'shrink': 0.4}) #, norm=matplotlib.colors.Normalize(vmin=0, vmax=100))

plt.title('Accessibility of sidewalks per area (percentage of <0.9m wide path meters)')
plt.axis('off')

plt.show()

### Full width

In [None]:
# Aggregate by area and color: length per full width class and area.
# Only 'length' is aggregated: summing every column would also try to add up
# the geometry and text columns, which fails or yields meaningless values.
df_group_sd_color = df_merge.groupby(['full width', 'area'])[['length']].sum().reset_index()

In [None]:
# Add length per area: the total per area is merged in as 'length_per_area'.
# Only 'length' is aggregated: summing every column would also try to add up
# the geometry and text columns, which fails or yields meaningless values.
df_group_sd = df_merge.groupby(['area'])[['length']].sum().reset_index()
df_group_sd_color = df_group_sd_color.merge(df_group_sd, on='area', suffixes=('', '_per_area'))

In [None]:
# Calculate percentage length (share of each class within its area, whole percents)
# NOTE: astype(int) raises when a ratio is NaN, e.g. for an area with zero total length
df_group_sd_color['%'] = round(100 * df_group_sd_color['length'] / df_group_sd_color['length_per_area']).astype(int)
df_group_sd_color[['area', 'full width', '%']]

#### Plot

In [None]:
# Add geometry of area back to the dataframe (df_area on the left, so its columns come first)
df_group_sd_color = df_area.merge(df_group_sd_color, on='area')

In [None]:
# Get subset data: rows of the two narrowest full width classes
narrow_rows = df_group_sd_color[df_group_sd_color['full width'].isin(['<0.9m', '0.9-1.8m'])]

# Add percentages of <0.9 and 0.9-1.8 together per area. Only '%' is summed:
# summing every column would also hit the geometry and text columns.
pct_narrow = narrow_rows.groupby(['area'])[['%']].sum().reset_index()

# Attach each area's geometry once. The geo frame is on the left so the result
# stays a GeoDataFrame (needed for the choropleth), and de-duplicating first
# avoids repeating every area polygon once per width class.
area_shapes = df_group_sd_color[['area', 'geometry']].drop_duplicates(subset='area')
df_plot = area_shapes.merge(pct_narrow, on='area')

In [None]:
# Sort areas by the combined percentage (ascending); drop_duplicates removes repeated area rows
df_plot[['area', '%']].sort_values(by='%').drop_duplicates()

In [None]:
# Choropleth of the combined percentage of full width below 1.8m per area
fig, ax = plt.subplots(figsize = (20, 20), dpi = 200, frameon = False)

# plot all areas (grey stays visible for areas that have no row in df_plot)
df_area.plot(color='grey', ax=ax) 

# plot variable; the color scale spans the observed range (the fixed 0-100 norm is commented out)
df_plot.plot(column ='%', cmap="YlOrRd", ax=ax, legend=True, legend_kwds={'shrink': 0.4}) #, norm=matplotlib.colors.Normalize(vmin=0, vmax=100))

plt.title('Sidewalks <1.8m (already when not taking obstacles into account)')
plt.axis('off')

plt.show()