# FARLAB - The Robotability Score 
Developer: @mattwfranchi

In this notebook, we load, preprocess, and aggregate different types of street furniture to come up with a 'street clutter' metric, at the per-sidewalk level.

Project Members: Matt Franchi, Maria-Teresa Parreira, Frank Bu, Wendy Ju 

In [1]:
# class RobotabilityGraph that inherits from Graph class 
import os
import sys 
sys.path.append("/share/ju/urban-fingerprinting")

import osmnx as ox 
import geopandas as gpd 
import pandas as pd 
import numpy as np 

import matplotlib.pyplot as plt 
# enable latex plotting 
plt.rc('text', usetex=True)
plt.rc('font', family='serif')

from glob import glob 
from tqdm import tqdm 

from shapely import wkt, LineString 

import rasterio
from rasterio.enums import Resampling
from rasterio.plot import show 


from src.utils.logger import setup_logger 

logger = setup_logger('rs-street-furniture')
logger.setLevel("INFO")
logger.info("Modules initialized.")

WGS='EPSG:4326'
PROJ='EPSG:2263'

REGEN_SEGMENTIZATION=False
REGEN_TOPOLOGY=True

GEN_INSPECTION_PLOTS=True
INSPECTION_PLOTS="figures/inspection_plots"

os.makedirs(INSPECTION_PLOTS, exist_ok=True)


[34m2024-08-16 16:02:45 - rs-street-furniture - INFO - Modules initialized.[0m


## Loading and Preprocessing Data Features 

### Sidewalk Basemap (NYC)

In [2]:
# Load the NYC sidewalk basemap 
sidewalk_nyc = pd.read_csv("data/sidewalks_nyc.csv")
sidewalk_nyc = gpd.GeoDataFrame(sidewalk_nyc, geometry=wkt.loads(sidewalk_nyc['the_geom']), crs=WGS).to_crs(PROJ)

In [3]:
# Take out features we don't need, and add a width column 
TO_DROP = ['SUB_CODE', 'FEAT_CODE', 'STATUS', 'the_geom']
sidewalk_nyc = sidewalk_nyc.drop(columns=TO_DROP)
sidewalk_nyc['SHAPE_Width'] = sidewalk_nyc['SHAPE_Area'] / sidewalk_nyc['SHAPE_Leng']

# Simplify 
sidewalk_nyc['geometry'] = sidewalk_nyc['geometry'].simplify(10)

# write to disk 
if REGEN_SEGMENTIZATION:
    # segmentize 
    segmentized = sidewalk_nyc.segmentize(50).extract_unique_points().explode(index_parts=True)

    segmentized = gpd.GeoDataFrame(segmentized).reset_index() 

    segmentized = segmentized.merge(sidewalk_nyc,left_on='level_0',right_index=True).drop(columns=['level_0','level_1','geometry'])
    segmentized['geometry'] = segmentized.iloc[:,0]
    segmentized.drop(segmentized.columns[0],axis=1, inplace=True)
    segmentized = gpd.GeoDataFrame(segmentized, crs=PROJ)

    segmentized.to_file("data/sidewalks_nyc_segmentized.geojson", driver='GeoJSON')
    logger.success("Segmentized sidewalk basemap written to disk.")

else: 
    segmentized = gpd.read_file("data/sidewalks_nyc_segmentized.geojson")
    logger.info("Segmentized sidewalk basemap loaded.")


sidewalk_nyc = segmentized

logger.success("NYC sidewalk basemap loaded.")
logger.info(f"Distribution of sidewalk widths [ft]: \n{sidewalk_nyc['SHAPE_Width'].describe()}")

[34m2024-08-16 16:05:10 - rs-street-furniture - INFO - Segmentized sidewalk basemap loaded.[0m
[32m2024-08-16 16:05:10 - rs-street-furniture - SUCCESS - NYC sidewalk basemap loaded.[0m
[34m2024-08-16 16:05:10 - rs-street-furniture - INFO - Distribution of sidewalk widths [ft]: 
count    2.551208e+06
mean     5.373540e+00
std      1.480766e+00
min      2.710948e-01
25%      4.458703e+00
50%      5.149090e+00
75%      6.177234e+00
max      4.021491e+01
Name: SHAPE_Width, dtype: float64[0m


In [4]:
# set first column to be named 'point index' 
sidewalk_nyc.columns = ['point_index'] + list(sidewalk_nyc.columns[1:])

In [5]:
# the maximum distance to search for a nearby street segment. Since we segmentize by 50 feet, we can search within 25 feet
MAX_DISTANCE=25

CUTOFF= pd.to_datetime("2023-12-02")


In [6]:

# we buffer each point by 25 feet, creating a 50-diameter circle centered at the point. This captures nearby clutter. 
sidewalk_nyc['geometry'] = sidewalk_nyc['geometry'].buffer(MAX_DISTANCE)

### Bus Stop Shelters 

In [7]:
# read bus stop shelters 
bus_stop_shelters = gpd.read_file("data/street_furniture/bus_stop_shelters_nyc.csv")
bus_stop_shelters = gpd.GeoDataFrame(bus_stop_shelters, geometry=wkt.loads(bus_stop_shelters['the_geom']), crs=WGS).to_crs(PROJ)

# Bus stop installation date is not present, so filtering is out-of-scoped.

### Trash Cans / Waste Baskets 

In [8]:
# load trash cans 
trash_cans = pd.read_csv("data/street_furniture/dsny_litter_baskets_nyc.csv")
trash_cans = gpd.GeoDataFrame(trash_cans, geometry=wkt.loads(trash_cans['point']), crs=WGS).to_crs(PROJ)

# trash can installation date is not present, so filtering is out-of-scope

### LinkNYC Kiosks 

In [9]:
# load linknyc
linknyc = pd.read_csv("data/street_furniture/LinkNYC_Kiosk_Locations_20240816.csv")
linknyc = gpd.GeoDataFrame(linknyc, geometry=gpd.points_from_xy(linknyc['Longitude'], linknyc['Latitude']), crs='EPSG:4326').to_crs(PROJ)

linknyc['Installation Complete'] = pd.to_datetime(linknyc['Installation Complete'])
linknyc = linknyc[linknyc['Installation Complete'] <= CUTOFF]
linknyc['Installation Complete'].describe()

count                             2137
mean     2017-12-17 15:39:20.131024896
min                1971-12-01 00:00:00
25%                2016-11-12 00:00:00
50%                2017-07-19 00:00:00
75%                2018-02-17 00:00:00
max                2023-11-30 00:00:00
Name: Installation Complete, dtype: object

### Bicycle Parking Shelters 

In [10]:
# load bicycle parking shelters 
bicycle_parking_shelters = pd.read_csv("data/street_furniture/bicycle_parking_shelters_nyc.csv")
bicycle_parking_shelters = gpd.GeoDataFrame(bicycle_parking_shelters, geometry=wkt.loads(bicycle_parking_shelters['the_geom']), crs=WGS).to_crs(PROJ)
bicycle_parking_shelters['Build_date'] = pd.to_datetime(bicycle_parking_shelters['Build_date'])
bicycle_parking_shelters = bicycle_parking_shelters[bicycle_parking_shelters['Build_date'] <= CUTOFF]
bicycle_parking_shelters['Build_date'].describe()

  bicycle_parking_shelters['Build_date'] = pd.to_datetime(bicycle_parking_shelters['Build_date'])


count                               17
mean     2008-07-26 18:21:10.588235264
min                2007-12-17 00:00:00
25%                2008-07-01 00:00:00
50%                2008-09-12 00:00:00
75%                2008-10-13 00:00:00
max                2008-12-17 00:00:00
Name: Build_date, dtype: object

### Bicycle Racks 

In [11]:
# load bicycle racks 
bicycle_racks = gpd.read_file("data/street_furniture/bicycle_racks_nyc/Bycycle_Parking_2023_07/Bicycle_Parking_2023_07.shp").to_crs(PROJ)
bicycle_racks['Date_Inst'] = pd.to_datetime(bicycle_racks['Date_Inst'])
bicycle_racks = bicycle_racks[bicycle_racks['Date_Inst'] <= CUTOFF]
bicycle_racks['Date_Inst'].describe()

count                            31585
mean     2011-06-02 09:53:44.100047616
min                1900-01-01 00:00:00
25%                2010-06-14 00:00:00
50%                2013-05-31 00:00:00
75%                2017-03-02 00:00:00
max                2023-05-02 00:00:00
Name: Date_Inst, dtype: object

### CityBench 

In [12]:
# load citybench
citybench = pd.read_csv("data/street_furniture/citybench_nyc.csv")
citybench = gpd.GeoDataFrame(citybench, geometry=gpd.points_from_xy(citybench['Longitude'], citybench['Latitude']), crs='EPSG:4326').to_crs(PROJ)
citybench['Installati'] = pd.to_datetime(citybench['Installati'])
citybench = citybench[citybench['Installati'] <= CUTOFF]
citybench['Installati'].describe()

count                             2141
mean     2015-04-09 03:36:34.301728256
min                2012-04-01 00:00:00
25%                2013-07-10 00:00:00
50%                2014-11-18 00:00:00
75%                2017-02-04 00:00:00
max                2019-09-30 00:00:00
Name: Installati, dtype: object

### Street Trees 

In [13]:
# load trees 
trees = pd.read_csv("data/street_furniture/forestry_tree_points_nyc.csv", engine='pyarrow')
trees = gpd.GeoDataFrame(trees, geometry=gpd.points_from_xy(trees['longitude'], trees['latitude']), crs='EPSG:4326').to_crs(PROJ)
trees['created_at'] = pd.to_datetime(trees['created_at'])
trees = trees[trees['created_at'] <= CUTOFF]
trees['created_at'].describe()

count                           683788
mean     2015-12-06 07:18:59.574254592
min                2015-05-19 00:00:00
25%                2015-08-29 00:00:00
50%                2015-10-23 00:00:00
75%                2016-02-06 00:00:00
max                2016-10-05 00:00:00
Name: created_at, dtype: object

### News Stands 

In [14]:
# load newsstands 
newsstands = pd.read_csv("data/street_furniture/newsstands_nyc.csv", engine='pyarrow')
newsstands = gpd.GeoDataFrame(newsstands, geometry=wkt.loads(newsstands['the_geom']), crs='EPSG:4326').to_crs(PROJ)
newsstands['Built_Date'] = pd.to_datetime(newsstands['Built_Date'])
newsstands = newsstands[newsstands['Built_Date'] <= CUTOFF]
newsstands['Built_Date'].describe() 

count                              357
mean     2011-09-04 23:31:45.882352896
min                2007-09-03 00:00:00
25%                2008-07-30 00:00:00
50%                2010-11-08 00:00:00
75%                2013-06-04 00:00:00
max                2021-12-22 00:00:00
Name: Built_Date, dtype: object

### Parking Meters 

In [15]:
# load parking meters 
parking_meters = pd.read_csv("data/street_furniture/parking_meters_nyc.csv")
parking_meters = gpd.GeoDataFrame(parking_meters, geometry=wkt.loads(parking_meters['Location']), crs='EPSG:4326').to_crs(PROJ)

# parking meter installation date is not present, so filtering is out-of-scope

### Fire Hydrants 

In [16]:
# load hydrants 
hydrants = pd.read_csv("data/street_furniture/fire_hydrants_nyc.csv")
hydrants = gpd.GeoDataFrame(hydrants, geometry=wkt.loads(hydrants['the_geom']), crs='EPSG:4326').to_crs(PROJ)

# hydrant installation date is not present, so filtering is out-of-scope

### Street Signs 

In [17]:
# load street signs 
street_signs = pd.read_csv("data/street_furniture/Street_Sign_Work_Orders_20240816.csv", engine='pyarrow')

# only keep 'Current' record type 
street_signs = street_signs[street_signs['record_type'] == 'Current']
street_signs['order_completed_on_date'] = pd.to_datetime(street_signs['order_completed_on_date'])
street_signs = street_signs[street_signs['order_completed_on_date'] <= CUTOFF]
street_signs = gpd.GeoDataFrame(street_signs, geometry=gpd.points_from_xy(street_signs['sign_x_coord'], street_signs['sign_y_coord']), crs=PROJ)
street_signs['order_completed_on_date'].describe()

count                          1071145
mean     2018-10-01 02:10:48.026551040
min                1953-08-07 00:00:00
25%                2017-08-23 00:00:00
50%                2020-01-07 00:00:00
75%                2021-07-15 00:00:00
max                2023-12-02 00:00:00
Name: order_completed_on_date, dtype: object

### Bollards 

In [18]:
# load bollards 
bollards = pd.read_csv("data/street_furniture/Traffic_Bollards_Tracking_and_Installations_20240816.csv", engine='pyarrow')

bollards['Date'] = pd.to_datetime(bollards['Date'])
bollards = bollards[bollards['Date'] <= CUTOFF]
bollards['Date'].describe()

# we choose not to process bollards, as locations need to be geocoded. Latitude/Longitude is not present in the dataset.

count                            54665
mean     2016-05-17 19:10:59.939632384
min                2005-01-10 00:00:00
25%                2012-09-10 00:00:00
50%                2017-07-12 00:00:00
75%                2020-06-25 00:00:00
max                2023-08-31 00:00:00
Name: Date, dtype: object

### In-Service Fire Alarm Call Boxes 

In [19]:
alarm_call_boxes = pd.read_csv("data/street_furniture/In-Service_Alarm_Box_Locations_20240816.csv")
alarm_call_boxes = gpd.GeoDataFrame(alarm_call_boxes, geometry=wkt.loads(alarm_call_boxes['Location Point']), crs='EPSG:4326').to_crs(PROJ)

## Spatial Joining of Street Furnitures to Sidewalk Graph 

In [20]:
# sjoin nearest bus stops and trash cans to sidewalk
len_before = len(sidewalk_nyc)
bus_stop_shelters = gpd.sjoin(sidewalk_nyc, bus_stop_shelters, )
logger.info(f"Missing {len(bus_stop_shelters[bus_stop_shelters['index_right'].isna()])} bus stop shelters.")

[34m2024-08-16 16:05:49 - rs-street-furniture - INFO - Missing 0 bus stop shelters.[0m


In [21]:
# sjoin nearest trash cans to sidewalk
len_before = len(trash_cans)
trash_cans = gpd.sjoin(sidewalk_nyc, trash_cans, )
logger.info(f"Removed {len_before - len(trash_cans)} trash cans that are not on sidewalks.")

[34m2024-08-16 16:05:50 - rs-street-furniture - INFO - Removed 1512 trash cans that are not on sidewalks.[0m


In [22]:
# sjoin nearest linknyc to sidewalk
len_before = len(linknyc)
linknyc = gpd.sjoin(sidewalk_nyc, linknyc, )
logger.info(f"LinkNYC: {len_before} -> {len(linknyc)}")

[34m2024-08-16 16:05:50 - rs-street-furniture - INFO - LinkNYC: 2137 -> 4530[0m


In [23]:
# sjoin nearest citybench 
len_before = len(citybench)
citybench = gpd.sjoin(sidewalk_nyc, citybench, )
logger.info(f"Citybench: {len_before} -> {len(citybench)}")

[34m2024-08-16 16:05:51 - rs-street-furniture - INFO - Citybench: 2141 -> 1336[0m


In [24]:
# sjoint nearest bicycle parking shelters to sidewalk
len_before = len(bicycle_parking_shelters)
bicycle_parking_shelters = gpd.sjoin(sidewalk_nyc, bicycle_parking_shelters, )
logger.info(f"Bicycle Parking Shelters: {len_before} -> {len(bicycle_parking_shelters)}")

[34m2024-08-16 16:05:51 - rs-street-furniture - INFO - Bicycle Parking Shelters: 17 -> 35[0m


In [25]:

# sjoin nearest bicycle racks to sidewalk
len_before = len(bicycle_racks)
bicycle_racks = gpd.sjoin(sidewalk_nyc, bicycle_racks, )
logger.info(f"Bicycle Racks: {len_before} -> {len(bicycle_racks)}")

[34m2024-08-16 16:05:52 - rs-street-furniture - INFO - Bicycle Racks: 31585 -> 57734[0m


In [26]:
# sjoin nearest trees to sidewalk
len_before = len(trees)
trees = gpd.sjoin(sidewalk_nyc, trees, )
logger.info(f"Trees: {len_before} -> {len(trees)}")

[34m2024-08-16 16:05:56 - rs-street-furniture - INFO - Trees: 683788 -> 1369978[0m


In [27]:
# sjoin nearest newsstands to sidewalk
len_before = len(newsstands)
newsstands = gpd.sjoin(sidewalk_nyc, newsstands, )
logger.info(f"Newsstands: {len_before} -> {len(newsstands)}")

[34m2024-08-16 16:05:56 - rs-street-furniture - INFO - Newsstands: 357 -> 537[0m


In [28]:
# sjoin nearest parking meters to sidewalk
len_before = len(parking_meters)
parking_meters = gpd.sjoin(sidewalk_nyc, parking_meters, )
logger.info(f"Parking Meters: {len_before} -> {len(parking_meters)}")

[34m2024-08-16 16:05:57 - rs-street-furniture - INFO - Parking Meters: 13336 -> 25911[0m


In [29]:
# sjoin nearest hydrants to sidewalk
len_before = len(hydrants)
hydrants = gpd.sjoin(sidewalk_nyc, hydrants, )
logger.info(f"Hydrants: {len_before} -> {len(hydrants)}")

[34m2024-08-16 16:05:58 - rs-street-furniture - INFO - Hydrants: 109410 -> 215766[0m


In [30]:
# sjoin nearest street signs to sidewalk
len_before = len(street_signs)
street_signs = gpd.sjoin(sidewalk_nyc, street_signs, )
logger.info(f"Street Signs: {len_before} -> {len(street_signs)}")

[34m2024-08-16 16:06:02 - rs-street-furniture - INFO - Street Signs: 1071145 -> 845326[0m


In [31]:
# sjoin nearest bollards to sidewalk
#len_before = len(bollards)
#bollards = gpd.sjoin(sidewalk_nyc, bollards )
#logger.info(f"Bollards: {len_before} -> {len(bollards)}")


In [32]:
# sjoin nearest alarm call boxes to sidewalk
len_before = len(alarm_call_boxes)
alarm_call_boxes = gpd.sjoin(sidewalk_nyc, alarm_call_boxes, )
logger.info(f"Alarm Call Boxes: {len_before} -> {len(alarm_call_boxes)}")

[34m2024-08-16 16:06:02 - rs-street-furniture - INFO - Alarm Call Boxes: 13008 -> 9746[0m


In [33]:

# now, get number of bus stops, trash cans, linknyc, citybench, bicycle parking shelters, and bicycle racks per sidewalk
bus_stop_counts = bus_stop_shelters.groupby('point_index').size().reset_index(name='bus_stop_count').fillna(0)
trash_can_counts = trash_cans.groupby('point_index').size().reset_index(name='trash_can_count').fillna(0)
linknyc_counts = linknyc.groupby('point_index').size().reset_index(name='linknyc_count').fillna(0)
citybench_counts = citybench.groupby('point_index').size().reset_index(name='citybench_count').fillna(0)
bicycle_parking_shelter_counts = bicycle_parking_shelters.groupby('point_index').size().reset_index(name='bicycle_parking_shelter_count').fillna(0)
bicycle_rack_counts = bicycle_racks.groupby('point_index').size().reset_index(name='bicycle_rack_count').fillna(0)
tree_counts = trees.groupby('point_index').size().reset_index(name='tree_count').fillna(0)
newsstand_counts = newsstands.groupby('point_index').size().reset_index(name='newsstand_count').fillna(0)
parking_meter_counts = parking_meters.groupby('point_index').size().reset_index(name='parking_meter_count').fillna(0)
hydrant_counts = hydrants.groupby('point_index').size().reset_index(name='hydrant_count').fillna(0)
street_sign_counts = street_signs.groupby('point_index').size().reset_index(name='street_sign_count').fillna(0)
#bollard_counts = bollards.groupby('point_index').size().reset_index(name='bollard_count').fillna(0)
alarm_call_box_counts = alarm_call_boxes.groupby('point_index').size().reset_index(name='alarm_call_box_count').fillna(0)

In [34]:

# merge counts to sidewalk_nyc
sidewalk_nyc = sidewalk_nyc.merge(bus_stop_counts, on='point_index', how='left')
sidewalk_nyc = sidewalk_nyc.merge(trash_can_counts, on='point_index', how='left')
sidewalk_nyc = sidewalk_nyc.merge(linknyc_counts, on='point_index', how='left')
sidewalk_nyc = sidewalk_nyc.merge(citybench_counts, on='point_index', how='left')
sidewalk_nyc = sidewalk_nyc.merge(bicycle_parking_shelter_counts, on='point_index', how='left')
sidewalk_nyc = sidewalk_nyc.merge(bicycle_rack_counts, on='point_index', how='left')
sidewalk_nyc = sidewalk_nyc.merge(tree_counts, on='point_index', how='left')
sidewalk_nyc = sidewalk_nyc.merge(newsstand_counts, on='point_index', how='left')
sidewalk_nyc = sidewalk_nyc.merge(parking_meter_counts, on='point_index', how='left')
sidewalk_nyc = sidewalk_nyc.merge(hydrant_counts, on='point_index', how='left')
sidewalk_nyc = sidewalk_nyc.merge(street_sign_counts, on='point_index', how='left')
#sidewalk_nyc = sidewalk_nyc.merge(bollard_counts, on='point_index', how='left')
sidewalk_nyc = sidewalk_nyc.merge(alarm_call_box_counts, on='point_index', how='left')

In [35]:

sidewalk_nyc = sidewalk_nyc.fillna(0)

In [36]:
sidewalk_nyc.describe([0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.975, 0.99])

Unnamed: 0,point_index,SHAPE_Leng,SHAPE_Area,SHAPE_Width,bus_stop_count,trash_can_count,linknyc_count,citybench_count,bicycle_parking_shelter_count,bicycle_rack_count,tree_count,newsstand_count,parking_meter_count,hydrant_count,street_sign_count,alarm_call_box_count
count,2551208.0,2551208.0,2551208.0,2551208.0,2551208.0,2551208.0,2551208.0,2551208.0,2551208.0,2551208.0,2551208.0,2551208.0,2551208.0,2551208.0,2551208.0,2551208.0
mean,13316740000.0,5353.513,30532.28,5.37354,0.3014396,1.105768,0.6056127,0.3657926,0.0009144688,2.744538,52.11814,0.07574255,0.8160091,9.92464,104.9743,2.773611
std,5368500000.0,12685.5,80596.08,1.480766,1.19806,4.992232,6.19447,4.152117,0.04281586,14.23518,130.5886,0.8013718,2.195455,41.54547,1033.184,31.39269
min,0.0,13.88815,10.18359,0.2710948,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1%,1380000000.0,223.3946,756.6271,2.257815,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2.5%,1380001000.0,473.5645,1814.837,2.724753,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5%,3380000000.0,833.0682,3479.187,3.319015,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10%,5380001000.0,1453.053,6537.684,3.818261,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,9380001000.0,2449.058,12026.21,4.458703,0.0,0.0,0.0,0.0,0.0,0.0,14.0,0.0,0.0,3.0,6.0,0.0
50%,14380000000.0,3306.964,16994.73,5.14909,0.0,0.0,0.0,0.0,0.0,0.0,37.0,0.0,0.0,6.0,19.0,0.0


In [39]:
# naive weights based on predicted area of different clutters 
weights = { 
    'bus_stop_count': 2,
    'trash_can_count': 0.5, 
    'linknyc_count': 2, 
    'citybench_count': 1.5,
    'bicycle_parking_shelter_count': 2,
    'bicycle_rack_count': 1.5,
    'tree_count': .15,
    'newsstand_count': 3, 
    'parking_meter_count': .15,
    #'scaffolding_permit_count': 2,
    'hydrant_count': 0.25,
    'street_sign_count': 0.05,
    #'bollard_count': 0.05,
    'alarm_call_box_count': .15
}

In [40]:

# create a 'clutter' metric that is the sum of all street clutter features
sidewalk_nyc['clutter'] = 0
for feature, weight in weights.items():
    sidewalk_nyc['clutter'] += sidewalk_nyc[feature] * weight

sidewalk_nyc['clutter'].describe([0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.975, 0.99])

count    2.551208e+06
mean     2.334758e+01
std      1.304914e+02
min      0.000000e+00
1%       0.000000e+00
2.5%     0.000000e+00
5%       1.000000e-01
10%      1.000000e+00
25%      5.250000e+00
50%      1.015000e+01
75%      1.640000e+01
90%      2.835000e+01
95%      3.905000e+01
97.5%    5.260000e+01
99%      8.415000e+01
max      1.673750e+03
Name: clutter, dtype: float64

In [42]:
# Now, weighted clutter by sidewalk width 
sidewalk_nyc['clutter'] = sidewalk_nyc['clutter'] / sidewalk_nyc['SHAPE_Width']

In [44]:
# clamp distribution to 1st and 99th percentile
sidewalk_nyc['clutter'] = sidewalk_nyc['clutter'].clip(lower=sidewalk_nyc['clutter'].quantile(0.01), upper=sidewalk_nyc['clutter'].quantile(0.99))

In [45]:
# final describe 
sidewalk_nyc['clutter'].describe([0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.975, 0.99])

count    2.551208e+06
mean     2.420764e+00
std      2.248601e+00
min      0.000000e+00
1%       0.000000e+00
2.5%     0.000000e+00
5%       2.229207e-02
10%      2.061203e-01
25%      1.038298e+00
50%      1.953818e+00
75%      3.077379e+00
90%      4.850562e+00
95%      6.514560e+00
97.5%    8.611576e+00
99%      1.396967e+01
max      1.396967e+01
Name: clutter, dtype: float64

In [47]:
# map sidewalk and color by clutter 
fig, ax = plt.subplots(figsize=(20, 20))
sidewalk_nyc.plot(column='clutter', ax=ax, legend=True, cmap='cividis', markersize=0.25, legend_kwds={'label': "Weighted Street Clutter", 'orientation': 'horizontal', 'shrink': 0.5, 'pad': 0.01})
ax.set_axis_off()

plt.savefig("figures/street_furniture_density.png", dpi=300, bbox_inches='tight', pad_inches=0)
plt.close()

In [48]:
# write street furniture density to csv 
os.makedirs("data/processed", exist_ok=True)
sidewalk_nyc.to_csv("data/processed/street_furniture_density.csv", index=False)