In [4]:
# class RobotabilityGraph that inherits from Graph class 
import os
import sys 
sys.path.append("/share/ju/urban-fingerprinting")

import osmnx as ox 
import geopandas as gpd 
import pandas as pd 
import numpy as np 

import matplotlib.pyplot as plt 
# enable latex plotting 
plt.rc('text', usetex=True)
plt.rc('font', family='serif')

from glob import glob 
from tqdm import tqdm 

from shapely import wkt, LineString 

import rasterio
from rasterio.enums import Resampling
from rasterio.plot import show 


from src.utils.logger import setup_logger 

logger = setup_logger('rs-street-furniture')
logger.setLevel("INFO")
logger.info("Modules initialized.")

WGS='EPSG:4326'
PROJ='EPSG:2263'

REGEN_SEGMENTIZATION=False
REGEN_TOPOLOGY=True

GEN_INSPECTION_PLOTS=True
INSPECTION_PLOTS="figures/inspection_plots"

os.makedirs(INSPECTION_PLOTS, exist_ok=True)


[34m2024-12-10 00:32:00 - rs-street-furniture - INFO - Modules initialized.[0m


## Loading and Preprocessing Data Features 

### Sidewalk Basemap (NYC)

In [5]:
segmentized = pd.read_csv("../data/sidewalks_nyc_segmentized.csv")
segmentized = gpd.GeoDataFrame(segmentized, geometry=segmentized['geometry'].apply(wkt.loads), crs=PROJ)
logger.info("Segmentized sidewalk basemap loaded.")


sidewalk_nyc = segmentized

logger.success("NYC sidewalk basemap loaded.")
logger.info(f"Distribution of sidewalk widths [ft]: \n{sidewalk_nyc['width'].describe()}")

[34m2024-12-10 00:32:13 - rs-street-furniture - INFO - Segmentized sidewalk basemap loaded.[0m
[32m2024-12-10 00:32:13 - rs-street-furniture - SUCCESS - NYC sidewalk basemap loaded.[0m
[34m2024-12-10 00:32:13 - rs-street-furniture - INFO - Distribution of sidewalk widths [ft]: 
count    1.874608e+06
mean     1.001624e+01
std      5.892543e+00
min      3.717949e-02
25%      6.797932e+00
50%      8.901577e+00
75%      1.186210e+01
max      1.652698e+02
Name: width, dtype: float64[0m


In [6]:
# the maximum distance to search for a nearby street segment. Since we segmentize by 50 feet, we can search within 25 feet
MAX_DISTANCE=25

CUTOFF= pd.to_datetime("2023-08-31")


In [7]:

# we buffer each point by 25 feet, creating a 50-diameter circle centered at the point. This captures nearby clutter. 
sidewalk_nyc['geometry'] = sidewalk_nyc['geometry'].buffer(MAX_DISTANCE)

### Bus Stop Shelters 

In [8]:
# read bus stop shelters 
bus_stop_shelters = gpd.read_file("../data/street_furniture/bus_stop_shelters_nyc.csv")
bus_stop_shelters = gpd.GeoDataFrame(bus_stop_shelters, geometry=wkt.loads(bus_stop_shelters['the_geom']), crs=WGS).to_crs(PROJ)

# Bus stop installation date is not present, so filtering is out-of-scoped.

### Trash Cans / Waste Baskets 

In [9]:
# load trash cans 
trash_cans = pd.read_csv("../data/street_furniture/dsny_litter_baskets_nyc.csv")
trash_cans = gpd.GeoDataFrame(trash_cans, geometry=wkt.loads(trash_cans['point']), crs=WGS).to_crs(PROJ)

# trash can installation date is not present, so filtering is out-of-scope

### LinkNYC Kiosks 

In [10]:
# load linknyc
linknyc = pd.read_csv("../data/street_furniture/LinkNYC_Kiosk_Locations_20240816.csv")
linknyc = gpd.GeoDataFrame(linknyc, geometry=gpd.points_from_xy(linknyc['Longitude'], linknyc['Latitude']), crs='EPSG:4326').to_crs(PROJ)

linknyc['Installation Complete'] = pd.to_datetime(linknyc['Installation Complete'])
linknyc = linknyc[linknyc['Installation Complete'] <= CUTOFF]
linknyc['Installation Complete'].describe()

count                             2134
mean     2017-12-14 14:37:54.039362816
min                1971-12-01 00:00:00
25%                2016-11-12 00:00:00
50%                2017-07-18 00:00:00
75%                2018-02-17 00:00:00
max                2023-07-27 00:00:00
Name: Installation Complete, dtype: object

### Bicycle Parking Shelters 

In [11]:
# load bicycle parking shelters 
bicycle_parking_shelters = pd.read_csv("../data/street_furniture/bicycle_parking_shelters_nyc.csv")
bicycle_parking_shelters = gpd.GeoDataFrame(bicycle_parking_shelters, geometry=wkt.loads(bicycle_parking_shelters['the_geom']), crs=WGS).to_crs(PROJ)
bicycle_parking_shelters['Build_date'] = pd.to_datetime(bicycle_parking_shelters['Build_date'])
bicycle_parking_shelters = bicycle_parking_shelters[bicycle_parking_shelters['Build_date'] <= CUTOFF]
bicycle_parking_shelters['Build_date'].describe()

  bicycle_parking_shelters['Build_date'] = pd.to_datetime(bicycle_parking_shelters['Build_date'])


count                               17
mean     2008-07-26 18:21:10.588235264
min                2007-12-17 00:00:00
25%                2008-07-01 00:00:00
50%                2008-09-12 00:00:00
75%                2008-10-13 00:00:00
max                2008-12-17 00:00:00
Name: Build_date, dtype: object

### Bicycle Racks 

In [12]:
# load bicycle racks 
bicycle_racks = gpd.read_file("../data/street_furniture/bicycle_racks_nyc/Bycycle_Parking_2023_07/Bicycle_Parking_2023_07.shp").to_crs(PROJ)
bicycle_racks['Date_Inst'] = pd.to_datetime(bicycle_racks['Date_Inst'])
bicycle_racks = bicycle_racks[bicycle_racks['Date_Inst'] <= CUTOFF]
bicycle_racks['Date_Inst'].describe()

count                            31585
mean     2011-06-02 09:53:44.100047616
min                1900-01-01 00:00:00
25%                2010-06-14 00:00:00
50%                2013-05-31 00:00:00
75%                2017-03-02 00:00:00
max                2023-05-02 00:00:00
Name: Date_Inst, dtype: object

### CityBench 

In [13]:
# load citybench
citybench = pd.read_csv("../data/street_furniture/citybench_nyc.csv")
citybench = gpd.GeoDataFrame(citybench, geometry=gpd.points_from_xy(citybench['Longitude'], citybench['Latitude']), crs='EPSG:4326').to_crs(PROJ)
citybench['Installati'] = pd.to_datetime(citybench['Installati'])
citybench = citybench[citybench['Installati'] <= CUTOFF]
citybench['Installati'].describe()

count                             2141
mean     2015-04-09 03:36:34.301728256
min                2012-04-01 00:00:00
25%                2013-07-10 00:00:00
50%                2014-11-18 00:00:00
75%                2017-02-04 00:00:00
max                2019-09-30 00:00:00
Name: Installati, dtype: object

### Street Trees 

In [14]:
# load trees 
trees = pd.read_csv("../data/street_furniture/forestry_tree_points_nyc.csv", engine='pyarrow')
trees = gpd.GeoDataFrame(trees, geometry=gpd.points_from_xy(trees['longitude'], trees['latitude']), crs='EPSG:4326').to_crs(PROJ)
trees['created_at'] = pd.to_datetime(trees['created_at'])
trees = trees[trees['created_at'] <= CUTOFF]
trees['created_at'].describe()

count                           683788
mean     2015-12-06 07:18:59.574254592
min                2015-05-19 00:00:00
25%                2015-08-29 00:00:00
50%                2015-10-23 00:00:00
75%                2016-02-06 00:00:00
max                2016-10-05 00:00:00
Name: created_at, dtype: object

### News Stands 

In [15]:
# load newsstands 
newsstands = pd.read_csv("../data/street_furniture/newsstands_nyc.csv", engine='pyarrow')
newsstands = gpd.GeoDataFrame(newsstands, geometry=wkt.loads(newsstands['the_geom']), crs='EPSG:4326').to_crs(PROJ)
newsstands['Built_Date'] = pd.to_datetime(newsstands['Built_Date'])
newsstands = newsstands[newsstands['Built_Date'] <= CUTOFF]
newsstands['Built_Date'].describe() 

count                              357
mean     2011-09-04 23:31:45.882352896
min                2007-09-03 00:00:00
25%                2008-07-30 00:00:00
50%                2010-11-08 00:00:00
75%                2013-06-04 00:00:00
max                2021-12-22 00:00:00
Name: Built_Date, dtype: object

### Parking Meters 

In [16]:
# load parking meters 
parking_meters = pd.read_csv("../data/street_furniture/parking_meters_nyc.csv")
parking_meters = gpd.GeoDataFrame(parking_meters, geometry=wkt.loads(parking_meters['Location']), crs='EPSG:4326').to_crs(PROJ)

# parking meter installation date is not present, so filtering is out-of-scope

### Fire Hydrants 

In [17]:
# load hydrants 
hydrants = pd.read_csv("../data/street_furniture/fire_hydrants_nyc.csv")
hydrants = gpd.GeoDataFrame(hydrants, geometry=wkt.loads(hydrants['the_geom']), crs='EPSG:4326').to_crs(PROJ)

# hydrant installation date is not present, so filtering is out-of-scope

### Street Signs 

In [18]:
# load street signs 
street_signs = pd.read_csv("../data/street_furniture/Street_Sign_Work_Orders_20240816.csv", engine='pyarrow')

# only keep 'Current' record type 
street_signs = street_signs[street_signs['record_type'] == 'Current']
street_signs['order_completed_on_date'] = pd.to_datetime(street_signs['order_completed_on_date'])
street_signs = street_signs[street_signs['order_completed_on_date'] <= CUTOFF]
street_signs = gpd.GeoDataFrame(street_signs, geometry=gpd.points_from_xy(street_signs['sign_x_coord'], street_signs['sign_y_coord']), crs=PROJ)
street_signs['order_completed_on_date'].describe()

count                          1032022
mean     2018-07-23 05:09:47.827003648
min                1953-08-07 00:00:00
25%                2017-07-20 00:00:00
50%                2019-11-04 00:00:00
75%                2021-04-14 00:00:00
max                2023-08-31 00:00:00
Name: order_completed_on_date, dtype: object

### Bollards 

In [19]:
# load bollards 
bollards = pd.read_csv("../data/street_furniture/Traffic_Bollards_Tracking_and_Installations_20240816.csv", engine='pyarrow')

bollards['Date'] = pd.to_datetime(bollards['Date'])
bollards = bollards[bollards['Date'] <= CUTOFF]
bollards['Date'].describe()

# we choose not to process bollards, as locations need to be geocoded. Latitude/Longitude is not present in the dataset.

count                            54665
mean     2016-05-17 19:10:59.939632384
min                2005-01-10 00:00:00
25%                2012-09-10 00:00:00
50%                2017-07-12 00:00:00
75%                2020-06-25 00:00:00
max                2023-08-31 00:00:00
Name: Date, dtype: object

### In-Service Fire Alarm Call Boxes 

In [20]:
alarm_call_boxes = pd.read_csv("../data/street_furniture/In-Service_Alarm_Box_Locations_20240816.csv")
alarm_call_boxes = gpd.GeoDataFrame(alarm_call_boxes, geometry=wkt.loads(alarm_call_boxes['Location Point']), crs='EPSG:4326').to_crs(PROJ)

### Scaffolding 

In [21]:
# read DoB active scaffolding permits 
scaffolding_permits = pd.read_csv("../data/dob_active_sheds.csv", engine='pyarrow')
scaffolding_permits = gpd.GeoDataFrame(scaffolding_permits, geometry=gpd.points_from_xy(scaffolding_permits['Longitude Point'], scaffolding_permits['Latitude Point']), crs='EPSG:4326')
scaffolding_permits = scaffolding_permits.to_crs(PROJ)

In [22]:
scaffolding_permits['First Permit Date']  = pd.to_datetime(scaffolding_permits['First Permit Date'])

scaffolding_permits = scaffolding_permits[scaffolding_permits['First Permit Date'] <= CUTOFF]
scaffolding_permits['First Permit Date'].describe()

count                             6470
mean     2022-04-19 04:09:56.476043264
min                2006-04-27 00:00:00
25%                2021-11-05 00:00:00
50%                2022-10-25 00:00:00
75%                2023-04-27 00:00:00
max                2023-08-31 00:00:00
Name: First Permit Date, dtype: object

## Spatial Joining of Street Furnitures to Sidewalk Graph 

In [23]:
# sjoin nearest bus stops and trash cans to sidewalk
len_before = len(sidewalk_nyc)
bus_stop_shelters = gpd.sjoin(sidewalk_nyc, bus_stop_shelters, )
logger.info(f"Missing {len(bus_stop_shelters[bus_stop_shelters['index_right'].isna()])} bus stop shelters.")

[34m2024-12-10 00:32:52 - rs-street-furniture - INFO - Missing 0 bus stop shelters.[0m


In [24]:
# sjoin nearest trash cans to sidewalk
len_before = len(trash_cans)
trash_cans = gpd.sjoin(sidewalk_nyc, trash_cans, )
logger.info(f"Removed {len_before - len(trash_cans)} trash cans that are not on sidewalks.")

[34m2024-12-10 00:32:52 - rs-street-furniture - INFO - Removed 2649 trash cans that are not on sidewalks.[0m


In [25]:
# sjoin nearest linknyc to sidewalk
len_before = len(linknyc)
linknyc = gpd.sjoin(sidewalk_nyc, linknyc, )
logger.info(f"LinkNYC: {len_before} -> {len(linknyc)}")

[34m2024-12-10 00:32:53 - rs-street-furniture - INFO - LinkNYC: 2134 -> 5315[0m


In [26]:
# sjoin nearest citybench 
len_before = len(citybench)
citybench = gpd.sjoin(sidewalk_nyc, citybench, )
logger.info(f"Citybench: {len_before} -> {len(citybench)}")

[34m2024-12-10 00:32:53 - rs-street-furniture - INFO - Citybench: 2141 -> 724[0m


In [27]:
# sjoint nearest bicycle parking shelters to sidewalk
len_before = len(bicycle_parking_shelters)
bicycle_parking_shelters = gpd.sjoin(sidewalk_nyc, bicycle_parking_shelters, )
logger.info(f"Bicycle Parking Shelters: {len_before} -> {len(bicycle_parking_shelters)}")

[34m2024-12-10 00:32:54 - rs-street-furniture - INFO - Bicycle Parking Shelters: 17 -> 28[0m


In [28]:

# sjoin nearest bicycle racks to sidewalk
len_before = len(bicycle_racks)
bicycle_racks = gpd.sjoin(sidewalk_nyc, bicycle_racks, )
logger.info(f"Bicycle Racks: {len_before} -> {len(bicycle_racks)}")

[34m2024-12-10 00:32:54 - rs-street-furniture - INFO - Bicycle Racks: 31585 -> 43334[0m


In [29]:
# sjoin nearest trees to sidewalk
len_before = len(trees)
trees = gpd.sjoin(sidewalk_nyc, trees, )
logger.info(f"Trees: {len_before} -> {len(trees)}")

[34m2024-12-10 00:32:59 - rs-street-furniture - INFO - Trees: 683788 -> 888996[0m


In [30]:
# sjoin nearest newsstands to sidewalk
len_before = len(newsstands)
newsstands = gpd.sjoin(sidewalk_nyc, newsstands, )
logger.info(f"Newsstands: {len_before} -> {len(newsstands)}")

[34m2024-12-10 00:33:00 - rs-street-furniture - INFO - Newsstands: 357 -> 462[0m


In [31]:
# sjoin nearest parking meters to sidewalk
len_before = len(parking_meters)
parking_meters = gpd.sjoin(sidewalk_nyc, parking_meters, )
logger.info(f"Parking Meters: {len_before} -> {len(parking_meters)}")

[34m2024-12-10 00:33:01 - rs-street-furniture - INFO - Parking Meters: 13336 -> 16305[0m


In [32]:
# sjoin nearest hydrants to sidewalk
len_before = len(hydrants)
hydrants = gpd.sjoin(sidewalk_nyc, hydrants, )
logger.info(f"Hydrants: {len_before} -> {len(hydrants)}")

[34m2024-12-10 00:33:02 - rs-street-furniture - INFO - Hydrants: 109410 -> 165327[0m


In [33]:
# sjoin nearest street signs to sidewalk
len_before = len(street_signs)
street_signs = gpd.sjoin(sidewalk_nyc, street_signs, )
logger.info(f"Street Signs: {len_before} -> {len(street_signs)}")

[34m2024-12-10 00:33:06 - rs-street-furniture - INFO - Street Signs: 1032022 -> 714036[0m


In [34]:
# sjoin nearest bollards to sidewalk
#len_before = len(bollards)
#bollards = gpd.sjoin(sidewalk_nyc, bollards )
#logger.info(f"Bollards: {len_before} -> {len(bollards)}")


In [35]:
# sjoin nearest alarm call boxes to sidewalk
len_before = len(alarm_call_boxes)
alarm_call_boxes = gpd.sjoin(sidewalk_nyc, alarm_call_boxes, )
logger.info(f"Alarm Call Boxes: {len_before} -> {len(alarm_call_boxes)}")

[34m2024-12-10 00:33:06 - rs-street-furniture - INFO - Alarm Call Boxes: 13008 -> 8135[0m


In [36]:
BUFFER=100 
# buffer scaffolding_permits points, then sjoin to sidewalks
scaffolding_permits.geometry = scaffolding_permits.geometry.buffer(BUFFER)
scaffolding_permits = gpd.sjoin(sidewalk_nyc, scaffolding_permits, predicate='intersects')

In [37]:

# now, get number of bus stops, trash cans, linknyc, citybench, bicycle parking shelters, and bicycle racks per sidewalk
bus_stop_counts = bus_stop_shelters.groupby('point_index').size().reset_index(name='bus_stop_count').fillna(0)
trash_can_counts = trash_cans.groupby('point_index').size().reset_index(name='trash_can_count').fillna(0)
linknyc_counts = linknyc.groupby('point_index').size().reset_index(name='linknyc_count').fillna(0)
citybench_counts = citybench.groupby('point_index').size().reset_index(name='citybench_count').fillna(0)
bicycle_parking_shelter_counts = bicycle_parking_shelters.groupby('point_index').size().reset_index(name='bicycle_parking_shelter_count').fillna(0)
bicycle_rack_counts = bicycle_racks.groupby('point_index').size().reset_index(name='bicycle_rack_count').fillna(0)
tree_counts = trees.groupby('point_index').size().reset_index(name='tree_count').fillna(0)
newsstand_counts = newsstands.groupby('point_index').size().reset_index(name='newsstand_count').fillna(0)
parking_meter_counts = parking_meters.groupby('point_index').size().reset_index(name='parking_meter_count').fillna(0)
hydrant_counts = hydrants.groupby('point_index').size().reset_index(name='hydrant_count').fillna(0)
street_sign_counts = street_signs.groupby('point_index').size().reset_index(name='street_sign_count').fillna(0)
#bollard_counts = bollards.groupby('point_index').size().reset_index(name='bollard_count').fillna(0)
alarm_call_box_counts = alarm_call_boxes.groupby('point_index').size().reset_index(name='alarm_call_box_count').fillna(0)

In [38]:
# merge scaffolding in 
scaffolding_counts = scaffolding_permits.groupby('point_index').size().reset_index(name='scaffolding_permit_count').fillna(0)

In [39]:

# merge counts to sidewalk_nyc
sidewalk_nyc = sidewalk_nyc.merge(bus_stop_counts, on='point_index', how='left')
sidewalk_nyc = sidewalk_nyc.merge(trash_can_counts, on='point_index', how='left')
sidewalk_nyc = sidewalk_nyc.merge(linknyc_counts, on='point_index', how='left')
sidewalk_nyc = sidewalk_nyc.merge(citybench_counts, on='point_index', how='left')
sidewalk_nyc = sidewalk_nyc.merge(bicycle_parking_shelter_counts, on='point_index', how='left')
sidewalk_nyc = sidewalk_nyc.merge(bicycle_rack_counts, on='point_index', how='left')
sidewalk_nyc = sidewalk_nyc.merge(tree_counts, on='point_index', how='left')
sidewalk_nyc = sidewalk_nyc.merge(newsstand_counts, on='point_index', how='left')
sidewalk_nyc = sidewalk_nyc.merge(parking_meter_counts, on='point_index', how='left')
sidewalk_nyc = sidewalk_nyc.merge(hydrant_counts, on='point_index', how='left')
sidewalk_nyc = sidewalk_nyc.merge(street_sign_counts, on='point_index', how='left')
#sidewalk_nyc = sidewalk_nyc.merge(bollard_counts, on='point_index', how='left')
sidewalk_nyc = sidewalk_nyc.merge(alarm_call_box_counts, on='point_index', how='left')

In [40]:
# merge scaffolding in 
sidewalk_nyc = sidewalk_nyc.merge(scaffolding_counts, on='point_index', how='left')

In [41]:

sidewalk_nyc = sidewalk_nyc.fillna(0)

In [42]:
sidewalk_nyc.describe([0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.975, 0.99])

Unnamed: 0.1,Unnamed: 0,width,point_index,bus_stop_count,trash_can_count,linknyc_count,citybench_count,bicycle_parking_shelter_count,bicycle_rack_count,tree_count,newsstand_count,parking_meter_count,hydrant_count,street_sign_count,alarm_call_box_count,scaffolding_permit_count
count,1874608.0,1874608.0,1874608.0,1874608.0,1874608.0,1874608.0,1874608.0,1874608.0,1874608.0,1874608.0,1874608.0,1874608.0,1874608.0,1874608.0,1874608.0,1874608.0
mean,937303.5,10.01624,937303.5,0.00274244,0.01175286,0.002835259,0.0003862141,1.493646e-05,0.0231163,0.4742303,0.0002464515,0.008697818,0.08819284,0.3808988,0.004339574,0.05140541
std,541152.9,5.892543,541152.9,0.05310623,0.1079645,0.05323179,0.03062103,0.003864743,0.2294375,0.7146751,0.01576467,0.09296473,0.2854838,1.120235,0.06678703,0.3804588
min,0.0,0.03717949,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1%,18746.07,2.308659,18746.07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2.5%,46865.18,3.136487,46865.18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5%,93730.35,3.941932,93730.35,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10%,187460.7,4.9577,187460.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,468651.8,6.797932,468651.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,937303.5,8.901577,937303.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [43]:
# naive weights based on predicted area of different clutters 
weights = { 
    'bus_stop_count': 2,
    'trash_can_count': 0.5, 
    'linknyc_count': 2, 
    'citybench_count': 1.5,
    'bicycle_parking_shelter_count': 2,
    'bicycle_rack_count': 1.5,
    'tree_count': .15,
    'newsstand_count': 3, 
    'parking_meter_count': .15,
    'scaffolding_permit_count': 2,
    'hydrant_count': 0.25,
    'street_sign_count': 0.05,
    #'bollard_count': 0.05,
    'alarm_call_box_count': .15
}

In [44]:

# create a 'clutter' metric that is the sum of all street clutter features
sidewalk_nyc['clutter'] = 0
for feature, weight in weights.items():
    sidewalk_nyc['clutter'] += sidewalk_nyc[feature] * weight

sidewalk_nyc['clutter'].describe([0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.975, 0.99])

count    1.874608e+06
mean     2.700490e-01
std      8.801910e-01
min      0.000000e+00
1%       0.000000e+00
2.5%     0.000000e+00
5%       0.000000e+00
10%      0.000000e+00
25%      0.000000e+00
50%      5.000000e-02
75%      2.500000e-01
90%      4.000000e-01
95%      1.650000e+00
97.5%    2.200000e+00
99%      4.000000e+00
max      4.350000e+01
Name: clutter, dtype: float64

In [45]:
# Now, weighted clutter by sidewalk width 
sidewalk_nyc['clutter'] = sidewalk_nyc['clutter'] / sidewalk_nyc['width']

In [46]:
# clamp distribution to 1st and 99th percentile
sidewalk_nyc['clutter'] = sidewalk_nyc['clutter'].clip(lower=sidewalk_nyc['clutter'].quantile(0.01), upper=sidewalk_nyc['clutter'].quantile(0.99))

In [47]:
# final describe 
sidewalk_nyc['clutter'].describe([0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.975, 0.99])

count    1.874608e+06
mean     2.559677e-02
std      5.650779e-02
min      0.000000e+00
1%       0.000000e+00
2.5%     0.000000e+00
5%       0.000000e+00
10%      0.000000e+00
25%      0.000000e+00
50%      7.485189e-03
75%      2.555691e-02
90%      5.176617e-02
95%      1.245167e-01
97.5%    2.213307e-01
99%      3.668437e-01
max      3.668446e-01
Name: clutter, dtype: float64

In [48]:
# write street furniture density to csv 
os.makedirs("../data/processed", exist_ok=True)
sidewalk_nyc.to_csv("../data/processed/street_furniture_density.csv", index=False)

In [49]:
# map sidewalk and color by clutter 
fig, ax = plt.subplots(figsize=(20, 20))
sidewalk_nyc.plot(column='clutter', ax=ax, legend=True, cmap='cividis', markersize=0.25, legend_kwds={'label': "Weighted Street Clutter", 'orientation': 'horizontal', 'shrink': 0.5, 'pad': 0.01})
ax.set_axis_off()

plt.savefig("figures/street_furniture_density.png", dpi=300, bbox_inches='tight', pad_inches=0)
plt.close()