In [1]:
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point
%matplotlib inline
import seaborn as sns; sns.set_theme(color_codes=True)
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.colors
import warnings
warnings.filterwarnings('ignore')
from scipy import stats
import math    
import folium
from scipy.cluster.hierarchy import linkage, fcluster
from scipy.spatial.distance import pdist
import sys
from haversine import haversine

In [2]:
# Load the round-2 labels CSV into a DataFrame.
labels_csv_path = 'datasets/mikey-devon-labels-round-2-May-12-2023.csv'
labels = pd.read_csv(labels_csv_path)

In [3]:
# Display the loaded labels table as the cell output.
labels

Unnamed: 0,username,label_id,street_edge_id,neighborhood,label_type,lat,lng,gsv_panorama_id,time_created,correct,severity,temporary,tag_list,description
0,Devon Snyder,2548,1259,Milton,CurbRamp,41.893814,-88.103554,JqRFV1QowOOjFb-TIMnzMw,2023-03-07 13:42:01.804-08,,1.0,f,,debris
1,Devon Snyder,2549,1259,Milton,SurfaceProblem,41.893818,-88.103470,7aric67UimoidY9pcCuWiQ,2023-03-07 13:43:46.225-08,,1.0,f,grass,
2,Devon Snyder,2550,1259,Milton,SurfaceProblem,41.893982,-88.103340,xzribizdMX2eFxY6W8c7Tg,2023-03-07 13:44:22.433-08,,2.0,f,"cracks,grass",
3,Devon Snyder,2551,1259,Milton,SurfaceProblem,41.894131,-88.103302,JS7TZGSTE9PJI_DpazVx2g,2023-03-07 13:44:52.911-08,,1.0,f,grass,
4,Devon Snyder,2552,1259,Milton,SurfaceProblem,41.894184,-88.103271,JS7TZGSTE9PJI_DpazVx2g,2023-03-07 13:45:15.063-08,,2.0,f,"grass,bumpy",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4880,mikey,7863,809,Windermere,Occlusion,47.661331,-122.271698,Kv4J_ph-JeOR1prz5jKDug,2023-04-22 16:15:07.987-07,,,f,,
4881,mikey,7864,717,Windermere,SurfaceProblem,47.661339,-122.271500,bJ4_8CRv-DF6zvEd0xJejQ,2023-04-22 16:15:35.426-07,,1.0,f,,
4882,mikey,7865,809,Windermere,SurfaceProblem,47.661350,-122.271713,bJ4_8CRv-DF6zvEd0xJejQ,2023-04-22 16:15:41.958-07,,1.0,f,,
4883,mikey,7866,717,Windermere,SurfaceProblem,47.661366,-122.271629,bJ4_8CRv-DF6zvEd0xJejQ,2023-04-22 16:15:47.108-07,,1.0,f,,


In [4]:
# Create the labels GeoDataFrame: one point geometry per label, built from its
# lng (x) and lat (y) columns.
# points_from_xy builds all points in one vectorized call instead of a per-row
# Python lambda; wrapping it in a GeoSeries keeps `points` index-aligned with
# `labels`, as the original Series of Points was.
points = gpd.GeoSeries(gpd.points_from_xy(labels['lng'], labels['lat']), index=labels.index)
# The CRS is given as an authority string in the constructor; the
# {'init': 'epsg:4326'} dict form is deprecated.
labels_geo = gpd.GeoDataFrame(labels, geometry=points, crs='EPSG:4326')
labels_geo

Unnamed: 0,username,label_id,street_edge_id,neighborhood,label_type,lat,lng,gsv_panorama_id,time_created,correct,severity,temporary,tag_list,description,geometry
0,Devon Snyder,2548,1259,Milton,CurbRamp,41.893814,-88.103554,JqRFV1QowOOjFb-TIMnzMw,2023-03-07 13:42:01.804-08,,1.0,f,,debris,POINT (-88.10355 41.89381)
1,Devon Snyder,2549,1259,Milton,SurfaceProblem,41.893818,-88.103470,7aric67UimoidY9pcCuWiQ,2023-03-07 13:43:46.225-08,,1.0,f,grass,,POINT (-88.10347 41.89382)
2,Devon Snyder,2550,1259,Milton,SurfaceProblem,41.893982,-88.103340,xzribizdMX2eFxY6W8c7Tg,2023-03-07 13:44:22.433-08,,2.0,f,"cracks,grass",,POINT (-88.10334 41.89398)
3,Devon Snyder,2551,1259,Milton,SurfaceProblem,41.894131,-88.103302,JS7TZGSTE9PJI_DpazVx2g,2023-03-07 13:44:52.911-08,,1.0,f,grass,,POINT (-88.10330 41.89413)
4,Devon Snyder,2552,1259,Milton,SurfaceProblem,41.894184,-88.103271,JS7TZGSTE9PJI_DpazVx2g,2023-03-07 13:45:15.063-08,,2.0,f,"grass,bumpy",,POINT (-88.10327 41.89418)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4880,mikey,7863,809,Windermere,Occlusion,47.661331,-122.271698,Kv4J_ph-JeOR1prz5jKDug,2023-04-22 16:15:07.987-07,,,f,,,POINT (-122.27170 47.66133)
4881,mikey,7864,717,Windermere,SurfaceProblem,47.661339,-122.271500,bJ4_8CRv-DF6zvEd0xJejQ,2023-04-22 16:15:35.426-07,,1.0,f,,,POINT (-122.27150 47.66134)
4882,mikey,7865,809,Windermere,SurfaceProblem,47.661350,-122.271713,bJ4_8CRv-DF6zvEd0xJejQ,2023-04-22 16:15:41.958-07,,1.0,f,,,POINT (-122.27171 47.66135)
4883,mikey,7866,717,Windermere,SurfaceProblem,47.661366,-122.271629,bJ4_8CRv-DF6zvEd0xJejQ,2023-04-22 16:15:47.108-07,,1.0,f,,,POINT (-122.27163 47.66137)


In [5]:
# Separate the Seattle and Chicago labels by longitude: rows with lng above
# -105 go to `chicago`, rows with lng below -105 go to `seattle`.
# (A row with lng exactly -105 would land in neither frame.)
lng_above_split = labels_geo['lng'] > -105
lng_below_split = labels_geo['lng'] < -105
chicago = labels_geo.loc[lng_above_split]
seattle = labels_geo.loc[lng_below_split]

In [6]:
# Export each city's labels to its own GeoJSON file.
for city_labels, geojson_path in (
    (chicago, "datasets/chicago-validation-labels-round2.geojson"),
    (seattle, "datasets/seattle-validation-labels-round2.geojson"),
):
    city_labels.to_file(geojson_path, driver='GeoJSON')

In [5]:
# Read the round-2 validation-study streets CSV and display it.
streets_csv_path = "datasets/streets/validation-study-streets-round-2.csv"
streets_validated = pd.read_csv(streets_csv_path)
streets_validated

Unnamed: 0,route_id,region_id,street_edge_id,way_type,geom
0,3,19,1259,secondary,0102000020E610000007000000618500859F0656C0794D...
1,3,19,865,secondary,0102000020E61000000700000035B808E7AE0656C057B9...
2,3,19,1260,secondary,0102000020E610000002000000812C9F40B30656C0607A...
3,3,19,1261,secondary,0102000020E61000000600000033F1587DBF0656C0F4DF...
4,3,19,1257,secondary,0102000020E610000007000000863AAC70CB0656C00410...
...,...,...,...,...,...
241,23,9,33,residential,0102000020E610000002000000FE8F5DB326995EC027A0...
242,23,9,53,residential,0102000020E6100000050000009FB536E826995EC08187...
243,23,9,54,residential,0102000020E61000000C0000003AF361AC25995EC0C2E6...
244,23,9,55,residential,0102000020E61000000600000063974D2528995EC06FA0...


In [15]:
from shapely import wkb

In [16]:
# Decode the hex-encoded WKB strings in the geom column into shapely geometries
# with wkb.loads(geom, hex=True).
# Only string values are decoded, so re-running this cell after the column has
# already been converted leaves it unchanged instead of passing geometry
# objects back into wkb.loads.
streets_validated['geom'] = streets_validated['geom'].apply(
    lambda value: wkb.loads(value, hex=True) if isinstance(value, str) else value
)
streets_validated

Unnamed: 0,route_id,region_id,street_edge_id,way_type,geom
0,3,19,1259,secondary,"LINESTRING (-88.1034863 41.8936948, -88.103244..."
1,3,19,865,secondary,"LINESTRING (-88.1044252 41.8929046, -88.104052..."
2,3,19,1260,secondary,"LINESTRING (-88.1046907 41.8927056, -88.104425..."
3,3,19,1261,secondary,"LINESTRING (-88.1054376 41.8920545, -88.105356..."
4,3,19,1257,secondary,"LINESTRING (-88.106167 41.8911257, -88.106072 ..."
...,...,...,...,...,...
241,23,9,33,residential,"LINESTRING (-122.3929871 47.6787625, -122.3929..."
242,23,9,53,residential,"LINESTRING (-122.3929997 47.6794258, -122.3929..."
243,23,9,54,residential,"LINESTRING (-122.3929244 47.6794642, -122.3929..."
244,23,9,55,residential,"LINESTRING (-122.3930753 47.679481, -122.39307..."


In [18]:
# Create the streets GeoDataFrame, using the decoded `geom` column as the
# active geometry.
# The CRS is given as an authority string in the constructor; the
# {'init': 'epsg:4326'} dict form is deprecated.
streets_validated = gpd.GeoDataFrame(streets_validated, geometry='geom', crs='EPSG:4326')
# Render the streets on an interactive map as the cell output.
streets_validated.explore()

In [20]:
# Separate Seattle and Chicago streets by the minimum x (longitude) of each
# geometry's bounds: Chicago is -105 < minx < -80, Seattle is minx < -105.
street_min_x = streets_validated['geom'].bounds['minx']
in_chicago_band = (street_min_x > -105) & (street_min_x < -80)
streets_chicago = streets_validated.loc[in_chicago_band]
streets_seattle = streets_validated.loc[street_min_x < -105]


In [24]:
# Save each city's streets to its own GeoJSON file.
for city_streets, geojson_path in (
    (streets_chicago, "datasets/streets/chicago-streets.geojson"),
    (streets_seattle, "datasets/streets/seattle-streets.geojson"),
):
    city_streets.to_file(geojson_path, driver='GeoJSON')