# Polygonization by rivers & railways - manual exploration (tags)

In this notebook, for a given city, we
* load the raw OSM data on rivers/railways for that city's polygon
* visualize the data on an interactive map to explore it
* verify that we can drop Points and Polygons
* record in `barriertags.yml` all tag values that should be kept


In [None]:
# import warnings
# warnings.filterwarnings("ignore")
import os
# import sys
# import argparse
# import random
# from time import time
# import numpy as np
import pandas as pd
# import pickle
import matplotlib.pyplot as plt
import geopandas as gpd
# from shapely.geometry import LineString, Point, MultiPoint, MultiLineString, Polygon
# from tqdm import tqdm
# from geopy.distance import distance, geodesic, great_circle
import osmnx as ox
# import networkx as nx
# import scipy.stats
# from scipy.stats import ks_2samp
# import sklearn
# import igraph
# from igraph import Graph

# from random import choice
# from bisect import bisect_left
# import copy

# from functools import partial
# import pyproj
# from pyproj import Geod
# from pyproj.crs import ProjectedCRS
# from pyproj.crs.coordinate_operation import AzimuthalEquidistantConversion
# from shapely.ops import transform

import yaml
# from utils import *
import shapely
# ox.__version__
import momepy
import folium

In [None]:
# Load the CBSA lookup table, keeping only the columns this notebook uses
cbsacode_file = '../data/cbsacode.csv'
cbsa_columns = ["cbsacode", "name", "full_name", "geometry", "west", "south", "east", "north"]
df_cbsacodes = pd.read_csv(cbsacode_file)[cbsa_columns]
# The geometry column is stored as WKT text; parse each entry into a shapely geometry
df_cbsacodes["geometry"] = df_cbsacodes.geometry.apply(shapely.from_wkt)
# Wrap the table in a GeoDataFrame (coordinates are declared as EPSG:4326)
gdf_cbsacodes = gpd.GeoDataFrame(df_cbsacodes, crs="EPSG:4326")
gdf_cbsacodes

# Choose city and barrier type to explore

In [None]:
# Matched against the "name" column of the CBSA table to pick the city to explore
city_name = "Washington"
# Used in the raw file name (<cbsacode>_<barrier_type>.gpkg) and as the tag column inspected below
barrier_type = "waterway"

**load data**

In [None]:
# Look up the CBSA code of the chosen city. Fail with a readable message
# (instead of an opaque IndexError) when `city_name` has no match in the table.
city_matches = gdf_cbsacodes.loc[gdf_cbsacodes["name"] == city_name, "cbsacode"]
if city_matches.empty:
    raise ValueError(f"No CBSA entry with name {city_name!r}; check the 'name' column of the CBSA table")
cbsacode = city_matches.iloc[0]  # first match if the name occurs more than once

# Raw barrier file for this city and barrier type
folder_osm = "../data/natural_barriers/raw/"
barrier_path = os.path.join(folder_osm, f'{cbsacode}_{barrier_type}.gpkg')
gdf = gpd.read_file(barrier_path)

# The map below splits the data into exactly these three geometry types,
# so stop here if anything else (e.g. a multi-part geometry) shows up.
assert all([t in ["Point", "LineString", "Polygon"] for t in gdf.geom_type.unique()]), "Unexpected geom type, double check"

**Can we drop Points and Polygons?** (if yes - no need to do anything further)

In [None]:
# Draw each geometry type as its own named layer on one shared interactive map
tile_style = "CartoDB.Positron"
# LineStrings create the base map ...
m = gdf[gdf.geom_type == "LineString"].explore(tiles=tile_style, name="LineString")
# ... Points and Polygons are then added on top of it, each in its own colour
for geom_type, layer_color in [("Point", "red"), ("Polygon", "green")]:
    gdf[gdf.geom_type == geom_type].explore(m=m, tiles=tile_style, name=geom_type, color=layer_color)
# Layer control lets the reader toggle the three layers on and off
folium.LayerControl().add_to(m)
m

**Which tags should we keep?**

In [None]:
# From here on only line geometries are considered
is_line = gdf.geom_type == "LineString"
gdf = gdf[is_line]
# Distinct tag values present for the chosen barrier type
gdf[barrier_type].unique()

In [None]:
# Tag values to hide while inspecting everything else; edit this list while exploring.
# Earlier explorations instead kept only selected values via `.isin([...])`, e.g.
#   ["rail", "abandoned"] or ["rail", "abandoned", "disused"] for rail data,
#   ["river", "dam"] for waterways.
hidden_tags = ["stream"]
is_hidden = gdf[barrier_type].isin(hidden_tags)
m = gdf[~is_hidden].explore(
    max_zoom=52,
    tiles="CartoDB.Positron",
    column=barrier_type,  # colour the lines by tag value
    cmap="Set2",
    opacity=.9,
    style_kwds={"weight": 5},
)
folium.LayerControl().add_to(m)
m

# Notes

* Miami, waterways: potentially hard because of the many very small polygons (canal system)
* Orlando, waterways: potentially too detailed