In [None]:
# Load IPython's autoreload extension and switch it to mode 2 so that edits to
# imported modules (e.g. the project's `src` package) are picked up live,
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Optional: uncomment the next line for interactive matplotlib figures (needs ipympl).
# %matplotlib widget

In [None]:
from pathlib import Path

import contextily as ctx
import folium
import geopandas as gpd
import h3
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
from keplergl import KeplerGl
from shapely.geometry import Polygon
from tqdm.auto import tqdm

# The wildcard import is kept as-is: names such as GENERATED_DATA_DIR come from it.
from src.settings import *
from src.models.tfidf import tfidf
from src.tools.aggregation import aggregate_hex
from src.tools.clustering import cluster_hdbscan
from src.tools.dim_reduction import reduce_umap
from src.tools.feature_extraction import melt_and_max
from src.tools.h3_utils import get_resolution_buffered_suffix
from src.tools.osmnx_utils import get_place_dir_name
from src.tools.vis_utils import plot_hexagons_map

In [None]:
# Enable tqdm's pandas integration so that the `progress_apply`-style methods
# are available on pandas objects.
tqdm.pandas()

In [None]:
# Tunable inputs for the whole notebook: which place to analyse and which
# resolution / buffering variant of the generated files to read.
place_name = "Wrocław,Poland"
resolution = 9
buffered = True

# Directory name derived from the place name by the project helper.
place_dir_name = get_place_dir_name(place_name)

# Read the generated "edges_drive" GeoJSON for the selected variant and peek at its last rows.
edges_geojson_path = GENERATED_DATA_DIR.joinpath(
    place_dir_name,
    f"edges_drive_{get_resolution_buffered_suffix(resolution, buffered)}.geojson",
)
edges = gpd.read_file(edges_geojson_path, driver="GeoJSON")
edges.tail(5)

In [None]:
# Read the generated hexagon GeoJSON for the same place / variant and index it
# by the "h3_id" column so later cells can join on it.
hexagons_geojson_path = GENERATED_DATA_DIR.joinpath(
    place_dir_name,
    f"hex_{get_resolution_buffered_suffix(resolution, buffered)}.geojson",
)
hexagons = gpd.read_file(hexagons_geojson_path, driver="GeoJSON").set_index("h3_id")
hexagons.head(5)

In [None]:
# Read the shapefile variant of the edges from the "shp_drive" sub-directory,
# indexed by the "h3_id" column.
edges_shp_path = GENERATED_DATA_DIR.joinpath(
    place_dir_name,
    "shp_drive",
    f"edges_hex_{get_resolution_buffered_suffix(resolution, buffered)}.shp",
)
edges_shp = gpd.read_file(edges_shp_path).set_index("h3_id")

In [None]:
# Inspect which columns the loaded edges table provides.
edges.columns

In [None]:
# Aggregate the edge attributes with the project's `aggregate_hex` helper
# (imported together with the other project helpers in the import cell).
# NOTE(review): `hex_feautres` is a misspelling of "hex_features"; the name is
# kept unchanged because the TF-IDF cell reads it.
hex_feautres = aggregate_hex(edges)

In [None]:
# Apply the project's `tfidf` transform to the aggregated features and display the result.
hex_features_tfidf = tfidf(hex_feautres)
hex_features_tfidf

In [None]:
# Reduce the TF-IDF features to two components with the project's UMAP helper.
# Only the first element of the helper's return value is kept.
# NOTE(review): no seed is passed here — confirm whether `reduce_umap` fixes one;
# otherwise the embedding (and the clusters built on it) may differ between runs.
embedding = reduce_umap(
    hex_features_tfidf,
    n_components=2,
    n_neighbors=7,
    metric="cosine",
)[0]

In [None]:
# Cluster on the embedding coordinates only. Any "cluster" column left over from
# a previous execution of this cell is dropped first; otherwise re-running the
# cell would feed the old labels back into the clustering as an extra feature.
# On the first run no such column exists and `errors="ignore"` makes this a no-op.
embedding_coords = embedding.drop(columns="cluster", errors="ignore")
embedding["cluster"] = cluster_hdbscan(embedding_coords, min_cluster_size=30, metric="euclidean")[0]

# Scatter plot of the two embedding components, colored by cluster label.
fig = px.scatter(embedding, x="x_0", y="x_1", color="cluster", width=800, height=700)
fig.show()

In [None]:
# Attach the embedding columns (coordinates and cluster label) to the hexagons
# via an index-aligned join, then discard every row that has a missing value.
hexagons_clustered = (
    hexagons
    .join(embedding)
    .dropna()
)
hexagons_clustered.head(5)

In [None]:
# Hand the clustered hexagons, the edges and the "cluster" column name to the
# project's map helper (`plot_hexagons_map` is imported in the import cell).
# Hexagons labelled -1 are excluded first — presumably that is the noise label
# produced by `cluster_hdbscan`; confirm against the helper.
has_cluster = hexagons_clustered["cluster"] != -1
plot_hexagons_map(hexagons_clustered[has_cluster], edges, "cluster")

In [None]:
# Join the TF-IDF features with the embedding columns (index-aligned) and group
# the combined rows by their cluster label.
hex_group_cluster = (
    hex_features_tfidf
    .join(embedding)
    .groupby("cluster")
)

In [None]:
# Mean of every column within each cluster; `reset_index` turns the cluster
# label back into a regular column so it can be used as a plot axis.
cluster_means = hex_group_cluster.mean()
hex_agg = cluster_means.reset_index()
hex_agg

In [None]:
# Bar chart of the per-cluster means, restricted to the columns whose name
# contains "bridge".
bridge_columns = [col for col in hex_agg.columns if "bridge" in col]
fig = px.bar(hex_agg, x="cluster", y=bridge_columns, width=1300)

# Linear tick mode on the x axis.
fig.update_layout(xaxis={"tickmode": "linear"})
fig.show()

In [None]:
# For each attribute family, collect the edge columns whose name contains the
# family name and join the output of the project's `melt_and_max` helper back
# onto the edges via the "id" column (`melt_and_max` is imported in the import cell).
# NOTE(review): this cell rebinds `edges`, so it is not safe to re-run on its
# own — a second run would see the columns joined here. Re-run the edges
# loading cell first.
for column_name in ["oneway", "lanes", "maxspeed", "width"]:
    # Recomputed on every pass because `edges` gains columns inside the loop.
    columns = [x for x in edges.columns if column_name in x]
    edges = edges.join(melt_and_max(edges, column_name, columns), on="id")
edges