In [1]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
import h3
import pandas as pd
import geopandas as gpd
import numpy as np
from src.settings import *
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
from src.tools.osmnx_utils import get_place_dir_name
from src.tools.h3_utils import get_resolution_buffered_suffix, get_edges_with_features_filename
from pathlib import Path
import plotly.express as px
import matplotlib.pyplot as plt
import contextily as ctx
from keplergl import KeplerGl
from src.tools.aggregation import aggregate_hex
import json5 as json
tqdm.pandas()

In [3]:
# Load the feature-set definition; its top-level keys are the feature names
# used by the cells below. The encoding is pinned so the file is read the same
# way regardless of the platform's default locale encoding.
with open(RAW_DATA_DIR / "featureset_transformation_default.jsonc", "r", encoding="utf-8") as f:
    FEATURESET = json.load(f)

In [4]:
# Load the table of cities to process; the aggregation loop further down reads
# the `city` and `country` columns of every row.
cities = pd.read_csv(RAW_DATA_DIR / "cities.csv")
# Optional subset (disabled): keep only rows for Poland where the `kacper` column is truthy.
# cities = cities[(cities.country == "Poland") & (cities.kacper)]
cities

Unnamed: 0,city,country,continent,kacper,szymon,piotr,kamil,regions,to_fix
0,Tokyo,Japan,Asia,False,False,False,False,,True
1,Nur-Sultan,Kazakhstan,Asia,True,True,False,False,,False
2,Doha,Qatar,Asia,True,False,False,False,,True
3,Moscow,Russia,Asia,True,True,False,True,,False
4,St. Petersburg,Russia,Asia,True,False,False,False,,False
...,...,...,...,...,...,...,...,...,...
112,New York City,United States,North America,True,True,False,False,,True
113,Philadelphia,United States,North America,True,True,False,False,,True
114,San Francisco,United States,North America,True,True,False,False,,True
115,Washington D.C.,United States,North America,False,False,False,False,,True


In [5]:
# Parameters selecting which pre-generated per-city edge file is read.
resolution = 9
buffered = True
network_type = "drive"
intersection_based = False

pbar = tqdm(cities.itertuples(), total=cities.shape[0])
hexagons = []  # not filled anywhere in this cell; kept so the name stays defined
cities_agg = []  # one (country, city) aggregate frame per successfully processed city
edges = []  # stays empty: collecting the raw edges is disabled (see commented-out lines)
failed_places = []  # (place_name, exception) for every city that could not be processed

features_top_level = list(FEATURESET.keys())
df_feature_counts = pd.DataFrame(data={"feature": features_top_level, "count": 0 }).set_index("feature")
n = 0  # total number of edge rows over all successfully processed cities
for row in pbar:
    place_name = f"{row.city},{row.country}"
    place_dir_name = get_place_dir_name(place_name)
    place_dir_path = GENERATED_DATA_DIR / place_dir_name
    gpkg_path = place_dir_path / f"graph_{network_type}.gpkg"
    pbar.set_description(place_name)

    try:
        edges_city = gpd.read_feather(place_dir_path / get_edges_with_features_filename(network_type, resolution, buffered, intersection_based))

        # Tally per-feature counts locally first, so that a failure further down
        # cannot leave the global totals updated for a city that is not aggregated.
        city_feature_counts = {}
        for feature in features_top_level:
            # Columns are named "<feature>_<value>"; match on that prefix rather
            # than on a substring, which could also pick up unrelated columns.
            columns = [x for x in edges_city.columns if x.startswith(f"{feature}_")]
            if not columns:
                # Nothing to count; skipping avoids adding a float 0.0 to the int column.
                continue
            # Row-wise max over the feature's columns, summed over all rows.
            city_feature_counts[feature] = edges_city[columns].max(axis=1).sum()

        edges_city["city"] = row.city
        edges_city["country"] = row.country

        # numeric_only=True keeps the sum restricted to numeric columns explicitly
        # instead of relying on pandas dropping non-numeric ones (presumably the
        # geometry column here — confirm), which newer pandas no longer does.
        city_agg = (
            edges_city
            .drop(columns=["u", "v", "key", "id"])
            .groupby(["country", "city"])
            .sum(numeric_only=True)
        )

        # Everything succeeded for this city: commit its contribution to the totals.
        n += len(edges_city)
        for feature, count in city_feature_counts.items():
            df_feature_counts.loc[feature, "count"] += count
        cities_agg.append(city_agg)
        # edges.append(edges_city)
    except Exception as e:
        # Deliberately best-effort: report the city, remember it, and continue.
        failed_places.append((place_name, e))
        print("\nFailed", place_name, e)

if not cities_agg:
    raise ValueError(f"No city could be processed ({len(failed_places)} failed); nothing to aggregate.")

df_cities_agg = pd.concat(cities_agg)
df_agg = df_cities_agg.sum()
# edges = pd.concat(edges).reset_index()

# Drop the reference to the last city's frame so its memory can be released.
del edges_city



Sydney,Australia: 100%|██████████| 117/117 [02:01<00:00,  1.04s/it]              


In [6]:
# `edges` is still the empty list created in the aggregation cell: the lines that
# would append to it and concatenate it are commented out there.
edges

[]

In [7]:
# Per-city column sums, indexed by (country, city); one row per city that loaded successfully.
df_cities_agg

Unnamed: 0_level_0,Unnamed: 1_level_0,oneway_False,oneway_True,lanes_1,lanes_2,lanes_3,lanes_4,lanes_5,lanes_6,lanes_7,lanes_8,...,width_27.0,width_27.5,width_28.0,width_28.5,width_29.0,width_29.5,width_30.0,tunnel_building_passage,tunnel_yes,tunnel_avalanche_protector
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Japan,Tokyo,5088,56,4,401,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,59,0
Kazakhstan,Nur-Sultan,9810,5716,78,823,720,180,4,111,0,0,...,0,0,0,0,0,0,0,0,27,0
Qatar,Doha,15991,13724,4193,11897,2819,821,19,31,5,0,...,0,0,0,0,0,0,13,1,148,0
Russia,Moscow,19462,27233,4808,14297,8160,7203,2253,491,113,11,...,0,0,0,0,0,0,0,64,552,0
Russia,St. Petersburg,12722,22518,6594,16536,6422,3261,416,159,0,0,...,0,0,0,0,0,0,0,25,57,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
United States,New York City,66984,65786,5960,10585,6475,4179,991,308,140,1,...,0,0,0,0,0,0,0,105,381,0
United States,Philadelphia,28246,29638,3324,9026,2533,1321,578,182,75,1,...,0,0,0,0,0,0,0,17,138,0
United States,San Francisco,15284,7074,685,4970,2214,1269,307,20,1,4,...,0,0,0,0,0,0,0,5,71,0
United States,Washington D.C.,15402,8033,2077,6049,2087,1983,230,348,40,43,...,0,0,0,0,0,0,0,19,163,0


In [8]:
# Column sums of `df_cities_agg`, i.e. totals over all loaded cities.
df_agg

oneway_False                  2680069
oneway_True                   1413023
lanes_1                        263225
lanes_2                        648563
lanes_3                        204680
                               ...   
width_29.5                          0
width_30.0                         39
tunnel_building_passage          3338
tunnel_yes                      24278
tunnel_avalanche_protector          0
Length: 143, dtype: int64

In [9]:
# Total number of edge rows read across all cities (accumulated in the aggregation loop).
n

4093075

In [10]:
# Per top-level feature: the sum over all edges of the row-wise max across that feature's columns.
# Reads as "edges with at least one value of the feature set" if those columns are 0/1 indicators
# (assumption — confirm against the generated edge files).
df_feature_counts

Unnamed: 0_level_0,count
feature,Unnamed: 1_level_1
oneway,4093075
lanes,1145324
highway,4090587
maxspeed,1485936
bridge,106527
access,33990
junction,87430
width,21464
tunnel,27510


In [11]:
# Bar chart of each top-level feature's count as a percentage of the total
# number of edges `n`. Axis titles are set so the figure can be read on its own.
fig = px.bar(df_feature_counts * 100 / n, width=1300, title=f"Feature occurrence out of total: {n}")
fig.update_layout(
    xaxis_title="feature",
    yaxis_title="count as % of all edges",
    showlegend=False,
)
fig.show()

In [12]:
# One bar chart per top-level feature: each value's share (in %) of the summed
# totals of that feature's columns, drawn on a logarithmic y axis.
for feature in features_top_level:
    # Keep only the "<feature>_<value>" columns that actually exist in the aggregate.
    columns = [
        column
        for column in (f"{feature}_{value}" for value in FEATURESET[feature])
        if column in df_agg
    ]
    feature_total = df_agg[columns].sum()
    if not columns or feature_total == 0:
        # No matching columns or nothing counted: the percentages would be
        # undefined (division by zero), so there is nothing meaningful to plot.
        continue
    fig = px.bar(df_agg[columns] * 100 / feature_total, width=1300, title=feature, log_y=True)
    fig.update_layout(
        xaxis=dict(
            tickmode="linear",
            title="feature value",
        ),
        yaxis_title="share of the feature's total [%]",
        showlegend=False,
    )
    fig.show()