In [None]:
import pandas as pd
import joblib

In [None]:
# Run configuration: target pollutant, OSM area id, H3 hex size and an
# optional free-text label that distinguishes dataset/model variants.
TARGET = "NO2"
OSM_ID = 8269826
MAP_HEX_SIZE = 7
COMMENT = "artificial_min"  # String or None

# Both paths share the same optional "_<COMMENT>" suffix. Building it once
# keeps the two file names from drifting apart between branches.
_comment_suffix = f"_{COMMENT}" if COMMENT else ""
ML_INFERENCE_DATA_FILE = (
    f"../data/{TARGET}_inference_dataset_osm_{OSM_ID}_hex_{MAP_HEX_SIZE}{_comment_suffix}.csv"
)
ML_MODEL = f"../data/random_forest_{TARGET}_gios{_comment_suffix}.pkl"

# Columns selected from the inference dataset and passed to the model,
# in this order.
SELECTED_PARAMETERS = [
    "tree_cover",
    "grassland",
    "population_density",
    "low_vegetation",
    "medium_vegetation",
    "high_vegetation",
    "road",
    "residential_1",
    "residential_2",
    "residential_3",
    "residential_4",
    "non-residential_1",
    "non-residential_2",
    "non-residential_3",
    "non-residential_4",
    "temperature",
    "temperature_trend_3h",
    "temperature_trend_6h",
    "temperature_anomaly",
    "relative_humidity",
    "relative_humidity_trend_3h",
    "relative_humidity_trend_6h",
    "pressure",
    "pressure_trend_3h",
    "pressure_trend_6h",
    "precipitation",
    "precipitation_trend_3h",
    "precipitation_trend_6h",
    "wind_u",
    "wind_v",
    "traffic_daily_fraction",
    "traffic_yearly_mean_fraction",
    "no2_anomaly",
]

In [None]:
# Read the inference dataset, keep only the two identifier columns plus the
# model features, and discard every row that has a missing value.
df_inference = (
    pd.read_csv(ML_INFERENCE_DATA_FILE)
    .loc[:, ["timestamp_utc", "h3_index", *SELECTED_PARAMETERS]]
    .dropna()
)
df_inference.head()

In [None]:
# Split the frame in two: the identifiers seed the output table, and the
# remaining feature columns stay in df_inference as model input.
# Note: df_inference is overwritten here, so running this cell a second time
# raises a KeyError because the identifier columns are already gone.
df_output = df_inference.loc[:, ["timestamp_utc", "h3_index"]].copy()
df_inference = df_inference.drop(["timestamp_utc", "h3_index"], axis=1).copy()

In [None]:
# Deserialize the fitted model stored at ML_MODEL.
# joblib files are pickle-based: loading one can execute arbitrary code, so
# only load model files that come from a trusted source.
rf_model = joblib.load(ML_MODEL)

In [None]:
# Run the model on the feature-only frame and store the predictions next to
# the identifiers, in a column named after the target.
prediction_column = f"{TARGET}_inference"
df_output[prediction_column] = rf_model.predict(df_inference)
df_output.head()

In [None]:
# Work on an independent copy for the shapefile export, so the timestamp
# reformatting applied to df_shp does not alter df_output.
df_shp = df_output.copy()

In [None]:
import geopandas as gpd
import h3
from shapely.geometry import Polygon

In [None]:
def cell_to_polygon(cell):
    """Build a shapely ``Polygon`` outlining the given H3 cell.

    ``h3.cell_to_boundary`` yields the cell's vertices as
    (latitude, longitude) pairs. Each pair is swapped to
    (longitude, latitude) before being handed to ``Polygon``.

    Args:
        cell: H3 cell index accepted by ``h3.cell_to_boundary``.

    Returns:
        A ``Polygon`` whose vertices are in (longitude, latitude) order.
    """
    lon_lat_ring = tuple(
        (lng, lat) for lat, lng in h3.cell_to_boundary(cell)
    )
    return Polygon(lon_lat_ring)

In [None]:
# Relabel each timestamp as "MM-DDTHH". The label is always derived from the
# untouched df_output column (df_shp is a copy of df_output, so the indexes
# match). Re-running this cell therefore gives the same result instead of
# trying to re-parse strings that were already formatted.
# NOTE(review): the year is dropped, so rows from different years that share
# month/day/hour receive the same label — confirm the data spans under a year.
df_shp["timestamp_utc"] = pd.to_datetime(df_output["timestamp_utc"]).dt.strftime("%m-%dT%H")

In [None]:
# Reshape to one row per H3 cell and one column per timestamp label.
# The value column name is built from TARGET, the same way the prediction
# column is created, so changing TARGET does not break the pivot.
# pivot_table aggregates with its default (mean) if a cell/label pair occurs
# more than once.
df_shp_pivot = df_shp.pivot_table(
    index=["h3_index"],
    columns="timestamp_utc",
    values=f"{TARGET}_inference",
).reset_index()
df_shp_pivot.head()

In [None]:
# Convert each H3 index to its polygon. Mapping over the single column that
# is needed avoids materialising every row as a Series, as apply(axis=1)
# does, and stays well-defined when the frame has no rows.
df_shp_pivot["geometry"] = df_shp_pivot["h3_index"].map(cell_to_polygon)
df_shp_pivot.head()

In [None]:
# Promote the pivoted table to a GeoDataFrame and declare its coordinate
# reference system (EPSG:4326) in the same call.
gdf_shp_pivot = gpd.GeoDataFrame(
    df_shp_pivot,
    geometry="geometry",
    crs="EPSG:4326",
)

In [None]:
# Write the result as an ESRI Shapefile. The file name is built from TARGET
# (previously hard-coded as "NO2"), the OSM id, the hex size and the optional
# "_<COMMENT>" suffix, so a single to_file call covers both cases.
output_suffix = f"_{COMMENT}" if COMMENT else ""
gdf_shp_pivot.to_file(
    f"inference_{TARGET}_osm_{OSM_ID}_hex_{MAP_HEX_SIZE}{output_suffix}.shp",
    driver="ESRI Shapefile",
)