In [None]:
import os

import geopandas as gpd
import torch
import torch.nn as nn
import torch.optim as optim
from shapely.geometry import Polygon

from srai.datasets import AirbnbMulticityDataset
from srai.h3 import h3_to_geoseries
from srai.models import Evaluator, Predictor, RegressionBaseModel, Trainer, Vectorizer
from srai.plotting import plot_numeric_data
from srai.regionalizers import H3Regionalizer

In [None]:
# Load the multi-city Airbnb dataset (the access token is read from the HF_TOKEN
# environment variable) and keep only the rows whose "city" value is "paris".
airbnb = AirbnbMulticityDataset()
gdf_airbnb = airbnb.load(os.getenv("HF_TOKEN")).loc[
    lambda listings: listings["city"].isin(["paris"])
]

In [None]:
# H3 resolution reused by the vectorizer, the predictor and the regionalizer.
resolution = 8

# Seed PyTorch's global RNG so that work drawing from it (weight initialisation,
# shuffling) is repeatable after Restart Kernel -> Run All.
# NOTE(review): randomness outside torch's global generator (e.g. inside the
# dataset split) is not covered by this seed -- confirm whether it needs its own.
SEED = 42
torch.manual_seed(SEED);

In [None]:
# Split the Paris subset into train / dev / test parts using the dataset object's
# own `train_dev_test_split_bucket` splitter.
train_gdf, dev_gdf, test_gdf = airbnb.train_dev_test_split_bucket(gdf_airbnb)

In [None]:
# Build the vectorizer from the training split. The same instance is reused to
# convert every split into a dataset via `vectorizer.get_dataset(...)`.
vectorizer = Vectorizer(
    gdf_train=train_gdf,
    HF_dataset_object=airbnb,
    embedder_type="Hex2VecEmbedder",  # embedder selected by name
    h3_resolution=resolution,  # same H3 resolution used for prediction and plotting
)

In [None]:
# Vectorize the training split, then read the feature width off its "X" entry.
dataset_airbnb_train = vectorizer.get_dataset(train_gdf)
train_features = dataset_airbnb_train["X"]
# Second dimension of "X"; passed to RegressionBaseModel as its input size.
embedding_size = train_features.shape[1]

In [None]:
# Vectorize the held-out test split with the vectorizer built from the training split.
dataset_airbnb_test = vectorizer.get_dataset(test_gdf)

In [None]:
# Vectorize the dev split; it is passed to the Trainer as `eval_dataset`.
dataset_airbnb_dev = vectorizer.get_dataset(dev_gdf)

In [None]:
# Display the dev dataset for a quick visual check.
dataset_airbnb_dev

In [None]:
# Display the test dataset for a quick visual check.
dataset_airbnb_test

In [None]:
# Display the train dataset for a quick visual check.
dataset_airbnb_train

In [None]:
# Show the concrete type returned by `vectorizer.get_dataset`.
type(dataset_airbnb_train)

In [None]:
# NOTE(review): this repeats the train dataset display from a few cells earlier;
# it looks like leftover inspection output and is a candidate for removal.
dataset_airbnb_train

In [None]:
# Instantiate the baseline regression model, sized by the feature width of "X".
regression_model = RegressionBaseModel(embedding_size)

In [None]:
# Training setup: target device, objective, optimiser, and the Trainer that
# wires them to the train / dev datasets.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # GPU when present
loss_fn = nn.L1Loss()  # mean absolute error
optimizer = optim.Adam(regression_model.parameters(), lr=0.001)

# Extra keyword arguments forwarded unchanged to Trainer.
args = dict(
    batch_size=32,
    task="regression",
    epochs=50,
    device=device,
    metric2look4="MAE",
)

trainer = Trainer(
    model=regression_model,
    train_dataset=dataset_airbnb_train,
    eval_dataset=dataset_airbnb_dev,
    optimizer=optimizer,
    loss_fn=loss_fn,
    **args,
)

In [None]:
# Run training. `train()` returns three values; only the first (the model) is
# kept, the other two are discarded.
model, _, _ = trainer.train()

In [None]:
# Evaluator configured for the regression task on the same device used for training.
evaluator = Evaluator(task="regression", device=device)

In [None]:
# Evaluate the trained model on the held-out test dataset.
# NOTE(review): with return_metrics=False the metrics are presumably reported
# rather than returned -- confirm against the Evaluator documentation.
evaluator.evaluate(model, dataset_airbnb_test, return_metrics=False)

In [None]:
# Predictor configured for the regression task on the same device used for training.
predictor = Predictor("regression", device=device)

In [None]:
# Predict on the test dataset. The first returned value is discarded; `hexes`
# and `values` are used below as the H3 cells and their per-cell predictions
# (each value is read with `.item()`).
_, hexes, values = predictor.predict(model, dataset_airbnb_test, resolution=resolution)

In [None]:
# Collect the ground-truth target ("y") and H3 index ("X_h3_idx") of every test
# row. Each row is fetched from the dataset once and both fields are read from
# it, instead of indexing the dataset twice per row.
original_label = []
original_hexes = []
for row_idx in range(len(dataset_airbnb_test)):
    test_row = dataset_airbnb_test[row_idx]
    original_label.append(test_row["y"])
    original_hexes.append(test_row["X_h3_idx"])

In [None]:
def _hexes_to_price_gdf(h3_cells, prices):
    """Build a GeoDataFrame of H3 cell polygons carrying one price per cell.

    Args:
        h3_cells: H3 cell indexes, in a form accepted by `h3_to_geoseries`.
        prices: Iterable of objects exposing `.item()` (e.g. one-element
            tensors), aligned positionally with `h3_cells`.

    Returns:
        GeoDataFrame with `geometry`, `price` and `region_id` columns, indexed
        by `region_id` (the column is kept too), with its CRS set to EPSG:4326.
    """
    price_gdf = gpd.GeoDataFrame(geometry=h3_to_geoseries(h3_cells))
    # Use the authority string rather than the deprecated {"init": "epsg:4326"}
    # form. allow_override=True keeps this safe whether or not the geometries
    # returned by h3_to_geoseries already carry a CRS.
    price_gdf = price_gdf.set_crs("EPSG:4326", allow_override=True)
    price_gdf["price"] = [price.item() for price in prices]
    price_gdf["region_id"] = h3_cells
    price_gdf.index = price_gdf["region_id"]
    return price_gdf


# Predicted prices per H3 cell.
preds_gdf = _hexes_to_price_gdf(hexes, values)

# Ground-truth prices per H3 cell, built exactly the same way as the predictions.
original_gdf = _hexes_to_price_gdf(original_hexes, original_label)

In [None]:
# Regionalize the ground-truth frame at the working H3 resolution, then plot its
# "price" column over the resulting regions.
regionalizer = H3Regionalizer(resolution=resolution)
regions = regionalizer.transform(original_gdf)
plot_numeric_data(regions, "price", original_gdf)

In [None]:
# Plot the predicted "price" values over the same `regions` that were derived
# from the ground-truth frame, so both maps share one set of regions.
plot_numeric_data(regions, "price", preds_gdf)