# 1. Imports


In [1]:
# Standard library
import json
import os
import sys
import warnings
from datetime import datetime
from typing import cast

# Must be set BEFORE geopandas is imported for the first time (directly or
# through the `src` package): GeoPandas reads USE_PYGEOS at import time, so
# setting it afterwards has no effect.
os.environ['USE_PYGEOS'] = '0'

# Third-party
import geopandas
import geopandas as gpd
import numpy as np
import pandas as pd
import wandb
import wandb.util
from joblib import dump
from shapely.geometry import Point
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
from wandb.util import generate_id

# Make the directory two levels up (the one containing `src`) importable.
sys.path.append("../../")

# Project-local
from src.graph.create_osmnx_graph import OSMnxGraph
from src.graph_layering.create_hetero_data import create_hetero_data
from src.graph_layering.graph_layer_creator import GraphLayerController, SourceType
from src.organized_datasets_creation.utils import (
    convert_nominatim_name_to_filename,
    resolve_nominatim_city_name,
)
from src.training.train import train

In the next release, GeoPandas will switch to using Shapely by default, even if PyGEOS is installed. If you only have PyGEOS installed to get speed-ups, this switch should be smooth. However, if you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd


In [2]:
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cpu'

# 2. Setting up env variables and configs


In [3]:
# Fail fast when the Weights & Biases credential is not present in the environment.
WANDB_API_KEY = os.environ.get("WANDB_API_KEY")
assert WANDB_API_KEY is not None, (
    "WANDB_API_KEY is not set, did you forget it in the config file?"
)

In [5]:
# general settings - input locations, relative to the notebook's working directory
# per-city hex datasets, laid out as <city>/<year>/h9/count-embedder/dataset.parquet
ORGANIZED_HEXES_LOCATION = "../../data/organized-hexes"
# per-city OSMnx graphs, laid out as <city>/nodes.parquet and <city>/edges.parquet
ORGANIZED_GRAPHS_LOCATION = "../../data/organized_graphs"
# JSON file loaded and handed to OSMnxGraph
OSMNX_ALL_ATTRIBUTES_LOCATION = (
    "../../data/osmnx_attributes.json"
)

# folder with the accidents_top_<N>_percent_<bool>.json hex feature lists
HEX_FI_LOCATION = (
    "../../data/downstream_tasks/feature_importance"
)

# downstream task settings
ACCIDENTS_LOCATION = "../../data/downstream_tasks/accidents_prediction/accidents.csv"
# output folder for pickled datasets and trained models
TRAIN_SAVE_DIR = "../../gradient_logs/"

# number of runs the W&B agent executes per sweep
SWEEP_RUNS_COUNT = 50
# number of epochs passed to `train` for every fold of the graph variant
EPOCHS = 300

# One sweep is executed per entry. Schema (checked by verify_attributes_configurations):
#   USE_ORTOPHOTO   - bool
#   USE_OSMNX_ATTRS - bool; True selects the graph variant, False the tabular one
#   USE_HEXES_ATTRS - bool, or a dict {"NUM_FEATURES": int, "IN_PERCENT": bool} that
#                     selects the feature list file read from HEX_FI_LOCATION
ATTRIBUTES_CONFIGURATIONS = [
    {
        "USE_ORTOPHOTO": False,
        "USE_HEXES_ATTRS": {"NUM_FEATURES": 20, "IN_PERCENT": False},
        "USE_OSMNX_ATTRS": True,
    },
    {
        "USE_ORTOPHOTO": False,
        "USE_HEXES_ATTRS": {"NUM_FEATURES": 20, "IN_PERCENT": True},
        "USE_OSMNX_ATTRS": True,
    },
    {
        "USE_ORTOPHOTO": False,
        "USE_HEXES_ATTRS": {"NUM_FEATURES": 50, "IN_PERCENT": False},
        "USE_OSMNX_ATTRS": True,
    },
    {
        "USE_ORTOPHOTO": False,
        "USE_HEXES_ATTRS": {"NUM_FEATURES": 50, "IN_PERCENT": True},
        "USE_OSMNX_ATTRS": True,
    },
]

# Sweep definition for the graph variant of the task: Bayesian search
# maximizing the "mean_f1" value logged at the end of every run.
WANDB_SWEEP_PARAMS_GRAPH_DATA = {
    "method": "bayes",
    "metric": {"name": "mean_f1", "goal": "maximize"},
    "parameters": {
        "hidden_channels": {"values": [10, 20, 30, 40, 50]},
        "learning_rate": {
            "distribution": "log_uniform_values",
            "min": 1e-5,
            "max": 1e-2,
        },
        "num_conv_layers": {"values": [1, 2, 3, 4, 5]},
        # the two values below are combined at run time into the list
        # [lin_layer_size] * num_lin_layers
        "lin_layer_size": {"values": [8, 16, 32, 64, 128]},
        "num_lin_layers": {"values": [0, 1, 2, 3, 4]},
        "weight_decay": {
            "distribution": "log_uniform_values",
            "min": 1e-5,
            "max": 1e-2,
        },
    },
}

# Sweep definition for the tabular (logistic regression) variant of the task:
# Bayesian search maximizing the "mean_f1" value logged at the end of every run.
WANDB_SWEEP_PARAMS_TABULAR_DATA = {
    "method": "bayes",
    "metric": {"name": "mean_f1", "goal": "maximize"},
    "parameters": {
        # solver and penalty are sampled together as "<solver>;<penalty>" and
        # split on ";" in run_k_fold_tabular_data
        "solver_penalty": {
            "values": [
                "lbfgs;l2",
                "liblinear;l1",
                "liblinear;l2",
                "newton-cg;l2",
                "newton-cholesky;l2",
                "sag;l2",
                "saga;elasticnet",
                "saga;l1",
                "saga;l2",
            ]
        },
        # passed as `C` to LogisticRegression
        "C": {
            "distribution": "log_uniform_values",
            "min": 1e-5,
            "max": 1,
        },
    },
}

In [6]:
def verify_attributes_configurations(configurations):
    """Validate the schema of every attribute configuration.

    Each item must provide:

    * ``USE_ORTOPHOTO`` - bool
    * ``USE_OSMNX_ATTRS`` - bool
    * ``USE_HEXES_ATTRS`` - either a bool, or a dict holding an int
      ``NUM_FEATURES`` and a bool ``IN_PERCENT``

    Args:
        configurations: iterable of configuration dicts.

    Raises:
        AssertionError: for the first item that violates the schema.
    """
    for item in configurations:
        for flag in ("USE_ORTOPHOTO", "USE_OSMNX_ATTRS"):
            assert flag in item and isinstance(
                item[flag], bool
            ), f"Invalid configuration: {item}, missing or invalid {flag}"

        assert "USE_HEXES_ATTRS" in item, "Missing USE_HEXES_ATTRS"
        hexes_attrs = item["USE_HEXES_ATTRS"]
        if isinstance(hexes_attrs, bool):
            # plain on/off switch, nothing more to validate
            continue

        assert isinstance(hexes_attrs, dict), "USE_HEXES_ATTRS should be a dict"
        assert "NUM_FEATURES" in hexes_attrs, "Missing NUM_FEATURES"
        assert "IN_PERCENT" in hexes_attrs, "Missing IN_PERCENT"

        num_features = hexes_attrs["NUM_FEATURES"]
        # bool is a subclass of int, so True/False must be rejected explicitly
        assert isinstance(num_features, int) and not isinstance(
            num_features, bool
        ), "NUM_FEATURES should be an int"
        assert isinstance(
            hexes_attrs["IN_PERCENT"], bool
        ), "IN_PERCENT should be a bool"


verify_attributes_configurations(ATTRIBUTES_CONFIGURATIONS)

# 3. Loading accidents

The process includes removing unused columns and creating GeoSeries from raw X Y points


In [7]:
accidents = pd.read_csv(ACCIDENTS_LOCATION)


def create_point(x):
    """Build a shapely ``Point`` from a pair of raw coordinates.

    Args:
        x: iterable (list, tuple or ``pandas.Series`` row) whose first two
            values are the X and Y coordinate, in that order. Both values are
            converted with ``float``.

    Returns:
        ``Point(float(x_coord), float(y_coord))``.
    """
    # Iterate over the values instead of using ``x[0]`` / ``x[1]``: for a
    # Series row labelled with column names, integer keys inside ``[]`` rely on
    # the deprecated positional fallback of ``Series.__getitem__``.
    coords = tuple(x)
    return Point(float(coords[0]), float(coords[1]))


# Turn the raw X/Y columns into point geometries (EPSG:4326), then drop the raw
# coordinate columns together with `uczestnicy`.
geometry = accidents[["wsp_gps_x", "wsp_gps_y"]].apply(create_point, axis=1)

gdf_accidents = gpd.GeoDataFrame(
    accidents, geometry=geometry, crs="EPSG:4326"
).drop(columns=["wsp_gps_x", "wsp_gps_y", "uczestnicy"])
gdf_accidents

Unnamed: 0,id_w_czas,czas_zdarzenia,woj_nazwa,pow_nazwa,gmi_nazwa,mie_nazwa,opis_zdarzenia,zdarzenie_id,id_systemu_zr,year,month,day,geometry
0,2020-01-03,12:31,DOLNOŚLĄSKIE,Wrocław,Wrocław,Wrocław,Najechanie na pieszego,199070689,108529985,2020,1,3,POINT (17.04728 51.10575)
1,2020-01-04,20:15,DOLNOŚLĄSKIE,Wrocław,Wrocław,Wrocław,Najechanie na pieszego,199072299,108535968,2020,1,4,POINT (17.02989 51.10906)
2,2020-01-07,06:46,DOLNOŚLĄSKIE,Wrocław,Wrocław,Wrocław,Zderzenie pojazdów boczne,199072513,108539746,2020,1,7,POINT (17.01919 51.10369)
3,2020-01-04,01:56,DOLNOŚLĄSKIE,Wrocław,Wrocław,Wrocław,"Najechanie na słup, znak",199073138,108530771,2020,1,4,POINT (17.01456 51.09653)
4,2020-01-11,15:36,DOLNOŚLĄSKIE,Wrocław,Wrocław,Wrocław,Zderzenie pojazdów boczne,199082756,108556472,2020,1,11,POINT (17.09167 51.11358)
...,...,...,...,...,...,...,...,...,...,...,...,...,...
19893,2018-09-21,03:43,MAZOWIECKIE,Warszawa,Warszawa,Warszawa,Zderzenie pojazdów boczne,186601923,107299653,2018,9,21,POINT (20.96372 52.18850)
19894,2018-10-23,20:30,MAZOWIECKIE,Warszawa,Warszawa,Warszawa,Najechanie na pieszego,186638270,107395682,2018,10,23,POINT (20.99289 52.19678)
19895,2018-08-14,16:50,MAZOWIECKIE,Warszawa,Warszawa,Warszawa,Najechanie na pieszego,186754851,107182443,2018,8,14,POINT (20.97250 52.19344)
19896,2018-10-18,13:00,MAZOWIECKIE,Warszawa,Warszawa,Warszawa,Najechanie na pieszego,186776202,107387087,2018,10,18,POINT (21.03694 52.20669)


# 4. Displaying available cities


In [8]:
# "<city>, Poland" query string for every distinct city found in the accidents data
cities = [city + ", Poland" for city in accidents["mie_nazwa"].unique()]
print("Cities:")
print(cities)

Cities:
['Wrocław, Poland', 'Szczecin, Poland', 'Poznań, Poland', 'Kraków, Poland', 'Warszawa, Poland']


# 5. Creating GeoDataFrames

The creation process has the following steps:

1. loading OSMNX nodes and edges
2. assigning accidents to OSMNX nodes
3. taking latest H9 resolution hexes
4. combining OSMNX nodes, OSMNX edges, hexes in a single dict and packing it inside gdfs_dict


In [9]:
def add_accidents_to_osmnx_nodes(
    accidents: gpd.GeoDataFrame,
    nodes: gpd.GeoDataFrame,
    edges: gpd.GeoDataFrame,
    city_name: str,
):
    """Aggregate the accidents of one city onto its OSMnx nodes.

    Args:
        accidents: accidents of all cities; rows are selected by comparing the
            ``mie_nazwa`` column with the resolved city name.
        nodes: OSMnx nodes of the city.
        edges: OSMnx edges of the city.
        city_name: city name, resolved with ``resolve_nominatim_city_name``.

    Returns:
        ``gdf_nodes`` of the ``OSMnxGraph`` after a node-level "count"
        aggregation (presumably carrying the ``accidents_count`` column used
        later in the notebook - verify against ``OSMnxGraph``).
    """
    with open(OSMNX_ALL_ATTRIBUTES_LOCATION) as attributes_file:
        osmnx_attributes = json.load(attributes_file)

    # keep only the accidents registered for the requested city
    is_city_accident = accidents["mie_nazwa"] == resolve_nominatim_city_name(city_name)
    city_accidents = accidents.loc[is_city_accident, :]

    graph = OSMnxGraph(city_accidents, nodes, edges, osmnx_attributes)
    # NOTE(review): `_aggregate` is a private method of OSMnxGraph
    graph._aggregate(element_type="node", aggregation_method="count")
    return graph.gdf_nodes


def create_gdfs(city_name: str, accidents_gdf: gpd.GeoDataFrame = gdf_accidents):
    """Load the OSMnx nodes, OSMnx edges and latest H9 hexes of one city.

    Args:
        city_name: city name, resolved with ``resolve_nominatim_city_name``
            and converted to a folder name with
            ``convert_nominatim_name_to_filename``.
        accidents_gdf: accidents of all cities. The default is bound to the
            ``gdf_accidents`` object that exists when this function is defined.

    Returns:
        Dict with the keys ``osmnx_nodes`` (with accidents aggregated onto the
        nodes), ``osmnx_edges`` and ``hexes``.

    Raises:
        AssertionError: when the CRS of nodes, edges and accidents differ.
        ValueError: when the city has no year sub-folder with hexes.
    """
    city_folder_name = convert_nominatim_name_to_filename(
        resolve_nominatim_city_name(city_name)
    )
    city_graph_folder = os.path.join(ORGANIZED_GRAPHS_LOCATION, city_folder_name)

    osmnx_nodes = gpd.read_parquet(os.path.join(city_graph_folder, "nodes.parquet"))
    osmnx_nodes = osmnx_nodes.reset_index()
    osmnx_nodes.index.names = ["node_id"]
    osmnx_nodes["x"] = osmnx_nodes["geometry"].x
    osmnx_nodes["y"] = osmnx_nodes["geometry"].y

    osmnx_edges = gpd.read_parquet(os.path.join(city_graph_folder, "edges.parquet"))
    osmnx_edges = osmnx_edges.reset_index().rename(columns={"index": "edge_id"})
    osmnx_edges.index.names = ["edge_id"]

    assert osmnx_nodes.crs == osmnx_edges.crs
    assert osmnx_nodes.crs == accidents_gdf.crs

    osmnx_nodes = add_accidents_to_osmnx_nodes(
        accidents=accidents_gdf,
        nodes=osmnx_nodes,
        city_name=city_name,
        edges=osmnx_edges,
    )

    hexes_years_folder = os.path.join(ORGANIZED_HEXES_LOCATION, city_folder_name)

    # Only purely numeric directory names are years; anything else (e.g. a
    # `.ipynb_checkpoints` folder) is skipped instead of crashing `int()`.
    available_years = [
        int(entry)
        for entry in os.listdir(hexes_years_folder)
        if entry.isdecimal()
        and os.path.isdir(os.path.join(hexes_years_folder, entry))
    ]
    if not available_years:
        raise ValueError(f"No year sub-folders found in {hexes_years_folder}")
    highest_year = max(available_years)

    hexes: gpd.GeoDataFrame = gpd.read_parquet(
        os.path.join(
            hexes_years_folder,
            str(highest_year),
            "h9",
            "count-embedder",
            "dataset.parquet",
        )
    )

    # keep the original `region_id` column as `h3_id` and name the index `region_id`
    hexes = hexes.rename(columns={"region_id": "h3_id"}).rename_axis(
        "region_id", axis=0
    )

    return dict(osmnx_nodes=osmnx_nodes, osmnx_edges=osmnx_edges, hexes=hexes)


# Load nodes, edges and hexes for every city; keyed by the "<city>, Poland" name.
print("Creating gdfs...")
gdfs_dict = {city_name: create_gdfs(city_name) for city_name in tqdm(cities)}

Creating gdfs...


100%|██████████| 5/5 [00:01<00:00,  3.09it/s]


# 6. Creating GraphLayerController for each of the cities

The creation is based on previously made GeoDataFrames. The controller is used to transfer accidents Y values from OSMNX nodes to hexes. It is also used to create complete graph data in case of graph datasets.


In [10]:
# Build one GraphLayerController per city and store it next to the city's
# GeoDataFrames under the "controller" key.
# NOTE(review): every warning raised during construction is silenced here;
# consider filtering only the expected warning category.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    for gdf_for_city in gdfs_dict.values():
        gdf_for_city["controller"] = GraphLayerController(
            gdf_for_city["hexes"],
            gdf_for_city["osmnx_nodes"],
            gdf_for_city["osmnx_edges"],
        )

# 7. Patching hexes

The y value (1 = accident occurred, 0 = no accident) is assigned to each hex based on the accident counts of its underlying OSMNX nodes.


In [11]:
def patch_hexes_with_y(
    osmnx_nodes: gpd.GeoDataFrame,
    hexes: gpd.GeoDataFrame,
    controller: GraphLayerController,
):
    """Attach a binary accident label to the hexes held by ``controller``.

    Accident counts of the OSMnx nodes are summed per hex through the
    controller's virtual edges; a hex gets ``accident_occured = 1`` when that
    sum is positive and ``0`` otherwise.

    Args:
        osmnx_nodes: nodes carrying an ``accidents_count`` column.
        hexes: hexes indexed by ``region_id``; the passed frame itself is not
            modified.
        controller: controller whose ``hexes_gdf`` is replaced by the labelled
            hexes and whose hex centroids are rebuilt from them.

    Returns:
        None. The result is stored on ``controller``.
    """
    virtual_edges = controller.get_virtual_edges_to_hexes(SourceType.OSMNX_NODES)

    # accident count of every linked node, next to the hex it is linked to
    node_accidents = virtual_edges.merge(
        osmnx_nodes, left_on="source_id", right_index=True
    )[["region_id", "accidents_count"]]
    accidents_per_hex = node_accidents.groupby("region_id").sum()

    merged = hexes.merge(
        accidents_per_hex, left_index=True, right_index=True, how="left"
    )
    # NOTE(review): fillna(0) runs on the whole merged frame, so missing values
    # in any column - not only `accidents_count` - are replaced by 0.
    hexes_with_y = cast(gpd.GeoDataFrame, merged.fillna(0))

    has_accident = hexes_with_y["accidents_count"] > 0
    hexes_with_y["accident_occured"] = has_accident.astype(int)
    hexes_with_y = hexes_with_y.drop(columns="accidents_count")

    controller.hexes_gdf = hexes_with_y
    # the centroids are derived from the hexes, so they have to be rebuilt too
    controller._hexes_centroids_gdf = controller._create_hexes_centroids_gdf()

In [12]:
# Label the hexes of every city; the result is stored on each city's controller.
# NOTE(review): all warnings raised while patching are silenced here.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    for gdfs in gdfs_dict.values():
        patch_hexes_with_y(gdfs["osmnx_nodes"], gdfs["hexes"], gdfs["controller"])

# 8. Creating graph data

Graph data is used when we include OSMNX attributes and in turn maintain the graph structure of the data

For now the data is created only once here, just to obtain the (train, val, test) fold labels for cross-validation on the graph-based versions of the task.


In [13]:
from typing import List, Literal, Union


def create_graph_data(
    osmnx_nodes,
    osmnx_edges,
    hexes,
    controller: GraphLayerController,
    use_hexes_attr: bool,
    use_ortophoto: bool,
    columns_to_take: Union[List[str], Literal["all"], None] = None,
):
    """Create the heterogeneous graph data of one city.

    Args:
        osmnx_nodes: OSMnx nodes; every column except ``geometry``, ``x``,
            ``y``, ``osmid`` and ``accidents_count`` is used as a node attribute.
        osmnx_edges: OSMnx edges; every column except ``u``, ``v``, ``key`` and
            ``geometry`` is used as an edge attribute.
        hexes: hexes frame, used to enumerate the columns when
            ``columns_to_take == "all"``.
        controller: controller passed on to ``create_hetero_data``.
        use_hexes_attr: when False no hex attribute is used, regardless of
            ``columns_to_take``.
        use_ortophoto: currently not used by this function.
        columns_to_take: names of the hex columns to use, ``"all"`` for every
            column except ``geometry``, ``h3_id`` and ``accidents_count``, or
            ``None`` (default) for no hex attributes.

    Returns:
        The object returned by ``create_hetero_data``, with
        ``accident_occured`` as the hex target column.

    Raises:
        ValueError: when ``columns_to_take`` is a string other than ``"all"``.
    """
    edges_attr_columns = osmnx_edges.columns[
        ~osmnx_edges.columns.isin(["u", "v", "key", "geometry"])
    ]
    # `accidents_count` is excluded so the raw counts behind the label are not
    # used as an input feature
    nodes_attr_columns = osmnx_nodes.columns[
        ~osmnx_nodes.columns.isin(["geometry", "x", "y", "osmid", 'accidents_count'])
    ]

    if not use_hexes_attr or columns_to_take is None:
        hexes_attr_columns = []
    elif isinstance(columns_to_take, str):
        # explicit string check: `== "all"` on an array-like would compare
        # element-wise and make the condition ambiguous
        if columns_to_take != "all":
            raise ValueError(
                f'columns_to_take must be a list of names or "all", got {columns_to_take!r}'
            )
        hexes_attr_columns = hexes.columns[
            ~hexes.columns.isin(["geometry", "h3_id", "accidents_count"])
        ]
    else:
        # copy, so the caller's list is never shared with downstream code
        hexes_attr_columns = list(columns_to_take)

    data = create_hetero_data(
        controller,
        hexes_attrs_columns_names=hexes_attr_columns,
        osmnx_edge_attrs_columns_names=edges_attr_columns,
        osmnx_node_attrs_columns_names=nodes_attr_columns,
        virtual_edge_attrs_columns_names=[],
        hexes_y_columns_names=["accident_occured"],
    )
    return data


# Initial build, used only to derive the fold labels: `columns_to_take` is not
# passed, so no hex attribute columns are selected (the hex `x` tensor has zero
# features). The dict is rebuilt for every configuration before its sweep.
graph_data_dict = {
    city_name: create_graph_data(**gdfs, use_ortophoto=True, use_hexes_attr=True)
    for city_name, gdfs in gdfs_dict.items()
}

In [14]:
graph_data_dict['Wrocław, Poland']['hex']

{'x': tensor([], size=(3168, 0)), 'y': tensor([0, 1, 1,  ..., 0, 0, 0])}

# 9. Creating tabular data

Tabular data is used when we omit OSMNX attributes and in turn lose the graph structure of the data

No separate folds are created for the tabular versions of the task; a simple leave-one-city-out scheme is used instead.


In [15]:
def create_tabular_data(
    hexes: pd.DataFrame,
    controller: "GraphLayerController",
    use_hexes_attr: bool,
    use_ortophoto: bool,
    columns_to_take: Union[List[str], Literal["all"]] = "all",
):
    """Create the tabular features and labels of one city.

    Args:
        hexes: hexes frame providing the feature columns.
        controller: object whose ``hexes_centroids_gdf`` holds the
            ``accident_occured`` label column.
        use_hexes_attr: when False the feature frame has no columns.
        use_ortophoto: only checked by the "at least one data source"
            assertion; not used otherwise.
        columns_to_take: names of the hex columns to use, or ``"all"``
            (default) for every column except ``geometry`` and ``h3_id``.
            Mirrors the parameter of ``create_graph_data`` so both creators
            accept the same keyword arguments.

    Returns:
        ``{"X": features, "y": labels}``. Rows of ``X`` and ``y`` are assumed
        to be in the same order - TODO confirm against the controller.

    Raises:
        AssertionError: when both data sources are disabled.
        ValueError: when ``columns_to_take`` is a string other than ``"all"``.
    """
    assert use_ortophoto or use_hexes_attr, "Provide at least one data source"

    if not use_hexes_attr:
        hexes_attr_columns = []
    elif isinstance(columns_to_take, str):
        if columns_to_take != "all":
            raise ValueError(
                f'columns_to_take must be a list of names or "all", got {columns_to_take!r}'
            )
        hexes_attr_columns = hexes.columns[~hexes.columns.isin(["geometry", "h3_id"])]
    else:
        hexes_attr_columns = list(columns_to_take)

    hexes_y_columns_names = ["accident_occured"]

    X = hexes[hexes_attr_columns]
    y = controller.hexes_centroids_gdf[hexes_y_columns_names]

    return {"X": X, "y": y}


# Initial tabular dataset per city, built from the hex attributes only.
# The dict is rebuilt for every tabular configuration before its sweep.
tabular_data_dict = {
    city_name: create_tabular_data(
        gdfs["hexes"],
        cast(GraphLayerController, gdfs["controller"]),
        use_ortophoto=False,
        use_hexes_attr=True,
    )
    for city_name, gdfs in gdfs_dict.items()
}

# 10. Creating folds labels


In [16]:
def shift_elements_right(lst):
    """Return a copy of ``lst`` rotated one position to the right.

    The last element becomes the first one; the input list is not modified.
    An empty list yields an empty list.

    Args:
        lst: list to rotate.

    Returns:
        New list with the rotated elements.
    """
    # slicing (instead of `lst[-1]`) keeps the empty list from raising IndexError
    return lst[-1:] + lst[:-1]


# Deterministic city order - the folds below depend on it.
cities_names_list = sorted(graph_data_dict.keys(), key=str)

# One (validation city, test city) pair per fold: every city is the test city
# once, and its cyclic predecessor in the sorted list is the validation city.
validation_cities = shift_elements_right(cities_names_list)
folds_tuples = [
    (val_city, test_city)
    for val_city, test_city in zip(validation_cities, cities_names_list)
]
display(folds_tuples)

[('Wrocław, Poland', 'Kraków, Poland'),
 ('Kraków, Poland', 'Poznań, Poland'),
 ('Poznań, Poland', 'Szczecin, Poland'),
 ('Szczecin, Poland', 'Warszawa, Poland'),
 ('Warszawa, Poland', 'Wrocław, Poland')]

# 11. Functions setup


In [17]:
def run_k_fold_graph_data(closure_config, sweep_id):
    """Create the W&B agent callback for one cross-validated graph run.

    The attribute configuration is captured in a closure so it does not have
    to be passed to the callback directly. The callback reads the module-level
    ``graph_data_dict`` and ``folds_tuples`` when it is executed.

    Args:
        closure_config: attribute configuration of the sweep; every value is
            logged as a 0/1 flag and the dict is attached as metadata to the
            dataset artifact.
        sweep_id: id of the sweep, used to name the pickled dataset file.

    Returns:
        Zero-argument callable to be passed as ``function`` to ``wandb.agent``.
    """

    def wrapped():
        run = wandb.init()
        epochs = EPOCHS

        config = wandb.config

        for k, v in closure_config.items():
            run.log({k: 1 if v else 0})

        run.log({"data_structure": "graph"})

        # create hparams
        if hasattr(config, "lin_layer_size") and hasattr(config, "num_lin_layers"):
            lin_layer_sizes = [config.lin_layer_size] * config.num_lin_layers
        else:
            lin_layer_sizes = config.lin_layer_sizes
        hparams = {
            "hidden_channels": config.hidden_channels,
            "lr": config.learning_rate,
            "num_conv_layers": config.num_conv_layers,
            "lin_layer_sizes": lin_layer_sizes,
            "weight_decay": config.weight_decay,
        }

        aucs = []
        accuracies = []
        f1s = []

        fold_group_id = generate_id()

        # log data as artifact if no data was logged in the sweep before
        # dataset is uploaded only on the first run in sweep, because it does not change across runs in sweep
        # in wandb, dataset will be visible on the first run in the sweep
        # the dump below is the first write of the run, so make sure the
        # target directory exists
        os.makedirs(TRAIN_SAVE_DIR, exist_ok=True)
        artifact_path = os.path.join(TRAIN_SAVE_DIR, f"graph_data_{sweep_id}.pkl")
        if not os.path.exists(artifact_path):
            dump(
                graph_data_dict,
                artifact_path,
                protocol=5,
            )
            artifact = wandb.Artifact(
                name="graph_data", type="dataset", metadata=closure_config
            )
            artifact.add_file(local_path=artifact_path)
            run.log_artifact(artifact)

        # run k-fold
        for index, (val_city_name, test_city_name) in enumerate(folds_tuples):
            # prepare data: one validation city, one test city, the rest for training
            val_data = [graph_data_dict[val_city_name].to("cpu").clone()]
            train_data = [
                v.to("cpu").clone()
                for k, v in graph_data_dict.items()
                if k != val_city_name and k != test_city_name
            ]
            test_data = graph_data_dict[test_city_name].to("cpu").clone()

            # run training with checkpointing on lowest val_loss, return test metrics for the best model and its path
            # builtin preprocessing - scaling to N(0, 1)
            auc, accuracy, f1, model_path = train(
                train_data=train_data,
                val_data=val_data,
                test_data=test_data,
                epochs=epochs,
                hparams=hparams,
                train_save_dir=TRAIN_SAVE_DIR,
                num_classes=2,
            )

            # logging - single fold
            run.log_model(
                path=model_path,
                name=f"model_{fold_group_id}_fold_{index}",
            )
            run.log({f"auc_fold_{index}": auc})
            run.log({f"accuracy_fold_{index}": accuracy})
            run.log({f"f1_fold_{index}": f1})

            aucs.append(auc)
            accuracies.append(accuracy)
            f1s.append(f1)

        # logging - summary statistics ("mean_f1" is the metric the sweep optimizes)
        mean_auc = sum(aucs) / len(aucs)
        mean_accuracy = sum(accuracies) / len(accuracies)
        mean_f1 = sum(f1s) / len(f1s)
        run.log({"mean_auc": mean_auc})
        run.log({"mean_accuracy": mean_accuracy})
        run.log({"mean_f1": mean_f1})

    return wrapped


def run_k_fold_tabular_data(closure_config, sweep_id):
    """Create the W&B agent callback for one leave-one-city-out tabular run.

    Analogous to ``run_k_fold_graph_data``, but fits a logistic regression on
    the module-level ``tabular_data_dict``: every city in ``cities_names_list``
    is the test set once and all remaining cities form the training set.

    Args:
        closure_config: attribute configuration of the sweep; every value is
            logged as a 0/1 flag and the dict is attached as metadata to the
            dataset artifact.
        sweep_id: id of the sweep, used to name the pickled dataset file.

    Returns:
        Zero-argument callable to be passed as ``function`` to ``wandb.agent``.
    """

    def wrapped():
        run = wandb.init()

        config = wandb.config

        for k, v in closure_config.items():
            run.log({k: 1 if v else 0})

        run.log({"data_structure": "tabular"})

        # the sweep samples solver and penalty together as "<solver>;<penalty>"
        hparams = {}
        hparams["C"] = config["C"]
        solver, penalty = config["solver_penalty"].split(";")
        hparams["solver"] = solver
        if penalty == "None":
            penalty = None
        hparams["penalty"] = penalty

        aucs = []
        accuracies = []
        f1s = []

        fold_group_id = generate_id()

        # log data as artifact
        # the dump below is the first write of the run, so make sure the
        # target directory exists
        os.makedirs(TRAIN_SAVE_DIR, exist_ok=True)
        artifact_path = os.path.join(TRAIN_SAVE_DIR, f"tabular_data_{sweep_id}.pkl")

        if not os.path.exists(artifact_path):
            dump(
                tabular_data_dict,
                artifact_path,
                protocol=5,
            )
            artifact = wandb.Artifact(
                name="tabular_data", type="dataset", metadata=closure_config
            )
            artifact.add_file(local_path=artifact_path)
            run.log_artifact(artifact)

        timestamp = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
        # all fold models of this run are stored in one timestamped folder
        model_dir = os.path.join(TRAIN_SAVE_DIR, timestamp)
        os.makedirs(model_dir, exist_ok=True)

        for index, test_city_name in enumerate(cities_names_list):
            scaler = StandardScaler()
            train_parts = [
                m for key, m in tabular_data_dict.items() if key != test_city_name
            ]
            X = pd.concat([m["X"] for m in train_parts]).to_numpy()
            y = pd.concat([m["y"] for m in train_parts]).to_numpy().ravel()

            # the scaler is fitted on the training cities only
            X = scaler.fit_transform(X)

            logistic_regression = LogisticRegression(
                C=hparams["C"],
                solver=hparams["solver"],
                penalty=hparams["penalty"],
                dual=False,
                tol=1e-4,
                fit_intercept=True,
                intercept_scaling=1,
                class_weight="balanced",
                random_state=1124,
                max_iter=1000,
                multi_class="auto",
                warm_start=False,
                n_jobs=-1,
                # l1_ratio is only meaningful for the elasticnet penalty;
                # passing it for other penalties makes scikit-learn warn
                l1_ratio=0.5 if hparams["penalty"] == "elasticnet" else None,
            )
            logistic_regression.fit(X, y)

            test_X = tabular_data_dict[test_city_name]["X"].to_numpy()
            test_X = scaler.transform(test_X)
            test_y = tabular_data_dict[test_city_name]["y"].to_numpy().ravel()
            y_pred = logistic_regression.predict(test_X)
            y_proba = logistic_regression.predict_proba(test_X)[:, 1]

            auc = roc_auc_score(test_y, y_proba, average="micro")
            accuracy = (y_pred == test_y).mean()
            f1 = f1_score(
                test_y,
                y_pred,
                pos_label=1,
                average="binary",
            )

            model_path = os.path.join(
                model_dir, f"model_{fold_group_id}_fold_{index}.pkl"
            )

            with open(model_path, "wb") as f:
                dump(logistic_regression, f, protocol=5)

            run.log_model(
                path=model_path,
                name=f"model_{fold_group_id}_fold_{index}",
            )
            run.log({f"auc_fold_{index}": auc})
            run.log({f"accuracy_fold_{index}": accuracy})
            run.log({f"f1_fold_{index}": f1})

            aucs.append(auc)
            accuracies.append(accuracy)
            f1s.append(f1)

        # summary statistics ("mean_f1" is the metric the sweep optimizes)
        mean_auc = sum(aucs) / len(aucs)
        mean_accuracy = sum(accuracies) / len(accuracies)
        mean_f1 = sum(f1s) / len(f1s)
        run.log({"mean_auc": mean_auc})
        run.log({"mean_accuracy": mean_accuracy})
        run.log({"mean_f1": mean_f1})

    return wrapped


def run_sweep_graph_data(config):
    """Create a W&B sweep for the graph variant and run it with a local agent.

    Args:
        config: attribute configuration forwarded to ``run_k_fold_graph_data``.

    Raises:
        Exception: any error raised while logging in, creating the sweep or
            running the agent is printed, the active W&B run is finished and
            the error is re-raised.
    """
    try:
        wandb.login(key=WANDB_API_KEY)
        sweep_id = wandb.sweep(
            WANDB_SWEEP_PARAMS_GRAPH_DATA, project="accidents-downstream-task-v2"
        )

        wandb.agent(
            sweep_id,
            function=run_k_fold_graph_data(config, sweep_id),
            count=SWEEP_RUNS_COUNT,
        )
    except Exception as e:
        print(e)
        # close the active run so it is not left open after the failure
        wandb.finish()
        raise


def run_sweep_tabular_data(config):
    """Create a W&B sweep for the tabular variant and run it with a local agent.

    Args:
        config: attribute configuration forwarded to
            ``run_k_fold_tabular_data``.

    Raises:
        Exception: any error is printed, the active W&B run is finished and
            the error is raised again.
    """
    try:
        wandb.login(key=WANDB_API_KEY)

        sweep_id = wandb.sweep(
            WANDB_SWEEP_PARAMS_TABULAR_DATA, project="accidents-downstream-task-v2"
        )
        # callback executed by the agent for every run of the sweep
        fold_runner = run_k_fold_tabular_data(config, sweep_id)

        wandb.agent(sweep_id, function=fold_runner, count=SWEEP_RUNS_COUNT)
    except Exception as error:
        print(error)
        wandb.finish()
        raise error

# 12. Run functions

For each config:

1. Determine if config requires tabular or graph data
2. Create data excluding attributes not included in the config
3. Run the sweep


In [18]:
from typing import Any, Dict

def derive_data_structure(attr_config):
    """Pick the data structure required by an attribute configuration.

    Args:
        attr_config: configuration dict holding the ``USE_OSMNX_ATTRS`` key.

    Returns:
        ``"graph"`` when ``USE_OSMNX_ATTRS`` is truthy, ``"tabular"`` otherwise.
    """
    return "graph" if attr_config["USE_OSMNX_ATTRS"] else "tabular"


configs_size = len(ATTRIBUTES_CONFIGURATIONS)

# The configurations were validated by verify_attributes_configurations above,
# so the keys are not re-checked here.
for index, attr_config in enumerate(ATTRIBUTES_CONFIGURATIONS):
    print("Sweep for config {}/{} in progress...".format(index + 1, configs_size))

    data_structure = derive_data_structure(attr_config)
    hexes_attrs_config = attr_config["USE_HEXES_ATTRS"]

    # a non-empty dict counts as "hex attributes enabled"
    creator_params: Dict[str, Any] = dict(
        use_hexes_attr=bool(hexes_attrs_config),
    )

    if isinstance(hexes_attrs_config, dict):
        # restrict the hex attributes to the feature list stored for this
        # NUM_FEATURES / IN_PERCENT combination
        hex_features = pd.read_json(
            f"{HEX_FI_LOCATION}/accidents_top_{hexes_attrs_config['NUM_FEATURES']}_percent_{hexes_attrs_config['IN_PERCENT']}.json"
        )
        creator_params["columns_to_take"] = hex_features["top_values"].tolist()
    elif hexes_attrs_config is True:
        creator_params["columns_to_take"] = "all"

    # The k-fold callbacks read the module-level graph_data_dict /
    # tabular_data_dict when they run, so the dict has to be rebound here
    # before the sweep is started.
    if data_structure == "graph":
        graph_data_dict = {
            city_name: create_graph_data(
                hexes=gdfs["hexes"],
                controller=cast(GraphLayerController, gdfs["controller"]),
                osmnx_edges=gdfs["osmnx_edges"],
                osmnx_nodes=gdfs["osmnx_nodes"],
                use_ortophoto=attr_config["USE_ORTOPHOTO"],
                **creator_params,
            )
            for city_name, gdfs in gdfs_dict.items()
        }
        run_sweep_graph_data(attr_config)
    elif data_structure == "tabular":
        tabular_data_dict = {
            city_name: create_tabular_data(
                hexes=gdfs["hexes"],
                controller=cast(GraphLayerController, gdfs["controller"]),
                use_ortophoto=attr_config["USE_ORTOPHOTO"],
                **creator_params,
            )
            for city_name, gdfs in gdfs_dict.items()
        }
        run_sweep_tabular_data(attr_config)
    else:
        raise ValueError("Unknown data structure")

Sweep for config 1/4 in progress...


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mjuliajaniak[0m ([33mgradient_pwr[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /Users/jjaniak/.netrc


Create sweep with ID: dv87z9ro
Sweep URL: https://wandb.ai/gradient_pwr/accidents-downstream-task-v2/sweeps/dv87z9ro


[34m[1mwandb[0m: Agent Starting Run: 8xv6syn2 with config:
[34m[1mwandb[0m: 	hidden_channels: 50
[34m[1mwandb[0m: 	learning_rate: 0.00012796340759857518
[34m[1mwandb[0m: 	lin_layer_size: 128
[34m[1mwandb[0m: 	num_conv_layers: 3
[34m[1mwandb[0m: 	num_lin_layers: 4
[34m[1mwandb[0m: 	weight_decay: 0.0003861152989470642
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: ../../gradient_logs/2024_06_07_18_05_03/lightning_logs
/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:452: A layer with UninitializedParameter was found. Thus, the total number of parameters detected may be inaccurate.

  | Name  | Type      | Params
------------------------------------
0 | model | HeteroGNN | 145 K 
------------------------------------
145 K     Trainable params
0         Non-trainable params
145 K     Total params
0.582     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


VBox(children=(Label(value='162.836 MB of 162.836 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
USE_HEXES_ATTRS,▁
USE_ORTOPHOTO,▁
USE_OSMNX_ATTRS,▁

0,1
USE_HEXES_ATTRS,1
USE_ORTOPHOTO,0
USE_OSMNX_ATTRS,1
data_structure,graph


Run 8xv6syn2 errored:
Traceback (most recent call last):
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/4k/8js3jm9n457cm4_v631zyrkw0000gn/T/ipykernel_25686/3559290024.py", line 62, in wrapped
    auc, accuracy, f1, model_path = train(
  File "/Users/jjaniak/Documents/studia/projekt/gradient/notebooks/downstream_tasks/../../src/training/train.py", line 84, in train
    trainer.fit(model, train_loader, val_dataloaders=(val_loader or None))
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 545, in fit
    call._call_and_handle_interrupt(
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 44, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: ../../gradient_logs/2024_06_07_18_05_39/lightning_logs

  | Name  | Type      | Params
------------------------------------
0 | model | HeteroGNN | 14.8 K
------------------------------------
14.8 K    Trainable params
0         Non-trainable params
14.8 K    Total params
0.059     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
USE_HEXES_ATTRS,▁
USE_ORTOPHOTO,▁
USE_OSMNX_ATTRS,▁

0,1
USE_HEXES_ATTRS,1
USE_ORTOPHOTO,0
USE_OSMNX_ATTRS,1
data_structure,graph


Run ov2lgtee errored:
Traceback (most recent call last):
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/4k/8js3jm9n457cm4_v631zyrkw0000gn/T/ipykernel_25686/3559290024.py", line 62, in wrapped
    auc, accuracy, f1, model_path = train(
  File "/Users/jjaniak/Documents/studia/projekt/gradient/notebooks/downstream_tasks/../../src/training/train.py", line 84, in train
    trainer.fit(model, train_loader, val_dataloaders=(val_loader or None))
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 545, in fit
    call._call_and_handle_interrupt(
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 44, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: ../../gradient_logs/2024_06_07_18_05_56/lightning_logs

  | Name  | Type      | Params
------------------------------------
0 | model | HeteroGNN | 15.2 K
------------------------------------
15.2 K    Trainable params
0         Non-trainable params
15.2 K    Total params
0.061     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
USE_HEXES_ATTRS,▁
USE_ORTOPHOTO,▁
USE_OSMNX_ATTRS,▁

0,1
USE_HEXES_ATTRS,1
USE_ORTOPHOTO,0
USE_OSMNX_ATTRS,1
data_structure,graph


Run u1igbv8y errored:
Traceback (most recent call last):
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/4k/8js3jm9n457cm4_v631zyrkw0000gn/T/ipykernel_25686/3559290024.py", line 62, in wrapped
    auc, accuracy, f1, model_path = train(
  File "/Users/jjaniak/Documents/studia/projekt/gradient/notebooks/downstream_tasks/../../src/training/train.py", line 84, in train
    trainer.fit(model, train_loader, val_dataloaders=(val_loader or None))
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 545, in fit
    call._call_and_handle_interrupt(
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 44, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: ../../gradient_logs/2024_06_07_18_06_12/lightning_logs

  | Name  | Type      | Params
------------------------------------
0 | model | HeteroGNN | 38.7 K
------------------------------------
38.7 K    Trainable params
0         Non-trainable params
38.7 K    Total params
0.155     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
USE_HEXES_ATTRS,▁
USE_ORTOPHOTO,▁
USE_OSMNX_ATTRS,▁

0,1
USE_HEXES_ATTRS,1
USE_ORTOPHOTO,0
USE_OSMNX_ATTRS,1
data_structure,graph


Run 8cgnldoh errored:
Traceback (most recent call last):
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/4k/8js3jm9n457cm4_v631zyrkw0000gn/T/ipykernel_25686/3559290024.py", line 62, in wrapped
    auc, accuracy, f1, model_path = train(
  File "/Users/jjaniak/Documents/studia/projekt/gradient/notebooks/downstream_tasks/../../src/training/train.py", line 84, in train
    trainer.fit(model, train_loader, val_dataloaders=(val_loader or None))
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 545, in fit
    call._call_and_handle_interrupt(
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 44, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: ../../gradient_logs/2024_06_07_18_06_34/lightning_logs

  | Name  | Type      | Params
------------------------------------
0 | model | HeteroGNN | 13.4 K
------------------------------------
13.4 K    Trainable params
0         Non-trainable params
13.4 K    Total params
0.054     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
USE_HEXES_ATTRS,▁
USE_ORTOPHOTO,▁
USE_OSMNX_ATTRS,▁

0,1
USE_HEXES_ATTRS,1
USE_ORTOPHOTO,0
USE_OSMNX_ATTRS,1
data_structure,graph


Run j5f1qaqk errored:
Traceback (most recent call last):
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/4k/8js3jm9n457cm4_v631zyrkw0000gn/T/ipykernel_25686/3559290024.py", line 62, in wrapped
    auc, accuracy, f1, model_path = train(
  File "/Users/jjaniak/Documents/studia/projekt/gradient/notebooks/downstream_tasks/../../src/training/train.py", line 84, in train
    trainer.fit(model, train_loader, val_dataloaders=(val_loader or None))
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 545, in fit
    call._call_and_handle_interrupt(
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 44, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: ../../gradient_logs/2024_06_07_18_06_47/lightning_logs

  | Name  | Type      | Params
------------------------------------
0 | model | HeteroGNN | 62.5 K
------------------------------------
62.5 K    Trainable params
0         Non-trainable params
62.5 K    Total params
0.250     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
USE_HEXES_ATTRS,▁
USE_ORTOPHOTO,▁
USE_OSMNX_ATTRS,▁

0,1
USE_HEXES_ATTRS,1
USE_ORTOPHOTO,0
USE_OSMNX_ATTRS,1
data_structure,graph


Run p930e5zi errored:
Traceback (most recent call last):
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/4k/8js3jm9n457cm4_v631zyrkw0000gn/T/ipykernel_25686/3559290024.py", line 62, in wrapped
    auc, accuracy, f1, model_path = train(
  File "/Users/jjaniak/Documents/studia/projekt/gradient/notebooks/downstream_tasks/../../src/training/train.py", line 84, in train
    trainer.fit(model, train_loader, val_dataloaders=(val_loader or None))
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 545, in fit
    call._call_and_handle_interrupt(
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 44, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.

Sweep for config 2/4 in progress...


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /Users/jjaniak/.netrc


Create sweep with ID: mw9d6wsp
Sweep URL: https://wandb.ai/gradient_pwr/accidents-downstream-task-v2/sweeps/mw9d6wsp


[34m[1mwandb[0m: Agent Starting Run: dhnmr268 with config:
[34m[1mwandb[0m: 	hidden_channels: 50
[34m[1mwandb[0m: 	learning_rate: 0.0010748515803113716
[34m[1mwandb[0m: 	lin_layer_size: 8
[34m[1mwandb[0m: 	num_conv_layers: 2
[34m[1mwandb[0m: 	num_lin_layers: 4
[34m[1mwandb[0m: 	weight_decay: 8.098150785204365e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: ../../gradient_logs/2024_06_07_18_07_04/lightning_logs

  | Name  | Type      | Params
------------------------------------
0 | model | HeteroGNN | 62.3 K
------------------------------------
62.3 K    Trainable params
0         Non-trainable params
62.3 K    Total params
0.249     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


VBox(children=(Label(value='191.988 MB of 191.988 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
USE_HEXES_ATTRS,▁
USE_ORTOPHOTO,▁
USE_OSMNX_ATTRS,▁

0,1
USE_HEXES_ATTRS,1
USE_ORTOPHOTO,0
USE_OSMNX_ATTRS,1
data_structure,graph


Run dhnmr268 errored:
Traceback (most recent call last):
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/4k/8js3jm9n457cm4_v631zyrkw0000gn/T/ipykernel_25686/3559290024.py", line 62, in wrapped
    auc, accuracy, f1, model_path = train(
  File "/Users/jjaniak/Documents/studia/projekt/gradient/notebooks/downstream_tasks/../../src/training/train.py", line 84, in train
    trainer.fit(model, train_loader, val_dataloaders=(val_loader or None))
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 545, in fit
    call._call_and_handle_interrupt(
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 44, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: ../../gradient_logs/2024_06_07_18_07_36/lightning_logs

  | Name  | Type      | Params
------------------------------------
0 | model | HeteroGNN | 27.8 K
------------------------------------
27.8 K    Trainable params
0         Non-trainable params
27.8 K    Total params
0.111     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
USE_HEXES_ATTRS,▁
USE_ORTOPHOTO,▁
USE_OSMNX_ATTRS,▁

0,1
USE_HEXES_ATTRS,1
USE_ORTOPHOTO,0
USE_OSMNX_ATTRS,1
data_structure,graph


Run o8mbtt51 errored:
Traceback (most recent call last):
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/4k/8js3jm9n457cm4_v631zyrkw0000gn/T/ipykernel_25686/3559290024.py", line 62, in wrapped
    auc, accuracy, f1, model_path = train(
  File "/Users/jjaniak/Documents/studia/projekt/gradient/notebooks/downstream_tasks/../../src/training/train.py", line 84, in train
    trainer.fit(model, train_loader, val_dataloaders=(val_loader or None))
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 545, in fit
    call._call_and_handle_interrupt(
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 44, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: ../../gradient_logs/2024_06_07_18_07_51/lightning_logs

  | Name  | Type      | Params
------------------------------------
0 | model | HeteroGNN | 58.3 K
------------------------------------
58.3 K    Trainable params
0         Non-trainable params
58.3 K    Total params
0.233     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
USE_HEXES_ATTRS,▁
USE_ORTOPHOTO,▁
USE_OSMNX_ATTRS,▁

0,1
USE_HEXES_ATTRS,1
USE_ORTOPHOTO,0
USE_OSMNX_ATTRS,1
data_structure,graph


Run bgvz6wur errored:
Traceback (most recent call last):
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/4k/8js3jm9n457cm4_v631zyrkw0000gn/T/ipykernel_25686/3559290024.py", line 62, in wrapped
    auc, accuracy, f1, model_path = train(
  File "/Users/jjaniak/Documents/studia/projekt/gradient/notebooks/downstream_tasks/../../src/training/train.py", line 84, in train
    trainer.fit(model, train_loader, val_dataloaders=(val_loader or None))
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 545, in fit
    call._call_and_handle_interrupt(
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 44, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.

Sweep for config 3/4 in progress...


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /Users/jjaniak/.netrc


Create sweep with ID: t34vfnwj
Sweep URL: https://wandb.ai/gradient_pwr/accidents-downstream-task-v2/sweeps/t34vfnwj


[34m[1mwandb[0m: Agent Starting Run: zm2na1e1 with config:
[34m[1mwandb[0m: 	hidden_channels: 40
[34m[1mwandb[0m: 	learning_rate: 0.00019472320016502025
[34m[1mwandb[0m: 	lin_layer_size: 64
[34m[1mwandb[0m: 	num_conv_layers: 5
[34m[1mwandb[0m: 	num_lin_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001110778108303117
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: ../../gradient_logs/2024_06_07_18_08_07/lightning_logs

  | Name  | Type      | Params
------------------------------------
0 | model | HeteroGNN | 56.5 K
------------------------------------
56.5 K    Trainable params
0         Non-trainable params
56.5 K    Total params
0.226     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


VBox(children=(Label(value='164.969 MB of 164.969 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
USE_HEXES_ATTRS,▁
USE_ORTOPHOTO,▁
USE_OSMNX_ATTRS,▁

0,1
USE_HEXES_ATTRS,1
USE_ORTOPHOTO,0
USE_OSMNX_ATTRS,1
data_structure,graph


Run zm2na1e1 errored:
Traceback (most recent call last):
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/4k/8js3jm9n457cm4_v631zyrkw0000gn/T/ipykernel_25686/3559290024.py", line 62, in wrapped
    auc, accuracy, f1, model_path = train(
  File "/Users/jjaniak/Documents/studia/projekt/gradient/notebooks/downstream_tasks/../../src/training/train.py", line 84, in train
    trainer.fit(model, train_loader, val_dataloaders=(val_loader or None))
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 545, in fit
    call._call_and_handle_interrupt(
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 44, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: ../../gradient_logs/2024_06_07_18_08_38/lightning_logs

  | Name  | Type      | Params
------------------------------------
0 | model | HeteroGNN | 17.6 K
------------------------------------
17.6 K    Trainable params
0         Non-trainable params
17.6 K    Total params
0.071     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
USE_HEXES_ATTRS,▁
USE_ORTOPHOTO,▁
USE_OSMNX_ATTRS,▁

0,1
USE_HEXES_ATTRS,1
USE_ORTOPHOTO,0
USE_OSMNX_ATTRS,1
data_structure,graph


Run te8x29hn errored:
Traceback (most recent call last):
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/4k/8js3jm9n457cm4_v631zyrkw0000gn/T/ipykernel_25686/3559290024.py", line 62, in wrapped
    auc, accuracy, f1, model_path = train(
  File "/Users/jjaniak/Documents/studia/projekt/gradient/notebooks/downstream_tasks/../../src/training/train.py", line 84, in train
    trainer.fit(model, train_loader, val_dataloaders=(val_loader or None))
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 545, in fit
    call._call_and_handle_interrupt(
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 44, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01116289027777763, max=1.0)…

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: ../../gradient_logs/2024_06_07_18_09_00/lightning_logs

  | Name  | Type      | Params
------------------------------------
0 | model | HeteroGNN | 11.5 K
------------------------------------
11.5 K    Trainable params
0         Non-trainable params
11.5 K    Total params
0.046     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
USE_HEXES_ATTRS,▁
USE_ORTOPHOTO,▁
USE_OSMNX_ATTRS,▁

0,1
USE_HEXES_ATTRS,1
USE_ORTOPHOTO,0
USE_OSMNX_ATTRS,1
data_structure,graph


Run pwx8ywg7 errored:
Traceback (most recent call last):
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/4k/8js3jm9n457cm4_v631zyrkw0000gn/T/ipykernel_25686/3559290024.py", line 62, in wrapped
    auc, accuracy, f1, model_path = train(
  File "/Users/jjaniak/Documents/studia/projekt/gradient/notebooks/downstream_tasks/../../src/training/train.py", line 84, in train
    trainer.fit(model, train_loader, val_dataloaders=(val_loader or None))
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 545, in fit
    call._call_and_handle_interrupt(
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 44, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: ../../gradient_logs/2024_06_07_18_09_15/lightning_logs

  | Name  | Type      | Params
------------------------------------
0 | model | HeteroGNN | 9.6 K 
------------------------------------
9.6 K     Trainable params
0         Non-trainable params
9.6 K     Total params
0.038     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
USE_HEXES_ATTRS,▁
USE_ORTOPHOTO,▁
USE_OSMNX_ATTRS,▁

0,1
USE_HEXES_ATTRS,1
USE_ORTOPHOTO,0
USE_OSMNX_ATTRS,1
data_structure,graph


Run 1cd364ce errored:
Traceback (most recent call last):
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/4k/8js3jm9n457cm4_v631zyrkw0000gn/T/ipykernel_25686/3559290024.py", line 62, in wrapped
    auc, accuracy, f1, model_path = train(
  File "/Users/jjaniak/Documents/studia/projekt/gradient/notebooks/downstream_tasks/../../src/training/train.py", line 84, in train
    trainer.fit(model, train_loader, val_dataloaders=(val_loader or None))
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 545, in fit
    call._call_and_handle_interrupt(
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 44, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: ../../gradient_logs/2024_06_07_18_09_31/lightning_logs

  | Name  | Type      | Params
------------------------------------
0 | model | HeteroGNN | 26.2 K
------------------------------------
26.2 K    Trainable params
0         Non-trainable params
26.2 K    Total params
0.105     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
USE_HEXES_ATTRS,▁
USE_ORTOPHOTO,▁
USE_OSMNX_ATTRS,▁

0,1
USE_HEXES_ATTRS,1
USE_ORTOPHOTO,0
USE_OSMNX_ATTRS,1
data_structure,graph


Run e3eodpv0 errored:
Traceback (most recent call last):
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/4k/8js3jm9n457cm4_v631zyrkw0000gn/T/ipykernel_25686/3559290024.py", line 62, in wrapped
    auc, accuracy, f1, model_path = train(
  File "/Users/jjaniak/Documents/studia/projekt/gradient/notebooks/downstream_tasks/../../src/training/train.py", line 84, in train
    trainer.fit(model, train_loader, val_dataloaders=(val_loader or None))
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 545, in fit
    call._call_and_handle_interrupt(
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 44, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: ../../gradient_logs/2024_06_07_18_09_48/lightning_logs

  | Name  | Type      | Params
------------------------------------
0 | model | HeteroGNN | 19.6 K
------------------------------------
19.6 K    Trainable params
0         Non-trainable params
19.6 K    Total params
0.078     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
USE_HEXES_ATTRS,▁
USE_ORTOPHOTO,▁
USE_OSMNX_ATTRS,▁

0,1
USE_HEXES_ATTRS,1
USE_ORTOPHOTO,0
USE_OSMNX_ATTRS,1
data_structure,graph


Run uv33rwpt errored:
Traceback (most recent call last):
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/4k/8js3jm9n457cm4_v631zyrkw0000gn/T/ipykernel_25686/3559290024.py", line 62, in wrapped
    auc, accuracy, f1, model_path = train(
  File "/Users/jjaniak/Documents/studia/projekt/gradient/notebooks/downstream_tasks/../../src/training/train.py", line 84, in train
    trainer.fit(model, train_loader, val_dataloaders=(val_loader or None))
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 545, in fit
    call._call_and_handle_interrupt(
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 44, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.

Sweep for config 4/4 in progress...


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /Users/jjaniak/.netrc


Create sweep with ID: m8zigvbo
Sweep URL: https://wandb.ai/gradient_pwr/accidents-downstream-task-v2/sweeps/m8zigvbo


[34m[1mwandb[0m: Agent Starting Run: mycj9ux4 with config:
[34m[1mwandb[0m: 	hidden_channels: 20
[34m[1mwandb[0m: 	learning_rate: 4.5284204996142113e-05
[34m[1mwandb[0m: 	lin_layer_size: 16
[34m[1mwandb[0m: 	num_conv_layers: 2
[34m[1mwandb[0m: 	num_lin_layers: 0
[34m[1mwandb[0m: 	weight_decay: 0.0005153212091023232
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: ../../gradient_logs/2024_06_07_18_10_04/lightning_logs

  | Name  | Type      | Params
------------------------------------
0 | model | HeteroGNN | 47.9 K
------------------------------------
47.9 K    Trainable params
0         Non-trainable params
47.9 K    Total params
0.192     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


VBox(children=(Label(value='237.849 MB of 237.849 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
USE_HEXES_ATTRS,▁
USE_ORTOPHOTO,▁
USE_OSMNX_ATTRS,▁

0,1
USE_HEXES_ATTRS,1
USE_ORTOPHOTO,0
USE_OSMNX_ATTRS,1
data_structure,graph


Run mycj9ux4 errored:
Traceback (most recent call last):
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/4k/8js3jm9n457cm4_v631zyrkw0000gn/T/ipykernel_25686/3559290024.py", line 62, in wrapped
    auc, accuracy, f1, model_path = train(
  File "/Users/jjaniak/Documents/studia/projekt/gradient/notebooks/downstream_tasks/../../src/training/train.py", line 84, in train
    trainer.fit(model, train_loader, val_dataloaders=(val_loader or None))
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 545, in fit
    call._call_and_handle_interrupt(
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 44, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.

Problem at: /var/folders/4k/8js3jm9n457cm4_v631zyrkw0000gn/T/ipykernel_25686/3559290024.py 4 wrapped


Traceback (most recent call last):
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/wandb/sdk/wandb_init.py", line 1176, in init
    run = wi.init()
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/wandb/sdk/wandb_init.py", line 817, in init
    run_start_result = run_start_handle.wait(timeout=30)
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/wandb/sdk/lib/mailbox.py", line 283, in wait
    found, abandoned = self._slot._get_and_clear(timeout=wait_timeout)
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/wandb/sdk/lib/mailbox.py", line 130, in _get_and_clear
    if self._wait(timeout=timeout):
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/site-packages/wandb/sdk/lib/mailbox.py", line 126, in _wait
    return self._event.wait(timeout=timeout)
  File "/Users/jjaniak/anaconda3/envs/studia_projekt/lib/python3.8/threading.py", line 558, in w