In [1]:
import itertools
import json
import os
import random
from typing import Dict

import networkx as nx
import numpy as np
import pandas as pd
import torch
import torch_geometric as pyg
from numpy.typing import NDArray
from pecanpy import pecanpy as pp
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from torch_geometric.data import Data

import utils
from training import train_gnn

%load_ext autoreload
%autoreload 2

  from .autonotebook import tqdm as notebook_tqdm


In [7]:

# Hyperparameter grid search for the 'gat' and 'gnn' node classifiers.
#
# For each model, every combination in its grid is trained once per tuning city;
# the combination with the highest validation F1 averaged over the cities is
# reported, and every attempt is recorded in `model_attempts`.

model_attempts = []  # One list of attempt records per model, in search order

seed = 42
data_dir = 'data'
tuning_cities = ["turku", "detroit", "paris", "adelaide"]

# Search space per model name (insertion order = search order: 'gat' then 'gnn').
param_grids = {
    'gat': {"heads": [2, 4, 8], "nb_convs": [3, 5], "out_channels": [8, 16, 32]},
    # NOTE(review): 'gnn' is searched with a single head count of 0, presumably
    # because that model ignores `heads` — confirm in training.train_gnn.
    'gnn': {"heads": [0], "nb_convs": [3, 5, 10], "out_channels": [16, 32, 64, 128]},
}


def _seed_everything(seed_value):
    """Seed the Python, NumPy and PyTorch (CPU and CUDA) random generators."""
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)


def _load_city_inputs(all_features, city, data_dir, random_state):
    """Build the hyperparameter-independent model inputs for one city.

    Args:
        all_features: DataFrame read from ``handcrafted_features.csv``.
        city: Value of the ``city`` column to select; also the name of the
            sub-directory of ``data_dir`` holding ``adj_mat.edg``.
        data_dir: Root directory of the data files.
        random_state: Seed passed to both stratified splits.

    Returns:
        A dict with the scaled feature matrix (``features``), the labels
        (``targets``), the graph connectivity (``edge_index``), the
        ``train_ids`` / ``val_ids`` / ``test_ids`` row indices and the
        ``pos_weight`` class weight.
    """
    city_features = all_features[all_features["city"] == city]
    targets = city_features["city_center"].values
    # Drop identifiers and the label; "Unnamed: 0" is presumably a saved index column.
    features = city_features.drop(
        ["stop_I", "name", "city_center", "city", "Unnamed: 0"], axis=1
    ).values

    graph = nx.read_edgelist(
        os.path.join(data_dir, city, 'adj_mat.edg'), create_using=nx.DiGraph
    )
    # NOTE(review): the adjacency rows follow graph.nodes() order, while the
    # features follow the CSV row order — TODO confirm the two orderings agree.
    adj_mat = nx.adjacency_matrix(graph, weight=None)  # not weighted
    edge_index, _ = pyg.utils.from_scipy_sparse_matrix(adj_mat)

    # 80/20 train/test split, then 80/20 train/validation split of the train part.
    train_ids, test_ids = train_test_split(
        np.arange(features.shape[0]),
        test_size=0.2,
        stratify=targets,
        random_state=random_state,
    )
    train_ids, val_ids = train_test_split(
        train_ids,
        test_size=0.2,
        stratify=targets[train_ids],
        random_state=random_state,
    )

    # Fit the scaler on the training rows only, then scale every row.
    scaler = StandardScaler()
    scaler.fit(features[train_ids])
    features = scaler.transform(features)

    # Ratio of negative (0) to positive (1) labels among the training rows.
    pos_weight = np.sum(targets[train_ids] == 0) / np.sum(targets[train_ids] == 1)

    return {
        "features": features,
        "targets": targets,
        "edge_index": edge_index,
        "train_ids": train_ids,
        "val_ids": val_ids,
        "test_ids": test_ids,
        "pos_weight": pos_weight,
    }


# Load all features once: none of this depends on the hyperparameters, and the
# splits use an explicit random_state, so it is identical for every combination.
all_features = pd.read_csv(os.path.join(data_dir, "handcrafted_features.csv"))
city_inputs = {
    city: _load_city_inputs(all_features, city, data_dir, seed)
    for city in tuning_cities
}

for model_name, grid in param_grids.items():
    best_heads = -1
    best_nb_graph_conv = -1
    best_out_channels = -1
    best_score = -1
    attempts = []  # Attempt records for this model

    # Same visiting order as nested loops over heads, then nb_convs, then out_channels.
    for head, nb_conv, out_channel in itertools.product(
        grid["heads"], grid["nb_convs"], grid["out_channels"]
    ):
        f1_train = []  # Per-city F1 scores on the training set
        f1_val = []  # Per-city F1 scores on the validation set

        for city in tuning_cities:
            # Re-seed before every run so each training starts from the same RNG state.
            _seed_everything(seed)

            print(f"starting training for {city} with model_name={model_name}, head={head}, nb_conv={nb_conv}, out_channels={out_channel}")

            inputs = city_inputs[city]
            # Fresh tensors and index copies per run, so a training run cannot
            # alter the cached inputs used by later combinations.
            graph_data = Data(
                x=torch.from_numpy(inputs["features"]).clone(),
                y=torch.tensor(inputs["targets"], dtype=torch.float),
                edge_index=inputs["edge_index"].clone(),
            )

            results = train_gnn(
                [graph_data],
                inputs["train_ids"].copy(),
                inputs["val_ids"].copy(),
                inputs["test_ids"].copy(),
                pos_weight=inputs["pos_weight"],
                model_name=model_name,
                lr=1e-3,
                epochs=200,
                out_channels_graph=out_channel,
                # Derived from the data instead of a hard-coded feature count.
                in_channels_graph=inputs["features"].shape[1],
                heads=head,
                nb_graph_conv=nb_conv,
                dropout=0.0
            )
            # results[0] holds per-split reports; results[1] is treated as the
            # validation F1 — NOTE(review): confirm against training.train_gnn.
            f1_train.append(results[0]['train']["1"]["f1-score"])
            f1_val.append(results[1])

        # Average over the tuning cities and record the attempt
        avg_f1_train = np.mean(f1_train)
        avg_f1_val = np.mean(f1_val)
        attempts.append({"model_name":model_name,"head":head,"nb_conv":nb_conv,"out_channel":out_channel,"train_f1": avg_f1_train, "val_f1": avg_f1_val})

        print(f'\t|| Avg train F1-score : {avg_f1_train}, Avg val F1-score : {avg_f1_val}', end='\r')
        print()

        # Strict comparison: on a tie the earliest combination is kept.
        if avg_f1_val > best_score:
            best_score = avg_f1_val
            best_heads = head
            best_nb_graph_conv = nb_conv
            best_out_channels = out_channel

    print(f"best parameters with model_name={model_name}, head={best_heads}, nb_conv={best_nb_graph_conv}, out_channels={best_out_channels}")
    model_attempts.append(attempts)


starting training for turku with model_name=gat, head=2, nb_conv=3, out_channels=8
starting training for detroit with model_name=gat, head=2, nb_conv=3, out_channels=8
starting training for paris with model_name=gat, head=2, nb_conv=3, out_channels=8
starting training for adelaide with model_name=gat, head=2, nb_conv=3, out_channels=8
	|| Avg train F1-score : 0.5875801063735641, Avg val F1-score : 0.5300247720860792
starting training for turku with model_name=gat, head=2, nb_conv=3, out_channels=16
starting training for detroit with model_name=gat, head=2, nb_conv=3, out_channels=16
starting training for paris with model_name=gat, head=2, nb_conv=3, out_channels=16
starting training for adelaide with model_name=gat, head=2, nb_conv=3, out_channels=16
	|| Avg train F1-score : 0.6329719665812623, Avg val F1-score : 0.560326161319012
starting training for turku with model_name=gat, head=2, nb_conv=3, out_channels=32
starting training for detroit with model_name=gat, head=2, nb_conv=3, out

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

starting training for detroit with model_name=gat, head=2, nb_conv=5, out_channels=8


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

starting training for paris with model_name=gat, head=2, nb_conv=5, out_channels=8


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

starting training for adelaide with model_name=gat, head=2, nb_conv=5, out_channels=8


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

	|| Avg train F1-score : 0.5105673153460643, Avg val F1-score : 0.4905895025824491
starting training for turku with model_name=gat, head=2, nb_conv=5, out_channels=16
starting training for detroit with model_name=gat, head=2, nb_conv=5, out_channels=16
starting training for paris with model_name=gat, head=2, nb_conv=5, out_channels=16
starting training for adelaide with model_name=gat, head=2, nb_conv=5, out_channels=16
	|| Avg train F1-score : 0.5696528422580671, Avg val F1-score : 0.49541946355726646
starting training for turku with model_name=gat, head=2, nb_conv=5, out_channels=32


  _warn_prf(average, modifier, msg_start, len(result))


starting training for detroit with model_name=gat, head=2, nb_conv=5, out_channels=32


  _warn_prf(average, modifier, msg_start, len(result))


starting training for paris with model_name=gat, head=2, nb_conv=5, out_channels=32


  _warn_prf(average, modifier, msg_start, len(result))


starting training for adelaide with model_name=gat, head=2, nb_conv=5, out_channels=32


  _warn_prf(average, modifier, msg_start, len(result))


	|| Avg train F1-score : 0.6248778372096037, Avg val F1-score : 0.5201097902037968
starting training for turku with model_name=gat, head=4, nb_conv=3, out_channels=8
starting training for detroit with model_name=gat, head=4, nb_conv=3, out_channels=8
starting training for paris with model_name=gat, head=4, nb_conv=3, out_channels=8
starting training for adelaide with model_name=gat, head=4, nb_conv=3, out_channels=8
	|| Avg train F1-score : 0.5564245163421372, Avg val F1-score : 0.5259669177038386
starting training for turku with model_name=gat, head=4, nb_conv=3, out_channels=16
starting training for detroit with model_name=gat, head=4, nb_conv=3, out_channels=16
starting training for paris with model_name=gat, head=4, nb_conv=3, out_channels=16
starting training for adelaide with model_name=gat, head=4, nb_conv=3, out_channels=16
	|| Avg train F1-score : 0.6523163056544297, Avg val F1-score : 0.5389834372313584
starting training for turku with model_name=gat, head=4, nb_conv=3, out_c

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


starting training for detroit with model_name=gat, head=4, nb_conv=5, out_channels=16


  _warn_prf(average, modifier, msg_start, len(result))


starting training for paris with model_name=gat, head=4, nb_conv=5, out_channels=16


  _warn_prf(average, modifier, msg_start, len(result))


starting training for adelaide with model_name=gat, head=4, nb_conv=5, out_channels=16


  _warn_prf(average, modifier, msg_start, len(result))


	|| Avg train F1-score : 0.6157774258749164, Avg val F1-score : 0.5151085748443508
starting training for turku with model_name=gat, head=4, nb_conv=5, out_channels=32
starting training for detroit with model_name=gat, head=4, nb_conv=5, out_channels=32
starting training for paris with model_name=gat, head=4, nb_conv=5, out_channels=32
starting training for adelaide with model_name=gat, head=4, nb_conv=5, out_channels=32
	|| Avg train F1-score : 0.6745305245002385, Avg val F1-score : 0.5275448607768157
starting training for turku with model_name=gat, head=8, nb_conv=3, out_channels=8
starting training for detroit with model_name=gat, head=8, nb_conv=3, out_channels=8
starting training for paris with model_name=gat, head=8, nb_conv=3, out_channels=8
starting training for adelaide with model_name=gat, head=8, nb_conv=3, out_channels=8
	|| Avg train F1-score : 0.6273114991566326, Avg val F1-score : 0.5368047795070465
starting training for turku with model_name=gat, head=8, nb_conv=3, out_c

  _warn_prf(average, modifier, msg_start, len(result))


starting training for detroit with model_name=gat, head=8, nb_conv=5, out_channels=8


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


starting training for paris with model_name=gat, head=8, nb_conv=5, out_channels=8


  _warn_prf(average, modifier, msg_start, len(result))


starting training for adelaide with model_name=gat, head=8, nb_conv=5, out_channels=8


  _warn_prf(average, modifier, msg_start, len(result))


	|| Avg train F1-score : 0.6192238541200171, Avg val F1-score : 0.49881474365698364
starting training for turku with model_name=gat, head=8, nb_conv=5, out_channels=16
starting training for detroit with model_name=gat, head=8, nb_conv=5, out_channels=16
starting training for paris with model_name=gat, head=8, nb_conv=5, out_channels=16
starting training for adelaide with model_name=gat, head=8, nb_conv=5, out_channels=16
	|| Avg train F1-score : 0.6351279767868662, Avg val F1-score : 0.52388853893406
starting training for turku with model_name=gat, head=8, nb_conv=5, out_channels=32
starting training for detroit with model_name=gat, head=8, nb_conv=5, out_channels=32
starting training for paris with model_name=gat, head=8, nb_conv=5, out_channels=32
starting training for adelaide with model_name=gat, head=8, nb_conv=5, out_channels=32
	|| Avg train F1-score : 0.6254495811957321, Avg val F1-score : 0.5294389426539865
best parameters with model_name=gat, head=8, nb_conv=3, out_channels=3

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


	|| Avg train F1-score : 0.7492632072154666, Avg val F1-score : 0.5851518901807551
starting training for turku with model_name=gnn, head=0, nb_conv=5, out_channels=32
starting training for detroit with model_name=gnn, head=0, nb_conv=5, out_channels=32
starting training for paris with model_name=gnn, head=0, nb_conv=5, out_channels=32
starting training for adelaide with model_name=gnn, head=0, nb_conv=5, out_channels=32
	|| Avg train F1-score : 0.8359876079245525, Avg val F1-score : 0.6357426234242782
starting training for turku with model_name=gnn, head=0, nb_conv=5, out_channels=64
starting training for detroit with model_name=gnn, head=0, nb_conv=5, out_channels=64
starting training for paris with model_name=gnn, head=0, nb_conv=5, out_channels=64
starting training for adelaide with model_name=gnn, head=0, nb_conv=5, out_channels=64
	|| Avg train F1-score : 0.9017122820941009, Avg val F1-score : 0.6228870667609748
starting training for turku with model_name=gnn, head=0, nb_conv=5, o

  _warn_prf(average, modifier, msg_start, len(result))


starting training for adelaide with model_name=gnn, head=0, nb_conv=5, out_channels=128
	|| Avg train F1-score : 0.8271915620199232, Avg val F1-score : 0.6305840016243944
starting training for turku with model_name=gnn, head=0, nb_conv=10, out_channels=16
starting training for detroit with model_name=gnn, head=0, nb_conv=10, out_channels=16


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


starting training for paris with model_name=gnn, head=0, nb_conv=10, out_channels=16
starting training for adelaide with model_name=gnn, head=0, nb_conv=10, out_channels=16


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


	|| Avg train F1-score : 0.5459343760386124, Avg val F1-score : 0.4704772123525629
starting training for turku with model_name=gnn, head=0, nb_conv=10, out_channels=32
starting training for detroit with model_name=gnn, head=0, nb_conv=10, out_channels=32
starting training for paris with model_name=gnn, head=0, nb_conv=10, out_channels=32
starting training for adelaide with model_name=gnn, head=0, nb_conv=10, out_channels=32
	|| Avg train F1-score : 0.6025516968256766, Avg val F1-score : 0.4998167423334966
starting training for turku with model_name=gnn, head=0, nb_conv=10, out_channels=64
starting training for detroit with model_name=gnn, head=0, nb_conv=10, out_channels=64
starting training for paris with model_name=gnn, head=0, nb_conv=10, out_channels=64


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


starting training for adelaide with model_name=gnn, head=0, nb_conv=10, out_channels=64


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


	|| Avg train F1-score : 0.6567787240163387, Avg val F1-score : 0.5644025261019379
starting training for turku with model_name=gnn, head=0, nb_conv=10, out_channels=128
starting training for detroit with model_name=gnn, head=0, nb_conv=10, out_channels=128


  _warn_prf(average, modifier, msg_start, len(result))


starting training for paris with model_name=gnn, head=0, nb_conv=10, out_channels=128
starting training for adelaide with model_name=gnn, head=0, nb_conv=10, out_channels=128


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


	|| Avg train F1-score : 0.6904641804882314, Avg val F1-score : 0.571716654694375
best parameters with model_name=gnn, head=0, nb_conv=3, out_channels=32


In [9]:
# Persist every grid-search attempt (one list of records per model) as JSON.
import json

with open("save_gnns.json","w") as attempts_file:
    json.dump(model_attempts, attempts_file)

In [11]:
# Run on all cities found under the 'data' directory.
# Going by the recorded output, this prints each city's positive-class weight
# and then trains a GNN followed by a GAT for that city.
# NOTE(review): cities_loop_gnn is neither defined nor imported in the visible
# cells — confirm where it comes from so this cell survives Restart & Run All.
cities_loop_gnn('data')

bordeaux
Pos weight : 5.426900584795321
Training GNN...
*******************************
Training GAT...

***********************************************
***********************************************
helsinki
Pos weight : 6.6279863481228665
Training GNN...
*******************************
Training GAT...

***********************************************
***********************************************
rome
Pos weight : 5.908093278463649
Training GNN...
*******************************
Training GAT...

***********************************************
***********************************************
luxembourg
Pos weight : 6.734513274336283
Training GNN...
*******************************
Training GAT...

***********************************************
***********************************************
brisbane
Pos weight : 4.839167455061495
Training GNN...
*******************************
Training GAT...

***********************************************
********************************************