In [1]:
%load_ext autoreload
%autoreload 2

In [33]:
import torch
import os
import pandas as pd
import torch.nn as nn
from torch_geometric.data import Data
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.loader import DataLoader
from torch.utils.data import random_split, Dataset
 
from tqdm import tqdm
from utils.london.process_data import build_train_test_loaders
from utils.london.training import CustomMAELoss, CustomMAPELoss, test_model
from utils.london.link_loads import get_graph_attributes, df_to_graph, build_quarter_hour_data, add_missing_nodes
from utils.london.models import STGCN
# import networkx as nx

In [36]:
MAE = CustomMAELoss()
MAPE = CustomMAPELoss()

In [37]:
folder_path = "data/london/"
# Get graph attributes, create dfs from csv, process Link column and ordered dfs by time
num_nodes, edge_index, node_mapping, dfs = get_graph_attributes(folder_path)

In [38]:
graph_data = []
# each df should have the same dimension and same nodes at the same columns
for filename, df in dfs.items():
    df = add_missing_nodes(df, node_mapping, num_nodes) # add zeros row for missing nodes
    df_qhrs = build_quarter_hour_data(df, filename, num_nodes) # retourne 24*4 df avec ses paramètres temporel et le flow
    graph_data.extend(df_qhrs)
    
graphs = [df_to_graph(df, edge_index) for df in graph_data]  # Un graphe par quart d'heure

## GCN + GRU (pas concluant, ne pas run)

In [46]:
class GraphSequenceDataset(Dataset):
    def __init__(self, graphs, window_size=4):
        self.graphs = graphs
        self.window_size = window_size
        if len(graphs) <= window_size:
            raise ValueError("Error : window_size should be higher than the number of graphs")
        
    def __len__(self):
        return len(self.graphs) - self.window_size

    def __getitem__(self, idx):
        # input_graphs prend en compte les window_size graphe précédent l'instant idx + le graphe à prèdire à l'instant idx
        input_graphs = self.graphs[idx : idx + self.window_size + 1]
        target = self.graphs[idx + self.window_size].y  # On prédit y du dernier graph
        return input_graphs, target 

In [47]:
# Définition des loaders
window_size = 4  # Nombre de pas de temps en entrée
train_size = int(0.8 * len(graphs))

train_dataset = GraphSequenceDataset(graphs[:train_size], window_size)  
test_dataset = GraphSequenceDataset(graphs[train_size:], window_size)

train_loader = DataLoader(train_dataset, batch_size=1, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

In [48]:
class TGCN(nn.Module):
    def __init__(self, node_features, hidden_dim, gru_hidden_dim):
        super(TGCN, self).__init__()
        self.gcn = GCNConv(node_features, hidden_dim)  # GCN
        self.gru = nn.GRU(hidden_dim, gru_hidden_dim, batch_first=True)  # GRU
        self.fc = nn.Linear(gru_hidden_dim, 1)  # Prédiction finale

    def forward(self, graph_seq):
        # window_size = len(graph_seq)  # Nombre de pas de temps
        # batch_size = graph_seq[0].x.shape[0]  # Nombre de nœuds

        spatial_features = []
        for graph in graph_seq:
            x = self.gcn(graph.x, graph.edge_index)  # GCN
            x = F.relu(x)
            spatial_features.append(x)

        spatial_features = torch.stack(spatial_features, dim=1)  # (batch, time, hidden_dim)

        _ , final_state = self.gru(spatial_features)  # prédiction sur le dernier (1, gru_hidden_dim)
        final_state = final_state.squeeze() # (gru_hidden_dim)
        final_out = self.fc(final_state) # Prédiction sur le dernier état
        final_out = F.relu(final_out)
        return final_out

In [24]:
graphs[0]

Data(x=[1187, 10], edge_index=[2, 3946], y=[1187, 1])

In [50]:
model = TGCN(node_features=10, hidden_dim=32, gru_hidden_dim=64)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = MAE

# Boucle d'entraînement
for epoch in tqdm(range(50)):
    model.train()
    total_loss = 0
    for graph_seq, target in train_loader:
        optimizer.zero_grad()
        output = model(graph_seq)

        target = target.squeeze()
        output = output.reshape(target.shape)
    
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader)}")


  2%|▏         | 1/50 [00:44<36:16, 44.42s/it]

Epoch 1, Loss: 256.43267001944605


  4%|▍         | 2/50 [01:29<35:38, 44.55s/it]

Epoch 2, Loss: 256.43267001944605


  4%|▍         | 2/50 [01:32<36:58, 46.21s/it]


KeyboardInterrupt: 

In [19]:
model.eval()
test_loss = 0
with torch.no_grad():
    for graph_seq, target in test_loader:

        output = model(graph_seq)
        loss = criterion(output, target)
        test_loss += loss.item()

print(f"Test MAE: {test_loss / len(test_loader)}")


Test MAE: 204.33262727453592


In [22]:
39e6/(96*1206)

336.8573797678275

The average flow per inter_station per 15min is 337. So 204 MAE error is not a valid result.

## GCN with past flows as node features

In [39]:
window_size=2
train_loader, test_loader = build_train_test_loaders(graphs, window_size)

In [42]:
from utils.london.models import STGCN

In [43]:
model = STGCN(node_features=window_size, hidden_dim=32)

optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)
criterion = MAE

num_epochs = 50

# Boucle d'entraînement
for epoch in tqdm(range(num_epochs)):
    model.train()
    train_loss = 0
    for graph, temporal_features in train_loader:
        temporal_features = temporal_features.squeeze()
        optimizer.zero_grad()
        output = model(graph, temporal_features)
        target = graph.y    
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()        
        train_loss += loss.item()
    
    print(f"Epoch {epoch+1}, Train loss: {train_loss / len(train_loader)}")

    # Évaluation sur le jeu de test
    model.eval()
    test_loss = 0
    with torch.no_grad():  # Désactivation des gradients pour l'évaluation
        for graph, temporal_features in test_loader:
            temporal_features = temporal_features.squeeze()  # Squeeze si nécessaire
            output = model(graph, temporal_features)  # Assure-toi de passer aussi les temporal_features
            target = graph.y
            loss = criterion(output, target)
            test_loss += loss.item()

    print(f"Epoch {epoch+1}, Test loss: {test_loss / len(test_loader)}")

  0%|          | 0/50 [00:00<?, ?it/s]

Epoch 1, Train loss: 100.47351199780408


  2%|▏         | 1/50 [00:05<04:50,  5.93s/it]

Epoch 1, Test loss: 95.45939459920145
Epoch 2, Train loss: 96.26022369892029


  4%|▍         | 2/50 [00:12<04:50,  6.06s/it]

Epoch 2, Test loss: 94.37469206192205
Epoch 3, Train loss: 95.58594897593937


  6%|▌         | 3/50 [00:17<04:30,  5.75s/it]

Epoch 3, Test loss: 94.12730406411161
Epoch 4, Train loss: 95.3206072324508


  8%|▊         | 4/50 [00:22<04:16,  5.59s/it]

Epoch 4, Test loss: 93.41892328446542
Epoch 5, Train loss: 95.06114119989522


 10%|█         | 5/50 [00:28<04:07,  5.49s/it]

Epoch 5, Test loss: 93.56173534962875
Epoch 6, Train loss: 94.75073385025183


 12%|█▏        | 6/50 [00:33<03:59,  5.43s/it]

Epoch 6, Test loss: 93.47253161682451
Epoch 7, Train loss: 94.85445664896196


 14%|█▍        | 7/50 [00:38<03:51,  5.39s/it]

Epoch 7, Test loss: 93.59560856110146
Epoch 8, Train loss: 94.78432692956879


 16%|█▌        | 8/50 [00:44<03:49,  5.47s/it]

Epoch 8, Test loss: 93.91616215158025
Epoch 9, Train loss: 95.002119272532


 18%|█▊        | 9/50 [00:50<03:48,  5.57s/it]

Epoch 9, Test loss: 93.88221287523636
Epoch 10, Train loss: 94.7085356075183


 20%|██        | 10/50 [00:56<03:47,  5.68s/it]

Epoch 10, Test loss: 93.77702474505254
Epoch 11, Train loss: 94.81156799830653


 22%|██▏       | 11/50 [01:02<03:49,  5.89s/it]

Epoch 11, Test loss: 93.76205791962094
Epoch 12, Train loss: 94.68818351696903


 24%|██▍       | 12/50 [01:08<03:48,  6.02s/it]

Epoch 12, Test loss: 93.68528427101451
Epoch 13, Train loss: 94.74922383832012


 26%|██▌       | 13/50 [01:14<03:42,  6.01s/it]

Epoch 13, Test loss: 93.71636227046561
Epoch 14, Train loss: 94.6366285031497


 28%|██▊       | 14/50 [01:20<03:37,  6.04s/it]

Epoch 14, Test loss: 93.16323509985
Epoch 15, Train loss: 94.64906847991531


 30%|███       | 15/50 [01:26<03:31,  6.03s/it]

Epoch 15, Test loss: 93.72442082546651
Epoch 16, Train loss: 94.55932885276334


 32%|███▏      | 16/50 [01:33<03:25,  6.06s/it]

Epoch 16, Test loss: 93.1794852242485
Epoch 17, Train loss: 94.67553788443364


 34%|███▍      | 17/50 [01:39<03:20,  6.08s/it]

Epoch 17, Test loss: 93.71168802408188
Epoch 18, Train loss: 94.65315067627488


 36%|███▌      | 18/50 [01:45<03:12,  6.03s/it]

Epoch 18, Test loss: 93.7252759370308
Epoch 19, Train loss: 94.63755591902193


 38%|███▊      | 19/50 [01:51<03:08,  6.08s/it]

Epoch 19, Test loss: 93.73460464244405
Epoch 20, Train loss: 94.62168487069042


 40%|████      | 20/50 [01:57<03:03,  6.11s/it]

Epoch 20, Test loss: 93.76262248358569
Epoch 21, Train loss: 94.67635993412428


 42%|████▏     | 21/50 [02:03<03:01,  6.24s/it]

Epoch 21, Test loss: 93.69829333628442
Epoch 22, Train loss: 94.55081407889455


 44%|████▍     | 22/50 [02:10<02:55,  6.28s/it]

Epoch 22, Test loss: 93.61120733359068
Epoch 23, Train loss: 94.5886338769419


 46%|████▌     | 23/50 [02:16<02:51,  6.34s/it]

Epoch 23, Test loss: 93.53438721958726
Epoch 24, Train loss: 94.46147359143308


 48%|████▊     | 24/50 [02:22<02:42,  6.27s/it]

Epoch 24, Test loss: 93.25294735420589
Epoch 25, Train loss: 94.69476654265664


 50%|█████     | 25/50 [02:29<02:35,  6.24s/it]

Epoch 25, Test loss: 93.73171333412465
Epoch 26, Train loss: 94.6759242680447


 52%|█████▏    | 26/50 [02:35<02:29,  6.21s/it]

Epoch 26, Test loss: 93.75238100418228
Epoch 27, Train loss: 94.66044446138066


 54%|█████▍    | 27/50 [02:41<02:22,  6.19s/it]

Epoch 27, Test loss: 93.68261738133434
Epoch 28, Train loss: 94.57111603072883


 56%|█████▌    | 28/50 [02:47<02:16,  6.18s/it]

Epoch 28, Test loss: 93.60340262139884
Epoch 29, Train loss: 94.6255016167011


 58%|█████▊    | 29/50 [02:53<02:10,  6.24s/it]

Epoch 29, Test loss: 93.7167689548562
Epoch 30, Train loss: 94.58987219773998


 60%|██████    | 30/50 [03:00<02:05,  6.26s/it]

Epoch 30, Test loss: 93.70498546940476
Epoch 31, Train loss: 94.65538724391735


 62%|██████▏   | 31/50 [03:06<01:58,  6.26s/it]

Epoch 31, Test loss: 93.2144093709412
Epoch 32, Train loss: 94.54902927732188


 64%|██████▍   | 32/50 [03:12<01:52,  6.25s/it]

Epoch 32, Test loss: 93.64893757899443
Epoch 33, Train loss: 94.58265015328634


 66%|██████▌   | 33/50 [03:19<01:46,  6.27s/it]

Epoch 33, Test loss: 93.6617350553475
Epoch 34, Train loss: 94.58451657100053


 68%|██████▊   | 34/50 [03:25<01:39,  6.25s/it]

Epoch 34, Test loss: 93.66912383964772
Epoch 35, Train loss: 94.6206532032893


 70%|███████   | 35/50 [03:31<01:33,  6.24s/it]

Epoch 35, Test loss: 93.26707798194215
Epoch 36, Train loss: 94.72690865811855


 72%|███████▏  | 36/50 [03:37<01:28,  6.32s/it]

Epoch 36, Test loss: 93.71604551924094
Epoch 37, Train loss: 94.69783265922594


 74%|███████▍  | 37/50 [03:44<01:21,  6.29s/it]

Epoch 37, Test loss: 93.7250868023586
Epoch 38, Train loss: 94.6535293387946


 76%|███████▌  | 38/50 [03:50<01:14,  6.24s/it]

Epoch 38, Test loss: 93.68486038127097
Epoch 39, Train loss: 94.66329450409222


 78%|███████▊  | 39/50 [03:57<01:10,  6.44s/it]

Epoch 39, Test loss: 93.68335136143564
Epoch 40, Train loss: 94.6668174123619


 80%|████████  | 40/50 [04:03<01:04,  6.43s/it]

Epoch 40, Test loss: 93.64292605378724
Epoch 41, Train loss: 94.65090838413383


 82%|████████▏ | 41/50 [04:09<00:56,  6.28s/it]

Epoch 41, Test loss: 93.65447686756879
Epoch 42, Train loss: 94.54412855196662


 84%|████████▍ | 42/50 [04:15<00:50,  6.27s/it]

Epoch 42, Test loss: 93.48280112878406
Epoch 43, Train loss: 94.6878159129825


 86%|████████▌ | 43/50 [04:21<00:43,  6.22s/it]

Epoch 43, Test loss: 93.68809623338225
Epoch 44, Train loss: 94.68427435446023


 88%|████████▊ | 44/50 [04:27<00:36,  6.12s/it]

Epoch 44, Test loss: 93.65792133967614
Epoch 45, Train loss: 94.65709544784376


 90%|█████████ | 45/50 [04:33<00:29,  6.00s/it]

Epoch 45, Test loss: 93.65765189185268
Epoch 46, Train loss: 94.65667538396755


 92%|█████████▏| 46/50 [04:39<00:23,  5.88s/it]

Epoch 46, Test loss: 93.62545660133209
Epoch 47, Train loss: 94.57405035885031


 94%|█████████▍| 47/50 [04:44<00:17,  5.85s/it]

Epoch 47, Test loss: 93.64957918782832
Epoch 48, Train loss: 94.5821417993565


 96%|█████████▌| 48/50 [04:50<00:11,  5.85s/it]

Epoch 48, Test loss: 93.34744163401244
Epoch 49, Train loss: 94.68297545829456


 98%|█████████▊| 49/50 [04:56<00:05,  5.90s/it]

Epoch 49, Test loss: 93.68424216027941
Epoch 50, Train loss: 94.66998190103735


100%|██████████| 50/50 [05:02<00:00,  6.06s/it]

Epoch 50, Test loss: 93.65443225195371





In [10]:
loss_mae = 0
loss_mape = 0
count_total = 0
for graph, temporal_features in test_loader:
    count = 0
    output = graph.x[:,-1]
    output = output.unsqueeze(-1)
    target = graph.y
    loss_mae += MAE(output, target)

    maskP = target >= 1
    countP = maskP.sum().item()

    outputP = output[maskP]
    targetP = target[maskP]

    if torch.all(graph.x[:,-1] == 0):
        print(graph.x)
        # print(targetP.shape)
    l = MAPE(outputP, targetP)
    loss_mape += l
    
    count_total += countP
    # print(count_total)

# print(count_total)
# print(loss_mape)

print("En utilisant la dernière valeur du flow comme prédiction :")
print(f"Test MAE: {loss_mae / len(test_loader)}")
print(f"Test MAPE: {loss_mape / count_total}")

tensor([[0.0000, 0.0000, 0.0000, 0.0000],
        [2.5924, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000, 0.0000],
        [0.0157, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000]])
tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        ...,
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])
tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        ...,
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])
tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        ...,
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])
tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        ...,
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])
tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
  

Compliqué d'utiliser la MAPE car trop de targets égale ou proche de zéro