In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from generator import RoadNetwork
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import pandas as pd
import networkx as nx
import numpy as np
from tqdm import tqdm

from models import GAEModel

In [2]:
network = RoadNetwork()
network.load("../osm_data/porto")
df = pd.read_csv("../datasets/trajectories/Porto/road_segment_map_final.csv", sep=";", usecols=["id", "cpath"])

In [None]:
torch.cuda.is_available()

True

In [21]:
from torch_geometric.data import Data
import torch_geometric.transforms as T

# create pyg dataset
data = network.generate_road_segment_pyg_dataset()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
transform = T.Compose([
    T.NormalizeFeatures(),
    T.ToDevice(device),
])
data = transform(data)
model = GAEModel(data, device=device, emb_dim=128)
model.train(epochs=10000)
model.save_model(path="../model_states/gae/")
model.save_emb(path="../model_states/gae/")


Epoch: 100, avg_loss: 1.2791578960418701
Epoch: 200, avg_loss: 1.2312979972362519
Epoch: 300, avg_loss: 1.1968167062600454
Epoch: 400, avg_loss: 1.172056510746479
Epoch: 500, avg_loss: 1.1517406213283539
Epoch: 600, avg_loss: 1.1372365782658258
Epoch: 700, avg_loss: 1.1262563954080853
Epoch: 800, avg_loss: 1.1167573767900467
Epoch: 900, avg_loss: 1.108552789290746
Epoch: 1000, avg_loss: 1.1014245734214783
Epoch: 1100, avg_loss: 1.0952021745118228
Epoch: 1200, avg_loss: 1.0895548352599145
Epoch: 1300, avg_loss: 1.0844366610967195
Epoch: 1400, avg_loss: 1.0795425428662981
Epoch: 1500, avg_loss: 1.0749889730612436
Epoch: 1600, avg_loss: 1.07084790173918
Epoch: 1700, avg_loss: 1.0670086788429933
Epoch: 1800, avg_loss: 1.0634348378247684
Epoch: 1900, avg_loss: 1.0602191099053935
Epoch: 2000, avg_loss: 1.0571646459400654
Epoch: 2100, avg_loss: 1.0542653743426005
Epoch: 2200, avg_loss: 1.0517519599741156
Epoch: 2300, avg_loss: 1.0491167515516282
Epoch: 2400, avg_loss: 1.0466371060411135
Epoch

TypeError: save_model() got an unexpected keyword argument 'save_name'

In [22]:
z = model.model.encode(data.x, data.edge_index)
z.shape

torch.Size([11331, 128])

In [23]:
model.save_model(path="../model_states/gae/")
model.save_emb(path="../model_states/gae/")

In [24]:
from sklearn import model_selection
from sklearn import linear_model
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

X = z.detach().cpu().numpy() # embedding for each node
# train simple classifier on 80% of data with cross validation
y = np.array([network.gdf_edges.loc[n]["highway_enc"] for n in network.line_graph.nodes])

#mask = ((y==11) | (y==10) | (y==9) | (y==4) | (y==1) | (y==2) | (y==12) | (y==7)) # remove uncommon tags
#X = X[~mask, :]
#y = y[~mask]
print(np.unique(y, return_counts=True))

X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size= 0.2, random_state = 1)

print('X_train dimension= ', X_train.shape)
print('X_test dimension= ', X_test.shape)
print('y_train dimension= ', y_train.shape)
print('y_test dimension= ', y_test.shape)

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12]), array([1680,   90,  144,  577,   76, 5832, 1451,   98, 1213,   20,   13,
         11,  126]))
X_train dimension=  (9064, 128)
X_test dimension=  (2267, 128)
y_train dimension=  (9064,)
y_test dimension=  (2267,)


In [25]:
lm = linear_model.LogisticRegression(multi_class="multinomial", max_iter=1000)
lm.fit(X_train, y_train)
print(metrics.classification_report(y_test, lm.predict(X_test)))

              precision    recall  f1-score   support

           0       0.83      0.85      0.84       309
           1       0.63      0.67      0.65        18
           2       0.60      0.39      0.47        31
           3       0.65      0.55      0.60       133
           4       0.00      0.00      0.00        13
           5       0.76      0.86      0.81      1175
           6       0.57      0.40      0.47       301
           7       0.00      0.00      0.00        20
           8       0.62      0.67      0.65       221
           9       0.50      0.20      0.29         5
          10       0.50      0.25      0.33         4
          11       0.00      0.00      0.00         3
          12       1.00      0.47      0.64        34

    accuracy                           0.73      2267
   macro avg       0.51      0.41      0.44      2267
weighted avg       0.71      0.73      0.72      2267



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [7]:
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T

device = torch.device('cpu')
transform = T.Compose([
    T.NormalizeFeatures(),
    T.ToDevice(device),
    T.RandomLinkSplit(num_val=0.05, num_test=0.1, is_undirected=True,
                      split_labels=True, add_negative_train_samples=False),
])
dataset = Planetoid(".", "Cora", transform=transform)
t,v, te = dataset[0]
t

Data(x=[2708, 1433], edge_index=[2, 8976], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], pos_edge_label=[4488], pos_edge_label_index=[2, 4488])

In [21]:
from torch_geometric.utils import train_test_split_edges
device = torch.device('cpu')
transform = T.Compose([
    T.NormalizeFeatures(),
    T.ToDevice(device),
    T.RandomLinkSplit(num_val=0.05, num_test=0.1, is_undirected=True,
                      split_labels=True, add_negative_train_samples=False)
])
print(data)
transform(data)
test = train_test_split_edges(data)

print(test)

Data(x=[11331, 8], edge_index=[2, 26617])




Data(x=[11331, 8], val_pos_edge_index=[2, 699], test_pos_edge_index=[2, 1399], train_pos_edge_index=[2, 23788], train_neg_adj_mask=[11331, 11331], val_neg_edge_index=[2, 699], test_neg_edge_index=[2, 1399])
