In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys

# Put the directory two levels above the working directory on sys.path
# (once only) — presumably so the project-local `generator` and `models`
# imports below resolve; confirm against the repository layout.
module_path = os.path.abspath(os.path.join(os.pardir, os.pardir))
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from generator import RoadNetwork
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import pandas as pd
import networkx as nx
import numpy as np
from tqdm import tqdm

from models import Node2VecModel

In [2]:
# Dataset identifiers: `city` is interpolated into the OSM data path and passed
# as the dataset name below; `city_traj` is the capitalised variant
# (presumably used for trajectory data — not referenced in the cells shown here).
city, city_traj = "porto", "Porto"

In [3]:
# Create the road-network object and load the data stored for the selected
# city under ../../osm_data/<city>.
osm_dir = f"../../osm_data/{city}"
network = RoadNetwork()
network.load(osm_dir)

In [4]:
# Build the road-segment dataset that the embedding model is trained on:
# no trajectory data is supplied and coordinates are included.
data = network.generate_road_segment_pyg_dataset(
    dataset=city,
    include_coords=True,
    traj_data=None,
)

In [6]:
# Report CUDA status and make GPU_INDEX the current CUDA device when possible.
#
# All device queries are guarded by torch.cuda.is_available(): on a CPU-only
# host torch.cuda.current_device() raises, while the training cell further
# down already falls back to 'cpu'. Selecting an ordinal that does not exist
# would raise as well, so that case is reported instead of crashing.
GPU_INDEX = 1  # ordinal of the GPU to select; must be < torch.cuda.device_count()

print(torch.cuda.is_available())
if torch.cuda.is_available():
    # State before switching devices.
    print(torch.cuda.current_device())
    print(torch.cuda.device_count())

    if GPU_INDEX < torch.cuda.device_count():
        torch.cuda.set_device(GPU_INDEX)
    else:
        print(f"GPU {GPU_INDEX} is not present; keeping device {torch.cuda.current_device()}")

    # State after switching devices.
    print(torch.cuda.current_device())
    print(torch.cuda.device_count())

True
1
4
1
4


In [5]:
from torch_geometric.data import Data
import torch_geometric.transforms as T

# Train on the first CUDA device when one is available, otherwise on the CPU.
# NOTE(review): an earlier cell makes GPU 1 the current CUDA device, yet
# 'cuda:0' is pinned here — confirm which GPU is intended.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Move the dataset onto the chosen device through a (single-step) transform pipeline.
to_device = T.ToDevice(device)
transform = T.Compose([to_device])
data = transform(data)

# Fit the embedding model for 20 epochs with p = q = 1 and 3 negative samples.
# NOTE(review): no random seed is set before training, so the learned
# embeddings presumably differ between runs — confirm whether that matters.
model = Node2VecModel(data, device=device, p=1, q=1, negative_samples=3)
model.train(epochs=20)

In [8]:
# Persist the trained model under the deepwalk/ state directory.
# NOTE(review): presumably stored as "deepwalk" because the model was built
# with p = q = 1 — confirm the naming is intentional.
deepwalk_dir = "../model_states/deepwalk/"
model.save_model(path=deepwalk_dir)
# Saving only the embeddings is currently disabled:
# model.save_emb(path="../model_states/node2vec/")

In [6]:
from sklearn import model_selection
from sklearn import linear_model
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# Features: the embedding matrix produced by the trained model.
X = model.load_emb()

# Labels: the encoded highway tag of every line-graph node, looked up in the
# edge table one node at a time.
# NOTE(review): assumes the rows of X follow the iteration order of
# network.line_graph.nodes — confirm against Node2VecModel.load_emb.
edge_table = network.gdf_edges
y = np.array([
    edge_table.loc[node]["highway_enc"]
    for node in network.line_graph.nodes
])

# Optional filter that drops uncommon tags (currently disabled):
# mask = ((y==11) | (y==10) | (y==9) | (y==4) | (y==1) | (y==2) | (y==12) | (y==7))
# X = X[~mask, :]
# y = y[~mask]

# Class distribution of the labels.
print(np.unique(y, return_counts=True))

# Single hold-out split: 80% for training, 20% for testing, with a fixed seed
# (no cross-validation is performed here).
split = model_selection.train_test_split(X, y, random_state=1, test_size=0.2)
X_train, X_test, y_train, y_test = split

# Report the shape of each part of the split.
for split_name, split_part in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    print(f'{split_name} dimension= ', split_part.shape)

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12]), array([1680,   90,  144,  577,   76, 5832, 1451,   98, 1213,   20,   13,
         11,  126]))
X_train dimension=  (9064, 128)
X_test dimension=  (2267, 128)
y_train dimension=  (9064,)
y_test dimension=  (2267,)


In [7]:
# Logistic-regression baseline: predict the highway class from the embeddings.
#
# `multi_class="multinomial"` is intentionally not passed: the argument is
# deprecated in recent scikit-learn releases, and with the default solver a
# multinomial (softmax) model is already what gets fitted for a multiclass
# target, so the fitted model is the same.
lm = linear_model.LogisticRegression(max_iter=1000)
lm.fit(X_train, y_train)

y_pred = lm.predict(X_test)

# Some rare classes receive no predictions at all, which makes their precision
# undefined; zero_division=0 reports 0.00 for them (the same value the default
# produces) without emitting an UndefinedMetricWarning.
print(metrics.classification_report(y_test, y_pred, zero_division=0))

              precision    recall  f1-score   support

           0       0.60      0.44      0.51       309
           1       0.73      0.61      0.67        18
           2       0.64      0.52      0.57        31
           3       0.62      0.45      0.52       133
           4       0.33      0.08      0.12        13
           5       0.64      0.85      0.73      1175
           6       0.46      0.33      0.38       301
           7       0.67      0.10      0.17        20
           8       0.44      0.19      0.27       221
           9       0.00      0.00      0.00         5
          10       0.50      0.50      0.50         4
          11       0.00      0.00      0.00         3
          12       0.62      0.24      0.34        34

    accuracy                           0.61      2267
   macro avg       0.48      0.33      0.37      2267
weighted avg       0.58      0.61      0.58      2267

