In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys

# Resolve the directory two levels above the current working directory and
# put it on the import path (once), presumably so the project-local packages
# imported below (generator, models, evaluation) can be found -- TODO confirm.
module_path = os.path.abspath(os.path.join(os.pardir, os.pardir))
if module_path not in sys.path:
    sys.path.append(module_path)

from generator import RoadNetwork, Trajectory
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import pandas as pd
import networkx as nx
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch_geometric.transforms as T

from models import GTNModel, GTCModel, Traj2VecModel
from evaluation.tasks import TravelTimeEstimation, NextLocationPrediciton

In [2]:
# Road network: instantiate and load from the Porto OSM data directory.
network = RoadNetwork()
network.load("../../osm_data/porto")

# Trajectories: read the first 300000 rows of the map-matched CSV and derive
# the travel-time-estimation dataset from them.
trajectory = Trajectory(
    "../../datasets/trajectories/Porto/road_segment_map_final.csv",
    nrows=300000,
).generate_TTE_datatset()

# Two PyG road-segment datasets, both with coordinates and without trajectory
# data. `data_roadclf` additionally passes drop_labels=["highway_enc"]
# (presumably to keep the road-type label out of the features -- TODO confirm);
# `data_rest` uses the default label handling.
data_roadclf = network.generate_road_segment_pyg_dataset(
    include_coords=True,
    drop_labels=["highway_enc"],
    traj_data=None,
)
data_rest = network.generate_road_segment_pyg_dataset(
    include_coords=True,
    traj_data=None,
)

# Precomputed adjacency matrix stored as gzip-compressed text.
adj = np.loadtxt("./gtn_precalc_adj/traj_adj_k_1.gz")

Pandas Apply:   0%|          | 0/300000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/300000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/300000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/300000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/300000 [00:00<?, ?it/s]

In [3]:
# create init emb from gtc and traj2vec concat
#
# Device selection: the notebook was run on the second GPU ("cuda:1"). Keep
# that preference when at least two GPUs are visible, but fall back to
# "cuda:0" on single-GPU hosts, where "cuda:1" would be an invalid device.
if torch.cuda.is_available():
    gpu_index = 1 if torch.cuda.device_count() > 1 else 0
    device = torch.device(f'cuda:{gpu_index}')
else:
    device = torch.device('cpu')

# Traj2Vec model with 128-dimensional embeddings; load_model presumably
# restores the saved state from the given checkpoint -- TODO confirm.
traj2vec = Traj2VecModel(data_roadclf, network, adj, device=device, emb_dim=128)
traj2vec.load_model("../model_states/traj2vec/model_base.pt")

# GTC model, state loaded from the "noroad" checkpoint.
gtc = GTCModel(data_roadclf, device, network, None, adj=adj)
gtc.load_model("../model_states/gtc/model_noroad.pt")

# Concatenate both embeddings column-wise (GTC first, then Traj2Vec) and wrap
# the result in a CPU float tensor used to initialise the GTN model.
init_emb = torch.Tensor(np.concatenate([gtc.load_emb(), traj2vec.load_emb()], axis=1))


In [15]:
# Build the GTN model from the road-segment dataset, the road network, the
# trajectory data and the concatenated initial embedding; batches of 256.
model = GTNModel(
    data_roadclf,
    device,
    network,
    trajectory,
    init_emb,
    batch_size=256,
)

In [16]:
# Number of epochs requested for GTN training. The recorded log below stops
# at epoch 1, iteration 1000.
N_EPOCHS = 4
model.train(epochs=N_EPOCHS)

Epoch: 0, iter 0 loss: 1.8436007499694824
Epoch: 0, iter 100 loss: 1.442795753479004
Epoch: 0, iter 200 loss: 1.5422964096069336
Epoch: 0, iter 300 loss: 1.493300437927246
Epoch: 0, iter 400 loss: 1.5058035850524902
Epoch: 0, iter 500 loss: 1.4990227222442627
Epoch: 0, iter 600 loss: 1.5653284788131714
Epoch: 0, iter 700 loss: 1.5668137073516846
Epoch: 0, iter 800 loss: 1.4899424314498901
Epoch: 0, iter 900 loss: 1.5259298086166382
Epoch: 0, iter 1000 loss: 1.3216097354888916
Epoch: 0, iter 1100 loss: 1.4453686475753784
Epoch: 1, iter 0 loss: 1.5750726461410522
Epoch: 1, iter 100 loss: 1.500986933708191
Epoch: 1, iter 200 loss: 1.4377771615982056
Epoch: 1, iter 300 loss: 1.4004288911819458
Epoch: 1, iter 400 loss: 1.4504314661026
Epoch: 1, iter 500 loss: 1.4789103269577026
Epoch: 1, iter 600 loss: 1.5336271524429321
Epoch: 1, iter 700 loss: 1.4764974117279053
Epoch: 1, iter 800 loss: 1.4078818559646606
Epoch: 1, iter 900 loss: 1.4049170017242432
Epoch: 1, iter 1000 loss: 1.502930521965

In [17]:
# Pull the token-embedding weight matrix out of the trained transformer as a
# NumPy array on the host, then display its shape as the cell output.
token_embedding = model.model.transformer.embed.tok_embed
emb = token_embedding.weight.data.cpu().numpy()
emb.shape

(11331, 256)

In [19]:
# Persist the trained GTN weights. Join the directory and file name as
# separate components (the previous single-argument join produced
# "gtn//model.pt") and create the directory first so the save cannot fail on
# a fresh checkout.
gtn_state_dir = "../model_states/gtn"
os.makedirs(gtn_state_dir, exist_ok=True)
torch.save(model.model.state_dict(), os.path.join(gtn_state_dir, "model.pt"))

In [18]:
from sklearn import model_selection
from sklearn import linear_model
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer

# Road-type classification probe: how well does a multinomial logistic
# regression recover the "highway_enc" label of each road segment from
#   1. the trained GTN embedding,
#   2. the GTN embedding concatenated with the initial embedding,
#   3. the initial embedding alone?
# One macro-F1 score is printed per candidate, in that order.

# Fixed-seed 80/20 split over the line-graph node positions.
idxs = np.arange(len(network.line_graph.nodes))
train_idx, test_idx = model_selection.train_test_split(idxs, test_size=0.2, random_state=69)

# Label per line-graph node, looked up in the edge GeoDataFrame.
y = np.array([network.gdf_edges.loc[n]["highway_enc"] for n in network.line_graph.nodes])

# init_emb is a torch tensor; convert it once so every candidate feature
# matrix is a plain NumPy array.
init_emb_np = init_emb.detach().cpu().numpy()

z = emb
zct = np.concatenate((emb, init_emb_np), axis=1)
eva = [z, zct, init_emb_np]

# Loop-invariant pieces: the scorer and the labels of the evaluated split.
scorer = make_scorer(metrics.f1_score, average="macro")
y_test = y[test_idx]

# NOTE(review): the score is a 5-fold cross-validation *inside* the 20% split.
# cross_val_score clones and refits the estimator on each fold, so the earlier
# `lm.fit(X_train, y_train)` never influenced the printed number and has been
# removed. If a true held-out evaluation was intended, fit on `train_idx` and
# score `lm.predict(X[test_idx])` against `y_test` instead.
for X in eva:
    X_test = X[test_idx]
    lm = linear_model.LogisticRegression(multi_class="multinomial", max_iter=1000)
    print(np.mean(cross_val_score(estimator=lm, X=X_test, y=y_test, scoring=scorer, cv=5)))

0.2769757234281964
0.3655384219445935
0.48479595960566313
