In [4]:
%load_ext autoreload
%autoreload 2

import os
import sys

# Make the directory two levels up importable so the project packages
# (generator, models, evaluation) can be imported from this notebook.
module_path = os.path.abspath('../..')
if module_path not in sys.path:
    sys.path.append(module_path)

from generator import RoadNetwork, Trajectory
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import pandas as pd
import networkx as nx
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch_geometric.transforms as T

from models import GTNModel, GTCModel, Traj2VecModel, Node2VecModel, GAEModel, GCNEncoder, SRN2VecModel
from evaluation.tasks import TravelTimeEstimation, NextLocationPrediciton, DestinationPrediciton

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Road network loaded from the Porto OSM data directory.
network = RoadNetwork()
network.load("../../osm_data/porto")

# Trajectories read from the road-segment map CSV (limited to 500000 rows)
# and passed through generate_TTE_datatset().
trajectory = Trajectory(
    "../../datasets/trajectories/Porto/road_segment_map_final.csv",
    nrows=500000,
).generate_TTE_datatset()

# Speed features indexed by the (u, v, key) columns; missing values become 0.
traj_features = (
    pd.read_csv("../../datasets/trajectories/Porto/speed_features_unnormalized.csv")
    .set_index(["u", "v", "key"])
    .fillna(0)
)

# Two PyG datasets built without trajectory data: one with the "highway_enc"
# label dropped, one with the default labels.
data_roadclf = network.generate_road_segment_pyg_dataset(
    include_coords=True,
    drop_labels=["highway_enc"],
    traj_data=None,
)
data_rest = network.generate_road_segment_pyg_dataset(
    include_coords=True,
    traj_data=None,
)

# Precomputed adjacency matrices stored as gzipped text files.
adj = np.loadtxt("./gtn_precalc_adj/traj_adj_k_1.gz")
adj_sample = np.loadtxt("./gtn_precalc_adj/traj_adj_k_1_False_no_selfloops_smoothed.gz")

Pandas Apply:   0%|          | 0/500000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/500000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/500000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/500000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/500000 [00:00<?, ?it/s]

In [139]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the trajectories: `train` is passed to the GTN model,
# `test` is passed to the downstream evaluation tasks.
train, test = train_test_split(
    trajectory,
    random_state=69,
    test_size=0.3,
)

In [6]:
# Load the pretrained baseline models and build the initial embedding
# (GTC + Traj2Vec) that is later handed to the GTN model.

# Keep using GPU 1 when it exists, but fall back to GPU 0 on single-GPU hosts
# (a hard-coded 'cuda:1' is an invalid device there) and to CPU without CUDA.
if torch.cuda.is_available():
    device = torch.device('cuda:1' if torch.cuda.device_count() > 1 else 'cuda:0')
else:
    device = torch.device('cpu')

traj2vec = Traj2VecModel(data_roadclf, network, adj, device=device, emb_dim=128)
traj2vec.load_model("../model_states/traj2vec/model_base.pt")
gtc = GTCModel(data_roadclf, device, network, None, adj=adj)
gtc.load_model("../model_states/gtc/model_noroad.pt")
node2vec = Node2VecModel(data_roadclf, device=device, q=4, p=1)
node2vec.load_model("../model_states/node2vec/model_base.pt")
gae = GAEModel(data_roadclf, device=device, encoder=GCNEncoder, emb_dim=128)
gae.load_model("../model_states/gaegcn/model_noroad.pt")
srn = SRN2VecModel(None, device, network, remove_highway_label=True)
srn.load_dataset("./srn2vec-traindata.json")
srn.load_model("../model_states/srn2vec/model_noroad.pt")


# Initial embedding: GTC and Traj2Vec embeddings concatenated along axis 1.
init_emb = torch.Tensor(np.concatenate([gtc.load_emb(), traj2vec.load_emb()], axis=1))


In [142]:
# Instantiate the GTN model from the training trajectories, the speed
# features, the initial embedding and the sampled adjacency matrix.
model = GTNModel(
    data_roadclf,
    device,
    network,
    train,
    traj_features,
    init_emb,
    adj_sample,
    batch_size=32,
)
# Optionally load previously saved weights instead of training:
# model.load_model("../model_states/gtn/model_noroad.pt")

In [143]:
# Train the GTN model for 10 epochs.
# NOTE(review): in the recorded log the "util loss" is ~5e8-1e9 while the
# masked/judge trajectory losses stay below ~1.2, and the printed total loss
# equals the util loss -- confirm whether the loss terms should be rescaled
# or weighted before being summed.
model.train(epochs=10)

Epoch: 0, iter 0 loss: 711587456.0, masked traj loss 0.847, judge traj loss 0.689, util loss 711587456.000
Epoch: 0, iter 500 loss: 603249408.0, masked traj loss 0.573, judge traj loss 0.694, util loss 603249408.000
Epoch: 0, iter 1000 loss: 566962176.0, masked traj loss 0.585, judge traj loss 0.688, util loss 566962176.000
Epoch: 0, iter 1500 loss: 478014816.0, masked traj loss 0.994, judge traj loss 0.686, util loss 478014816.000
Epoch: 0, iter 2000 loss: 665787072.0, masked traj loss 0.922, judge traj loss 0.687, util loss 665787072.000
Epoch: 0, iter 2500 loss: 605643904.0, masked traj loss 0.832, judge traj loss 0.685, util loss 605643904.000
Epoch: 0, iter 3000 loss: 731862592.0, masked traj loss 0.656, judge traj loss 0.691, util loss 731862592.000
Epoch: 0, iter 3500 loss: 1114920448.0, masked traj loss 0.753, judge traj loss 0.596, util loss 1114920448.000
Epoch: 0, iter 4000 loss: 504528704.0, masked traj loss 1.192, judge traj loss 0.639, util loss 504528704.000
Epoch: 0, it

In [144]:
# Extract the trained token-embedding weights as a NumPy array on the CPU
# and display their shape.
tok_embed = model.model.transformer.embed.tok_embed
emb = tok_embed.weight.data.cpu().numpy()
emb.shape

(11331, 256)

In [145]:
# Fetch the embeddings of the three baseline models (same order as before:
# node2vec, GAE, SRN2Vec) for comparison against the GTN embedding.
node2vec_emb, gae_emb, srn_emb = (
    baseline.load_emb() for baseline in (node2vec, gae, srn)
)

In [19]:
# Persist the trained GTN weights. os.path.join must receive the path
# components as separate arguments; the previous single pre-concatenated
# string produced "../model_states/gtn//model.pt" and made the join a no-op.
torch.save(model.model.state_dict(), os.path.join("../model_states/gtn", "model.pt"))

In [146]:
from sklearn import model_selection
from sklearn import linear_model
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer

# Road-type classification probe: for each embedding variant, fit a multinomial
# logistic regression on the "highway_enc" labels and print the mean macro-F1
# over 5 cross-validation folds.
idxs = np.arange(len(network.line_graph.nodes))
y = np.array([network.gdf_edges.loc[n]["highway_enc"] for n in network.line_graph.nodes])

# Embedding variants, in print order: GTN, init+GTN concatenated, init+GTN summed,
# init only, GAE, node2vec, SRN2Vec.
z = emb
zct = np.concatenate((init_emb, emb), axis=1)
zadd = np.add(emb, init_emb)
eva = [z, zct, zadd, init_emb, gae_emb, node2vec_emb, srn_emb]

# The scorer does not depend on the embedding, so build it once.
scorer = make_scorer(metrics.f1_score, average="macro")
for X in eva:
    lm = linear_model.LogisticRegression(multi_class="multinomial", max_iter=1000)
    fold_scores = cross_val_score(estimator=lm, X=X, y=y, scoring=scorer, cv=5)
    print(np.mean(fold_scores))

0.3947719080530501
0.42961658189556384
0.4372306680973404
0.44664834892379923
0.25823137610760594
0.20901425528334538
0.06776537821165748


In [147]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer

# Mean-speed regression probe: for each embedding variant, fit a linear
# regression on the per-segment "avg_speed" and print the mean MAE over
# 5 cross-validation folds.

# Re-read the speed features and order the rows by the position of each
# (u, v, key) edge in network.line_graph.nodes, so that row i of the target
# lines up with row i of the embeddings.
map_id = {j: i for i, j in enumerate(network.line_graph.nodes)}
tf = pd.read_csv("../../datasets/trajectories/Porto/speed_features_unnormalized.csv")
tf = tf.set_index(["u", "v", "key"])
tf["idx"] = tf.index.map(map_id)
tf = tf.sort_values(by="idx", axis=0)

# Hold-out indices are not used by the cross-validated evaluation below;
# they are kept in case other cells reference them.
idxs = np.arange(len(network.line_graph.nodes))
train_idx, test_idx = model_selection.train_test_split(idxs, test_size=0.2, random_state=69)

# Build the target without an in-place fillna on a Series selected from `tf`
# (chained-assignment pattern whose effect on `tf` depends on the pandas version).
y = tf["avg_speed"].fillna(0).round(2).values

z = emb
zct = np.concatenate((init_emb, emb), axis=1)
zadd = np.add(emb, init_emb)
eva = [z, zct, zadd, init_emb, gae_emb, node2vec_emb, srn_emb]

# The scorer does not depend on the embedding, so build it once.
scorer = make_scorer(metrics.mean_absolute_error)
for X in eva:
    decoder = linear_model.LinearRegression(fit_intercept=True)
    print(np.mean(cross_val_score(estimator=decoder, X=X, y=y, scoring=scorer, cv=5)))

14.143396457691676
28.72703877806518
14.17213249946948
23.887047557215617
14.368647958034728
15.343440512087856
15.632968640714669


In [148]:
# Travel-time-estimation task on the held-out trajectories, reporting MSE and
# MAE for every embedding variant.
tte_kwargs = dict(
    traj_dataset=test,
    network=network,
    device=device,
    batch_size=256,
    epochs=5,
    seed=88,
)
travel_time_est = TravelTimeEstimation(**tte_kwargs)

# Register the metrics in the same order as before: MSE first, then MAE.
for metric_name, metric_fn in (
    ("MSE", metrics.mean_squared_error),
    ("MAE", metrics.mean_absolute_error),
):
    travel_time_est.register_metric(name=metric_name, metric_func=metric_fn, args={})

# Embedding variants, in print order: GTN, init+GTN concatenated, init+GTN summed,
# init only, GAE, node2vec, SRN2Vec.
z = emb
zct = np.concatenate((init_emb, emb), axis=1)
zadd = np.add(emb, init_emb)
eva = [z, zct, zadd, init_emb, gae_emb, node2vec_emb, srn_emb]
for X in eva:
    result = travel_time_est.evaluate(X)
    print(result)

{'MSE': 12122.260076701406, 'MAE': 76.50991675745647}
{'MSE': 11803.74874593377, 'MAE': 76.66678889745077}
{'MSE': 11996.927491581646, 'MAE': 78.08713494930268}
{'MSE': 11954.191135516034, 'MAE': 76.86543172111512}
{'MSE': 12889.226716738953, 'MAE': 80.59984841804504}
{'MSE': 12825.068171002236, 'MAE': 81.41830803044637}
{'MSE': 11932.491779679805, 'MAE': 77.10150223821005}


In [149]:
# Next-location-prediction task on the held-out trajectories, reporting
# accuracy for every embedding variant.
nextlocation_pred = NextLocationPrediciton(
    traj_dataset=test,
    network=network,
    device=device,
    batch_size=256,
    epochs=5,
    seed=88,
)

nextlocation_pred.register_metric(
    name="accuracy",
    metric_func=metrics.accuracy_score,
    args={"normalize": True},
)

# Embedding variants, in print order: GTN, init+GTN concatenated, init+GTN summed,
# init only, GAE, node2vec, SRN2Vec.
z = emb
# Fixed: this was assigned to `zctn` while `eva` reads `zct`, so the cell only
# ran when `zct` had been left behind by a previously executed cell.
zct = np.concatenate((init_emb, emb), axis=1)
zadd = np.add(emb, init_emb)
eva = [z, zct, zadd, init_emb, gae_emb, node2vec_emb, srn_emb]
for X in eva:
    print(nextlocation_pred.evaluate(X))

Pandas Apply:   0%|          | 0/120000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/120000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/30000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/30000 [00:00<?, ?it/s]

Average training loss in episode 0: 94.48265817475472
Average training loss in episode 1: 57.049108859064226
Average training loss in episode 2: 38.65703039840341
Average training loss in episode 3: 28.943796719061034
Average training loss in episode 4: 22.36021076031585
{'accuracy': 0.5639333333333333}
Average training loss in episode 0: 92.04217865217977
Average training loss in episode 1: 53.717257957214485
Average training loss in episode 2: 36.37113255173413
Average training loss in episode 3: 26.49525107605371
Average training loss in episode 4: 20.53188714594729
{'accuracy': 0.6016}
Average training loss in episode 0: 91.76211930134657
Average training loss in episode 1: 55.27608842178702
Average training loss in episode 2: 37.71371385537739
Average training loss in episode 3: 27.420851697291393
Average training loss in episode 4: 21.131240116761944
{'accuracy': 0.5858333333333333}
Average training loss in episode 0: 98.54018650990305
Average training loss in episode 1: 68.48151

In [150]:
# Destination-prediction task on the held-out trajectories, reporting
# accuracy for every embedding variant.
dest_kwargs = dict(
    traj_dataset=test,
    network=network,
    device=device,
    batch_size=256,
    epochs=5,
    seed=88,
)
dest_pred = DestinationPrediciton(**dest_kwargs)

dest_pred.register_metric(
    name="accuracy", metric_func=metrics.accuracy_score, args={"normalize": True}
)

# Embedding variants, in print order: GTN, init+GTN concatenated, init+GTN summed,
# init only, GAE, node2vec, SRN2Vec.
z = emb
zct = np.concatenate((init_emb, emb), axis=1)
zadd = np.add(emb, init_emb)
eva = [z, zct, zadd, init_emb, gae_emb, node2vec_emb, srn_emb]
for X in eva:
    result = dest_pred.evaluate(X)
    print(result)

Pandas Apply:   0%|          | 0/120000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/120000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/30000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/30000 [00:00<?, ?it/s]

Average training loss in episode 0: 6.330635722512121
Average training loss in episode 1: 4.574502902752809
Average training loss in episode 2: 3.984619103006717
Average training loss in episode 3: 3.6476753185044473
Average training loss in episode 4: 3.4101818782180104
{'accuracy': 0.23533333333333334}
Average training loss in episode 0: 6.174136269575497
Average training loss in episode 1: 4.450191152629568
Average training loss in episode 2: 3.8868592908895856
Average training loss in episode 3: 3.562092793775774
Average training loss in episode 4: 3.332198644751933
{'accuracy': 0.2413}
Average training loss in episode 0: 6.1727608353344365
Average training loss in episode 1: 4.460918168777596
Average training loss in episode 2: 3.8801761995246418
Average training loss in episode 3: 3.537017314927156
Average training loss in episode 4: 3.28957672058138
{'accuracy': 0.24276666666666666}
Average training loss in episode 0: 6.486407523978748
Average training loss in episode 1: 4.66854