In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys

# Put the directory two levels above the working directory on the import
# path (presumably where the evaluation/generator/models packages live).
module_path = os.path.abspath('../..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch_geometric.transforms as T
from evaluation.tasks import (DestinationPrediciton, NextLocationPrediciton,
                              TravelTimeEstimation)
from generator import RoadNetwork, Trajectory
from models import (GAEModel, GCNEncoder, GTCModel, GTNModel, Node2VecModel,
                    SRN2VecModel, Traj2VecModel)
from torch.utils.data import DataLoader
from tqdm import tqdm


In [2]:
def _read_trajectories(pickle_path):
    """Read a pickled trajectory frame and pass every `seg_seq` entry through np.array."""
    frame = pd.read_pickle(pickle_path)
    frame["seg_seq"] = frame["seg_seq"].map(np.array)
    return frame


# Road network restored from the Porto OSM data directory.
network = RoadNetwork()
network.load("../../osm_data/porto")

# Trajectory train/test tables (the "69" in the file names presumably is the split seed).
traj_train = _read_trajectories(
    "../../datasets/trajectories/Porto/traj_train_test_split/train_69.pkl"
)
traj_test = _read_trajectories(
    "../../datasets/trajectories/Porto/traj_train_test_split/test_69.pkl"
)

# Speed features indexed by the (u, v, key) columns; missing values become 0.
traj_features = (
    pd.read_csv("../../datasets/trajectories/Porto/speed_features_unnormalized.csv")
    .set_index(["u", "v", "key"])
    .fillna(0)
)

# Two PyG datasets built from the network: one without the "highway_enc"
# label column, one with the default label set.
data_roadclf = network.generate_road_segment_pyg_dataset(
    include_coords=True,
    drop_labels=["highway_enc"],
    traj_data=None,
)
data_rest = network.generate_road_segment_pyg_dataset(
    include_coords=True,
    traj_data=None,
)

# Precomputed adjacency matrices stored as gzipped text.
adj = np.loadtxt("./gtn_precalc_adj/traj_adj_k_2.gz")
# adj_k3 = np.loadtxt("./gtn_precalc_adj/traj_adj_k_3.gz")
adj_sample = np.loadtxt("./gtn_precalc_adj/traj_adj_k_1_False_no_selfloops_smoothed.gz")

In [11]:
# from sklearn.model_selection import train_test_split

# train, test = train_test_split(trajectory, test_size=0.1, random_state=69)

In [3]:
# Restore the pretrained baseline models, then derive the initial embeddings
# from GTC and Traj2Vec (concatenated and summed variants).
if torch.cuda.is_available():
    device = torch.device('cuda:3')
else:
    device = torch.device('cpu')

traj2vec = Traj2VecModel(data_rest, network, adj=adj, device=device, emb_dim=128)
traj2vec.load_model("../model_states/traj2vec/model_base.pt")

gtc = GTCModel(data_rest, device, network, None, adj=adj)
gtc.load_model("../model_states/gtc/model_base.pt")

node2vec = Node2VecModel(data_rest, device=device, q=4, p=1)
node2vec.load_model("../model_states/node2vec/model_base.pt")

gae = GAEModel(data_rest, device=device, encoder=GCNEncoder, emb_dim=128)
gae.load_model("../model_states/gaegcn/model_base.pt")

# SRN2Vec additionally needs its training dataset loaded before the weights.
srn = SRN2VecModel(None, device, network, remove_highway_label=False)
srn.load_dataset("./srn2vec-traindata-porto.json")
srn.load_model("../model_states/srn2vec/model_base.pt")


# Column-wise concatenation of the two embeddings.
concat_parts = [gtc.load_emb(), traj2vec.load_emb()]
init_emb = torch.Tensor(np.concatenate(concat_parts, axis=1))
# Element-wise sum of the two embeddings.
summed_emb = gtc.load_emb() + traj2vec.load_emb()
add_emb = torch.Tensor(summed_emb)


In [4]:
# Build the GTN model on the training trajectories and restore its weights
# from the "model_base_69" checkpoint.
model = GTNModel(
    data_rest,
    device,
    network,
    traj_train,
    traj_features,
    init_emb,
    adj_sample,
    batch_size=512,
    hidden_dim=512,
)
model.load_model("../model_states/gtn/model_base_69.pt")
# gtn_add = GTNModel(data_rest, device, network, train, traj_features, add_emb, adj_sample, batch_size=32, emb_dim=128, hidden_dim=128)
# gtn_add.load_model("../model_states/gtn/model_base_add.pt")
# gtn_con = GTNModel(data_rest, device, network, train, traj_features, init_emb, adj_sample, batch_size=32)
# gtn_con.load_model("../model_states/gtn/model_base_concat.pt")
# gtn_trans = GTNModel(data_rest, device, network, train, traj_features, None, adj_sample, batch_size=32, emb_dim=128, hidden_dim=128)
# gtn_trans.load_model("../model_states/gtn/model_base_only_trans.pt")
# gtn_con_25 = GTNModel(data_rest, device, network, train, traj_features, init_emb, adj_sample, batch_size=32)
# gtn_con_25.load_model("../model_states/gtn/model_base.pt")

right insert


In [14]:
# Probe the transformer with a single hand-written row of six integer ids,
# moved to the active device; the second positional argument is None.
probe_ids = torch.tensor([[2, 3, 4, 5, 6, 7]]).to(device)
h = model.model.transformer(probe_ids, None)

torch.Size([1, 6])


In [15]:
# Show the device index that the transformer output `h` lives on.
h.get_device()

2

In [39]:
from evaluation.tasks.task_loader import init_destination

# Build the destination task from the test trajectories (6 epochs, seed 69)
# and evaluate it with the GTN transformer module passed as `emb`.
task_config = {"epochs": 6}
task = init_destination(task_config, traj_test, network, device, seed=69)
transformer_module = model.model.transformer
stats = task.evaluate(emb=transformer_module)

Pandas Apply:   0%|          | 0/370616 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/370616 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/92655 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/92655 [00:00<?, ?it/s]

256


100%|██████████| 724/724 [04:18<00:00,  2.80it/s]
 17%|█▋        | 1/6 [04:18<21:32, 258.51s/it]

Average training loss in episode 0: 7.326306860091278


100%|██████████| 724/724 [04:18<00:00,  2.81it/s]
 33%|███▎      | 2/6 [08:36<17:12, 258.21s/it]

Average training loss in episode 1: 6.868975094010158


 23%|██▎       | 169/724 [01:00<03:17,  2.81it/s]
 33%|███▎      | 2/6 [09:36<19:13, 288.38s/it]


KeyboardInterrupt: 

In [35]:
# Display the metrics held in `stats` (assigned from task.evaluate(...)).
# NOTE(review): this cell's execution count (35) is lower than that of the
# cell assigning `stats` (39), whose recorded run ended in KeyboardInterrupt,
# so the value shown may stem from an earlier run; re-run to confirm.
stats

{'accuracy': 0.003939344881549836}

In [5]:
# GTN variant restored from the "..._10e_noutil_noautoreg_32_batch" checkpoint.
# Unlike the other variants in this notebook it is built with hidden_dim=256.
model2 = GTNModel(
    data_rest,
    device,
    network,
    traj_train,
    traj_features,
    init_emb,
    adj_sample,
    batch_size=512,
    hidden_dim=256,
)
model2.load_model(
    "../model_states/gtn/model_base_gtc_k2_10e_noutil_noautoreg_32_batch.pt"
)

right insert


In [6]:
# GTN variant restored from the "..._10e_noutil_noautoreg" checkpoint.
model3 = GTNModel(
    data_rest,
    device,
    network,
    traj_train,
    traj_features,
    init_emb,
    adj_sample,
    batch_size=512,
    hidden_dim=512,
)
model3.load_model(
    "../model_states/gtn/model_base_gtc_k2_10e_noutil_noautoreg.pt"
)

right insert


In [7]:
# GTN variant restored from the "..._20e_noutil_autoreg" checkpoint.
model4 = GTNModel(
    data_rest,
    device,
    network,
    traj_train,
    traj_features,
    init_emb,
    adj_sample,
    batch_size=512,
    hidden_dim=512,
)
model4.load_model(
    "../model_states/gtn/model_base_gtc_k2_20e_noutil_autoreg.pt"
)

right insert


In [8]:
# GTN variant restored from the "..._20e_noutil_noautoreg_512_batch" checkpoint.
model5 = GTNModel(
    data_rest,
    device,
    network,
    traj_train,
    traj_features,
    init_emb,
    adj_sample,
    batch_size=512,
    hidden_dim=512,
)
model5.load_model(
    "../model_states/gtn/model_base_gtc_k2_20e_noutil_noautoreg_512_batch.pt"
)

right insert


In [9]:
# GTN variant restored from the "..._30e_noutil_noautoreg_512_batch" checkpoint.
model6 = GTNModel(
    data_rest,
    device,
    network,
    traj_train,
    traj_features,
    init_emb,
    adj_sample,
    batch_size=512,
    hidden_dim=512,
)
model6.load_model(
    "../model_states/gtn/model_base_gtc_k2_30e_noutil_noautoreg_512_batch.pt"
)

right insert


In [9]:
# Run two training epochs on `model`, the GTN instance whose weights were
# loaded from model_base_69.pt above.
# NOTE(review): presumably this continues from the loaded weights, so
# embeddings read from `model` afterwards include this extra training; confirm.
model.train(epochs=2)

Epoch: 0, iter 0 loss: 1.0847514867782593, masked traj loss 0.577, judge traj loss 0.508
Epoch: 0, iter 10 loss: 1.0973563194274902, masked traj loss 0.599, judge traj loss 0.498
Epoch: 0, iter 20 loss: 1.1238110065460205, masked traj loss 0.631, judge traj loss 0.493
Epoch 0 avg_loss=1.3245706991715864 total_acc= 73.14285714285714
Epoch: 1, iter 0 loss: 1.3249919414520264, masked traj loss 0.871, judge traj loss 0.454
Epoch: 1, iter 10 loss: 1.2418620586395264, masked traj loss 0.756, judge traj loss 0.486
Epoch: 1, iter 20 loss: 1.712965726852417, masked traj loss 1.154, judge traj loss 0.559
Epoch 1 avg_loss=1.3454584370959888 total_acc= 74.57142857142857


In [10]:
# Read every GTN variant's embedding matrix and drop its first two rows
# (presumably rows reserved for special tokens; TODO confirm).
emb, emb2, emb3, emb4, emb5, emb6 = [
    gtn_variant.load_emb()[2:]
    for gtn_variant in (model, model2, model3, model4, model5, model6)
]
# emb2 = model2.load_emb()
# emb.shape

In [18]:
# Sanity check: print the shape of the embedding taken from model6 after
# its first two rows were dropped.
print(emb6.shape)

(11331, 256)


In [11]:
# Embedding matrices of the baseline models restored earlier in the notebook.
node2vec_emb = node2vec.load_emb()
gae_emb = gae.load_emb()
srn_emb = srn.load_emb()
# Short alias used by the evaluation cells; previously this was a second,
# redundant node2vec.load_emb() call.
n2v_emb = node2vec_emb
# gtn_add_emb = gtn_add.load_emb()
# gtn_concat_emb = gtn_con.load_emb()
# gtn_only_trans_emb = gtn_trans.load_emb()
# gtn_con_25_emb = gtn_con_25.load_emb()

# Random-noise baseline with the same shape as emb3. It is drawn from a
# seeded generator so the baseline scores reported below can be reproduced
# after a kernel restart (the global, unseeded RNG gave new values each run).
RAND_EMB_SEED = 69
rand_emb = np.random.default_rng(RAND_EMB_SEED).standard_normal(emb3.shape)

In [19]:
# Persist the current weights of the wrapped GTN network. The path components
# are passed to os.path.join separately; the previous single pre-concatenated
# argument made the join a no-op and produced a doubled separator
# ("gtn//model.pt").
torch.save(
    model.model.state_dict(),
    os.path.join("../model_states/gtn", "model.pt"),
)

In [13]:
from sklearn import model_selection
from sklearn import linear_model
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer

# Road-type classification probe: for each embedding, fit a multinomial
# logistic regression on the "highway_enc" label of every line-graph node
# and print the mean macro-F1 over 5 cross-validation folds.
line_graph_nodes = list(network.line_graph.nodes)
idxs = np.arange(len(line_graph_nodes))
y = np.array(
    [network.gdf_edges.loc[node]["highway_enc"] for node in line_graph_nodes]
)

# Initial embedding concatenated with the model3 embedding (not part of the
# list evaluated below).
emb_full = np.concatenate((init_emb, emb3), axis=1)

eva = [emb5, emb6, init_emb, gae_emb, srn_emb, n2v_emb, rand_emb]
# The scorer does not depend on the embedding, so it is built once.
scorer = make_scorer(metrics.f1_score, average="macro")
for X in eva:
    lm = linear_model.LogisticRegression(multi_class="multinomial", max_iter=1000)
    fold_scores = cross_val_score(estimator=lm, X=X, y=y, scoring=scorer, cv=5)
    print(np.mean(fold_scores))

0.7836262167247774
0.7768205967059234
0.85607575965873
0.6261817759851643
0.8517336485185958
0.2239445968724524
0.055446008285913215


In [31]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer
from sklearn.neural_network import MLPRegressor

# Mean-speed regression probe: for each embedding, fit an MLP regressor on
# the per-segment average speed and print the mean MAE over 5 CV folds.

# Speed features indexed by (u, v, key), ordered by each segment's position
# in the line graph so that the target rows follow the node order.
tf = pd.read_csv(
    "../../datasets/trajectories/Porto/speed_features_unnormalized.csv"
).set_index(["u", "v", "key"])
map_id = {j: i for i, j in enumerate(network.line_graph.nodes)}
tf["idx"] = tf.index.map(map_id)
tf = tf.sort_values(by="idx", axis=0)

idxs = np.arange(len(network.line_graph.nodes))
train_idx, test_idx = model_selection.train_test_split(idxs, test_size=0.2, random_state=69)

# Target: average speed with missing values set to 0, rounded to 2 decimals.
# Built as one chain so `tf` itself is never modified; the earlier in-place
# fillna on the selected column was chained mutation whose effect on `tf`
# depends on the pandas version.
y = tf["avg_speed"].fillna(0).round(2).values

eva = [emb5, emb6, init_emb, gae_emb, srn_emb, n2v_emb, rand_emb]
# The scorer does not depend on the embedding, so it is built once.
scorer = make_scorer(metrics.mean_absolute_error)
for X in eva:
    # (1024,) is an explicit one-element tuple: a single hidden layer of 1024 units.
    decoder = MLPRegressor(hidden_layer_sizes=(1024,), random_state=88, max_iter=30)
    print(np.mean(cross_val_score(estimator=decoder, X=X, y=y, scoring=scorer, cv=5)))

12.266369952847308
12.3138748424747
12.724326544038089
14.236604496291301
15.861414443131949
14.285923659867086
17.839338931717034


In [32]:
# Travel-time estimation task on the test trajectories, scored with MAE.
travel_time_kwargs = dict(
    traj_dataset=traj_test,
    network=network,
    device=device,
    batch_size=128,
    epochs=5,
    seed=88,
)
travel_time_est = TravelTimeEstimation(**travel_time_kwargs)
# (An MSE metric could be registered the same way with metrics.mean_squared_error.)
travel_time_est.register_metric(name="MAE", metric_func=metrics.mean_absolute_error, args={})

# Initial embedding concatenated with the model3 embedding (not part of the
# list evaluated below).
zct = np.concatenate((init_emb, emb3), axis=1)

eva = [emb5, emb6, init_emb, gae_emb, srn_emb, n2v_emb]
for X in eva:
    travel_time_result = travel_time_est.evaluate(X)
    print(travel_time_result)

{'MAE': 75.68767885430262}
{'MAE': 76.16525251575713}
{'MAE': 75.57562885135384}
{'MAE': 78.68336486311374}
{'MAE': 76.10248517295007}
{'MAE': 80.14496336806562}


In [14]:
# Next-location prediction task on the test trajectories, scored with
# normalized accuracy.
nextlocation_kwargs = dict(
    traj_dataset=traj_test,
    network=network,
    device=device,
    batch_size=512,
    epochs=5,
    seed=88,
)
nextlocation_pred = NextLocationPrediciton(**nextlocation_kwargs)
nextlocation_pred.register_metric(
    name="accuracy", metric_func=metrics.accuracy_score, args={"normalize": True}
)

# Evaluate each embedding in turn and print the resulting metrics dict.
eva = [emb5, n2v_emb, srn_emb, emb6]
for X in eva:
    nextlocation_result = nextlocation_pred.evaluate(X)
    print(nextlocation_result)

Pandas Apply:   0%|          | 0/324289 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/324289 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/138982 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/138982 [00:00<?, ?it/s]

                                                 

Average training loss in episode 0: 64.1776114021566


                                                 

Average training loss in episode 1: 29.95768808190379


                                                 

Average training loss in episode 2: 22.062768059198035


                                                 

Average training loss in episode 3: 18.137576253632265


                                                 

Average training loss in episode 4: 15.9652257777915
{'accuracy': 0.6250305794995036}


                                                 

Average training loss in episode 0: 106.25378229038948


                                                 

Average training loss in episode 1: 101.396855050457


                                                 

Average training loss in episode 2: 93.98275962612982


                                                 

Average training loss in episode 3: 81.5304538474098


                                                 

Average training loss in episode 4: 68.9096606401991
{'accuracy': 0.2600840396598121}


                                                 

Average training loss in episode 0: 67.8003768559886


                                                 

Average training loss in episode 1: 33.295734291197


                                                 

Average training loss in episode 2: 25.011373507863716


                                                 

Average training loss in episode 3: 21.20470808883571


                                                 

Average training loss in episode 4: 19.079350609884653
{'accuracy': 0.6056467744024406}


                                                 

Average training loss in episode 0: 58.3942295399374


                                                 

Average training loss in episode 1: 26.257069139450508


                                                 

Average training loss in episode 2: 19.0066166315169


                                                 

Average training loss in episode 3: 15.441738206881453


                                                 

Average training loss in episode 4: 13.429345633329277
{'accuracy': 0.6416946079348405}


In [27]:
# Destination prediction task, restricted to the first 100000 test
# trajectories and scored with normalized accuracy.
dest_kwargs = dict(
    traj_dataset=traj_test[:100000],
    network=network,
    device=device,
    batch_size=128,
    epochs=5,
    seed=88,
)
dest_pred = DestinationPrediciton(**dest_kwargs)
dest_pred.register_metric(
    name="accuracy", metric_func=metrics.accuracy_score, args={"normalize": True}
)

# Evaluate every GTN variant plus the node2vec baseline.
eva = [emb, emb2, emb3, emb4, emb5, emb6, n2v_emb]
for X in eva:
    dest_result = dest_pred.evaluate(X)
    print(dest_result)

Pandas Apply:   0%|          | 0/80000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/80000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/20000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/20000 [00:00<?, ?it/s]

Average training loss in episode 0: 7.412824636077881
Average training loss in episode 1: 6.142995080566406
Average training loss in episode 2: 5.437318682098389
Average training loss in episode 3: 4.99011307220459
Average training loss in episode 4: 4.66860934638977
{'accuracy': 0.14615}
Average training loss in episode 0: 6.536014373016357
Average training loss in episode 1: 5.085628775787353
Average training loss in episode 2: 4.6572041358947756
Average training loss in episode 3: 4.413045981216431
Average training loss in episode 4: 4.245047995376587
{'accuracy': 0.15835}
Average training loss in episode 0: 6.5510960357666015
Average training loss in episode 1: 5.102572080230713
Average training loss in episode 2: 4.635688545227051
Average training loss in episode 3: 4.3731794322967525
Average training loss in episode 4: 4.18681318359375
{'accuracy': 0.1603}
Average training loss in episode 0: 6.565308332061767
Average training loss in episode 1: 5.095686183929443
Average training 