In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys

# Put the directory two levels above the notebook on the import path
# (presumably the repository root that holds the project-local packages
# imported right after this — confirm against the repo layout).
module_path = os.path.abspath(os.path.join(os.pardir, os.pardir))
already_registered = any(entry == module_path for entry in sys.path)
if not already_registered:
    sys.path.append(module_path)

from generator import RoadNetwork, Trajectory
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import pandas as pd
import networkx as nx
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch_geometric.transforms as T

from models import GTNModel, GTCModel, Traj2VecModel, Node2VecModel, GAEModel, GCNEncoder, SRN2VecModel
from evaluation.tasks import TravelTimeEstimation, NextLocationPrediciton, DestinationPrediciton

In [2]:
# --- Road network ------------------------------------------------------------
network = RoadNetwork()
network.load("../../osm_data/porto")

# --- Trajectories --------------------------------------------------------------
# `nrows` is forwarded to Trajectory (presumably a cap on rows read — confirm).
trajectory = Trajectory(
    "../../datasets/trajectories/Porto/road_segment_map_final.csv",
    nrows=10000000,
).generate_TTE_datatset()

# --- Per-segment speed features, indexed by (u, v, key); missing values -> 0 ---
traj_features = (
    pd.read_csv("../../datasets/trajectories/Porto/speed_features_unnormalized.csv")
    .set_index(["u", "v", "key"])
    .fillna(0)
)

# --- PyG datasets ---------------------------------------------------------------
# `data_roadclf` drops the "highway_enc" label column; `data_rest` keeps every label.
data_roadclf = network.generate_road_segment_pyg_dataset(
    include_coords=True,
    drop_labels=["highway_enc"],
    traj_data=None,
)
data_rest = network.generate_road_segment_pyg_dataset(
    include_coords=True,
    traj_data=None,
)

# --- Precomputed trajectory adjacency matrices -----------------------------------
adj = np.loadtxt("./gtn_precalc_adj/traj_adj_k_2.gz")
# adj_k3 = np.loadtxt("./gtn_precalc_adj/traj_adj_k_3.gz")
adj_sample = np.loadtxt("./gtn_precalc_adj/traj_adj_k_1_False_no_selfloops_smoothed.gz")

Pandas Apply:   0%|          | 0/1544234 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1544234 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1544234 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1544234 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1544234 [00:00<?, ?it/s]

In [11]:
from sklearn.model_selection import train_test_split

# Hold out 10% of `trajectory` as `test`; the remaining 90% (`train`) is what
# the GTN models are constructed with, and `test` feeds the downstream task
# evaluations. The fixed random_state keeps the split repeatable.
# NOTE(review): this cell carries execution count 11 but must run before the
# model cells that consume `train` — the notebook depends on execution order.
train, test = train_test_split(trajectory, test_size=0.1, random_state=69)

In [4]:
# Load the pretrained baseline models and build the initial GTN embedding
# from the GTC and Traj2Vec embeddings.

# Prefer GPU index 2 (the original choice). Fall back to GPU 0 when CUDA is
# available but the host has fewer than three devices, and to CPU otherwise;
# an unconditional "cuda:2" fails on such hosts.
if torch.cuda.is_available():
    gpu_index = 2 if torch.cuda.device_count() > 2 else 0
    device = torch.device(f"cuda:{gpu_index}")
else:
    device = torch.device("cpu")

traj2vec = Traj2VecModel(data_rest, network, adj, device=device, emb_dim=128)
traj2vec.load_model("../model_states/traj2vec/model_base.pt")
gtc = GTCModel(data_rest, device, network, None, adj=adj)
gtc.load_model("../model_states/gtc/model_base.pt")
node2vec = Node2VecModel(data_rest, device=device, q=4, p=1)
node2vec.load_model("../model_states/node2vec/model_base.pt")
gae = GAEModel(data_rest, device=device, encoder=GCNEncoder, emb_dim=128)
gae.load_model("../model_states/gaegcn/model_base.pt")
srn = SRN2VecModel(None, device, network, remove_highway_label=False)
srn.load_dataset("./srn2vec-traindata.json")
srn.load_model("../model_states/srn2vec/model_base.pt")

# Fetch each embedding once and reuse it for both combinations below.
# NOTE(review): assumes load_emb() returns the same values on every call — confirm.
gtc_emb = gtc.load_emb()
traj2vec_emb = traj2vec.load_emb()

# Column-wise concatenation: the initial embedding handed to GTNModel.
init_emb = torch.Tensor(np.concatenate([gtc_emb, traj2vec_emb], axis=1))
# Element-wise sum: only referenced by the disabled `gtn_add` variant.
add_emb = torch.Tensor(gtc_emb + traj2vec_emb)


In [5]:
# Build the GTN model on the training trajectories, seeded with the
# concatenated initial embedding, then restore its saved weights.
model = GTNModel(
    data_rest,
    device,
    network,
    train,
    traj_features,
    init_emb,
    adj_sample,
    batch_size=512,
    hidden_dim=512,
)
model.load_model("../model_states/gtn/model_base_gtc_k2_10e_noutil_autoreg.pt")

# Disabled GTN variants, kept for reference:
# gtn_add = GTNModel(data_rest, device, network, train, traj_features, add_emb, adj_sample, batch_size=32, emb_dim=128, hidden_dim=128)
# gtn_add.load_model("../model_states/gtn/model_base_add.pt")
# gtn_con = GTNModel(data_rest, device, network, train, traj_features, init_emb, adj_sample, batch_size=32)
# gtn_con.load_model("../model_states/gtn/model_base_concat.pt")
# gtn_trans = GTNModel(data_rest, device, network, train, traj_features, None, adj_sample, batch_size=32, emb_dim=128, hidden_dim=128)
# gtn_trans.load_model("../model_states/gtn/model_base_only_trans.pt")
# gtn_con_25 = GTNModel(data_rest, device, network, train, traj_features, init_emb, adj_sample, batch_size=32)
# gtn_con_25.load_model("../model_states/gtn/model_base.pt")

right insert


In [20]:
# Second GTN instance: same inputs as `model` but hidden_dim=256, restored
# from a different saved state.
model2 = GTNModel(
    data_rest,
    device,
    network,
    train,
    traj_features,
    init_emb,
    adj_sample,
    batch_size=512,
    hidden_dim=256,
)
model2.load_model("../model_states/gtn/model_base_gtc_k2_10e_noutil_noautoreg_32_batch.pt")

right insert


In [9]:
# Train `model` (the GTN instance whose weights were restored with load_model)
# for two epochs.
# NOTE(review): execution count 9 is lower than that of the cells around it,
# so the recorded log may come from an earlier kernel state.
model.train(epochs=2)

Epoch: 0, iter 0 loss: 1.0847514867782593, masked traj loss 0.577, judge traj loss 0.508
Epoch: 0, iter 10 loss: 1.0973563194274902, masked traj loss 0.599, judge traj loss 0.498
Epoch: 0, iter 20 loss: 1.1238110065460205, masked traj loss 0.631, judge traj loss 0.493
Epoch 0 avg_loss=1.3245706991715864 total_acc= 73.14285714285714
Epoch: 1, iter 0 loss: 1.3249919414520264, masked traj loss 0.871, judge traj loss 0.454
Epoch: 1, iter 10 loss: 1.2418620586395264, masked traj loss 0.756, judge traj loss 0.486
Epoch: 1, iter 20 loss: 1.712965726852417, masked traj loss 1.154, judge traj loss 0.559
Epoch 1 avg_loss=1.3454584370959888 total_acc= 74.57142857142857


In [21]:
# Embedding evaluated in the cells below: taken from `model2`, with the first
# two rows dropped.
# NOTE(review): the [2:] slice presumably removes rows reserved for special
# tokens (e.g. padding/mask) so rows line up with road segments — confirm
# against GTNModel.load_emb.
# emb = model.load_emb()[2:]
# emb2 = model2.load_emb()[2:]
emb3 = model2.load_emb()[2:]
# emb2 = model2.load_emb()
# emb.shape

In [22]:
# Embeddings of the pretrained baselines used in the evaluations.
node2vec_emb = node2vec.load_emb()
gae_emb = gae.load_emb()
srn_emb = srn.load_emb()
# Short alias used by the evaluation cells; reuse the array loaded above
# instead of loading the same embedding a second time.
n2v_emb = node2vec_emb
# gtn_add_emb = gtn_add.load_emb()
# gtn_concat_emb = gtn_con.load_emb()
# gtn_only_trans_emb = gtn_trans.load_emb()
# gtn_con_25_emb = gtn_con_25.load_emb()

# Random-noise baseline with the same shape as `emb3`. It is drawn from a
# seeded generator so the baseline score is reproducible across kernel
# restarts (the seed matches the random_state used for the data split).
rand_emb = np.random.default_rng(69).standard_normal(emb3.shape)

In [19]:
# Persist the current GTN weights. The path components are passed to
# os.path.join separately; the previous string concatenation produced
# "gtn//model.pt" and left os.path.join with nothing to join.
torch.save(model.model.state_dict(), os.path.join("../model_states/gtn", "model.pt"))

In [23]:
from sklearn import model_selection
from sklearn import linear_model
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer

# Road-type classification probe: predict each segment's "highway_enc" label
# from its embedding with logistic regression, scored by macro-F1 over
# 5-fold cross-validation. One score is printed per entry of `eva`, in order.
idxs = np.arange(len(network.line_graph.nodes))
# train_idx, test_idx = model_selection.train_test_split(idxs, test_size=0.2, random_state=69)

# Labels follow the node order of the line graph.
highway_labels = [
    network.gdf_edges.loc[n]["highway_enc"] for n in network.line_graph.nodes
]
y = np.array(highway_labels)

# z = emb
emb_full = np.concatenate((init_emb, emb3), axis=1)
# emb_full2 = np.concatenate((init_emb, emb2), axis=1)
# emb_gtc = np.concatenate((gtc.load_emb(), emb), axis=1)
# zadd = np.add(emb, init_emb)
# zcnn = np.concatenate((zn, z4), axis=1)
# zctn = np.concatenate((zn, z5), axis=1)
# X = z # embedding for each node

# The scorer does not depend on the embedding, so it is built once.
scorer = make_scorer(metrics.f1_score, average="macro")

eva = [emb3, emb_full, init_emb, gae_emb, srn_emb, rand_emb]
for X in eva:
    # X_train, X_test, y_train, y_test = X[train_idx], X[test_idx], y[train_idx], y[test_idx]
    lm = linear_model.LogisticRegression(multi_class="multinomial", max_iter=1000)
    # lm.fit(X_train, y_train)
    fold_scores = cross_val_score(estimator=lm, X=X, y=y, scoring=scorer, cv=5)
    print(np.mean(fold_scores))

0.7304362871951083
0.8186748315183644
0.85607575965873
0.6261817759851643
0.8517336485185958
0.054525170059597936


In [25]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer
from sklearn.neural_network import MLPRegressor

# Mean-speed regression probe: predict each segment's "avg_speed" from its
# embedding with a one-hidden-layer MLP, scored by MAE over 5-fold
# cross-validation. One score is printed per entry of `eva`, in order.

# Re-read the speed features and order the rows by each segment's position in
# the line graph, so the target rows follow the line-graph node order.
# NOTE(review): segments missing from `map_id` get a NaN idx and sort last — confirm none exist.
map_id = {j: i for i, j in enumerate(network.line_graph.nodes)}
tf = pd.read_csv("../../datasets/trajectories/Porto/speed_features_unnormalized.csv")
tf = tf.set_index(["u", "v", "key"])
tf["idx"] = tf.index.map(map_id)
tf = tf.sort_values(by="idx", axis=0)

# Only referenced by the disabled manual split further down.
idxs = np.arange(len(network.line_graph.nodes))
train_idx, test_idx = model_selection.train_test_split(idxs, test_size=0.2, random_state=69)

# Target: average speed, missing values as 0, rounded to two decimals.
# Built as a fresh chain so `tf` is never modified through the selected
# column (in-place fillna on a selected column behaves differently across
# pandas versions).
y = tf["avg_speed"].fillna(0).round(2).to_numpy()

# z = emb
# zct = np.concatenate((init_emb, emb), axis=1)
# zadd = np.add(emb, init_emb)
# emb_full = np.concatenate((init_emb, emb), axis=1)
# emb_full2 = np.concatenate((init_emb, emb2), axis=1)
# emb_gtc = np.concatenate((gtc.load_emb(), emb), axis=1)

# The scorer does not depend on the embedding, so it is built once.
scorer = make_scorer(metrics.mean_absolute_error)

eva = [emb3, init_emb, srn_emb, n2v_emb]
for X in eva:
    # `(1024,)` is a real one-element tuple; the former `(1024)` was a plain int.
    decoder = MLPRegressor(hidden_layer_sizes=(1024,), random_state=88, max_iter=30)

    # X_train, X_test, y_train, y_test = X[train_idx], X[test_idx], y[train_idx], y[test_idx]

    # decoder.fit(X_train, y_train)
    fold_scores = cross_val_score(estimator=decoder, X=X, y=y, scoring=scorer, cv=5)
    print(np.mean(fold_scores))

12.982448581991415
12.725772435475433
15.861414443131949
14.285923659867086


In [10]:
# Downstream task: travel-time estimation on the held-out trajectories,
# reporting MAE for each embedding in `eva`.
travel_time_est = TravelTimeEstimation(
    traj_dataset=test, network=network, device=device,
    batch_size=128, epochs=5, seed=88,
)
# travel_time_est.register_metric(
#     name="MSE", metric_func=metrics.mean_squared_error, args={}
# )
travel_time_est.register_metric(name="MAE", metric_func=metrics.mean_absolute_error, args={})

# z = emb
# `emb3` concatenated column-wise with the initial embedding.
zct = np.concatenate((init_emb, emb3), axis=1)
# zadd = np.add(emb, init_emb)
# emb_full = np.concatenate((init_emb, emb), axis=1)
# emb_gtc = np.concatenate((gtc.load_emb(), emb), axis=1)

eva = [emb3, zct, n2v_emb, srn_emb]
for X in eva:
    mae_report = travel_time_est.evaluate(X)
    print(mae_report)

{'MAE': 80.1711030956158}
{'MAE': 79.77212380921698}
{'MAE': 77.8282006005686}


KeyboardInterrupt: 

In [26]:
# Downstream task: next-location prediction on the held-out trajectories,
# reporting accuracy for each embedding in `eva`.
nextlocation_pred = NextLocationPrediciton(
    traj_dataset=test, network=network, device=device,
    batch_size=128, epochs=5, seed=88,
)
nextlocation_pred.register_metric(
    name="accuracy", metric_func=metrics.accuracy_score, args={"normalize": True}
)

# z = emb
# zctn = np.concatenate((init_emb, emb), axis=1)
# zadd = np.add(emb, init_emb)
# emb_full = np.concatenate((init_emb, emb), axis=1)
# emb_full2 = np.concatenate((init_emb, emb2), axis=1)
# emb_gtc = np.concatenate((gtc.load_emb(), emb), axis=1)

eva = [emb3, n2v_emb, srn_emb]
for X in eva:
    accuracy_report = nextlocation_pred.evaluate(X)
    print(accuracy_report)

Pandas Apply:   0%|          | 0/108096 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/108096 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/46328 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/46328 [00:00<?, ?it/s]

                                                 

Average training loss in episode 0: 85.58177826559755


                                                 

Average training loss in episode 1: 59.24939918292345


                                                 

Average training loss in episode 2: 49.3140873835637


                                                 

Average training loss in episode 3: 44.69477894122784


                                                 

Average training loss in episode 4: 41.17308108312844
{'accuracy': 0.46643498532205147}


Pandas Apply:   0%|          | 0/108096 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/108096 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/46328 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/46328 [00:00<?, ?it/s]

                                                 

Average training loss in episode 0: 107.92889009735288


                                                 

Average training loss in episode 1: 108.84033501980572


                                                 

KeyboardInterrupt: 

In [13]:
# Downstream task: destination prediction on the held-out trajectories,
# reporting accuracy for each embedding in `eva`.
dest_pred = DestinationPrediciton(
    traj_dataset=test, network=network, device=device,
    batch_size=128, epochs=5, seed=88,
)
dest_pred.register_metric(
    name="accuracy", metric_func=metrics.accuracy_score, args={"normalize": True}
)

# emb_full = np.concatenate((init_emb, emb), axis=1)
# emb_full2 = np.concatenate((init_emb, emb2), axis=1)
# emb_gtc = np.concatenate((gtc.load_emb(), emb), axis=1)

eva = [emb3, n2v_emb]
for X in eva:
    accuracy_report = dest_pred.evaluate(X)
    print(accuracy_report)

Pandas Apply:   0%|          | 0/370616 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/370616 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/92655 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/92655 [00:00<?, ?it/s]

Average training loss in episode 0: 5.167947872814553
Average training loss in episode 1: 4.283290731890426
Average training loss in episode 2: 4.077435616959525
Average training loss in episode 3: 3.976194605313612
Average training loss in episode 4: 3.916674303745038
{'accuracy': 0.1951324807080028}
Average training loss in episode 0: 5.240711502525029
Average training loss in episode 1: 4.168358718986669
Average training loss in episode 2: 3.8998264221690637
Average training loss in episode 3: 3.766718561231102
Average training loss in episode 4: 3.6838043355151435
{'accuracy': 0.21757055744428255}
