In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys

# Put the directory two levels above the working directory on sys.path so the
# project-local packages imported below resolve. The membership test keeps
# re-running this cell from appending the same entry repeatedly.
module_path = os.path.abspath('../..')
if not (module_path in sys.path):
    sys.path.append(module_path)

from generator import RoadNetwork, Trajectory
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import pandas as pd
import networkx as nx
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch_geometric.transforms as T

from models import GTNModel, GTCModel, GAEModel, Node2VecModel, GCNEncoder, Traj2VecModel
from evaluation.tasks import TravelTimeEstimation, NextLocationPrediciton

In [2]:
# Load the Porto road network and the trajectory file, then turn the
# trajectories into the dataset returned by generate_TTE_datatset().
network = RoadNetwork()
network.load("../../osm_data/porto")
trajectory = Trajectory(
    "../../datasets/trajectories/Porto/road_segment_map_final.csv",
    nrows=100000000,
).generate_TTE_datatset()


def _min_max(column):
    """Linearly rescale a pandas Series so its minimum maps to 0 and its maximum to 1."""
    lowest, highest = column.min(), column.max()
    return (column - lowest) / (highest - lowest)


# Per-segment trajectory features keyed by (u, v, key): both columns are
# min-max normalised, and remaining NaNs are replaced with 0 afterwards.
traj_features = (
    pd.read_csv("../../datasets/trajectories/Porto/speed_features_unnormalized.csv")
    .set_index(["u", "v", "key"])
    .assign(
        util=lambda frame: _min_max(frame["util"]),
        avg_speed=lambda frame: _min_max(frame["avg_speed"]),
    )
    .fillna(0)
)

# Three variants of the road-segment dataset, differing in the dropped label
# column and in whether the trajectory features are passed in.
# data = network.generate_road_segment_pyg_dataset(drop_labels=["highway_enc"])
data_roadclf = network.generate_road_segment_pyg_dataset(
    include_coords=True,
    drop_labels=["highway_enc"],
    traj_data=None,
)
data_meanspeed = network.generate_road_segment_pyg_dataset(
    include_coords=True,
    drop_labels=["avg_speed"],
    traj_data=traj_features.copy(),
)
data_rest = network.generate_road_segment_pyg_dataset(
    include_coords=True,
    traj_data=None,
)

Pandas Apply:   0%|          | 0/1544234 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1544234 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1544234 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1544234 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1544234 [00:00<?, ?it/s]

In [3]:
# Pre-computed adjacency matrices stored as gzip-compressed text; the file
# names carry a k of 2 and 3 respectively.
# for traj2vec 'traj_adj_k_1_False_no_selfloops_smoothed'
adj_bi, adj_bi_3 = (
    np.loadtxt(adj_file)
    for adj_file in (
        "./gtn_precalc_adj/traj_adj_k_2.gz",
        "./gtn_precalc_adj/traj_adj_k_3.gz",
    )
)
# adj_for = np.loadtxt("./gtn_precalc_adj/traj_adj_k_1_False.gz")

In [None]:
# Debug peek at row 108, columns 130-139 of `adj`.
# NOTE(review): `adj` is not assigned by any active code in this notebook (only
# `adj_bi` and `adj_bi_3` are loaded), so this raises NameError on a fresh
# kernel — confirm which matrix is meant before re-running.
print(adj[108, 130:140])

In [None]:
# Scratch call to traj_walk; the third argument is a list of 10000 zeros
# (presumably start nodes, i.e. 10000 walks from node 0 — TODO confirm).
# NOTE(review): neither `Traj2Vec` nor `adj` is imported or assigned anywhere in
# the visible notebook (the import cell only brings in `Traj2VecModel`), so this
# cell fails on a fresh kernel.
walks = Traj2Vec.traj_walk(adj, 5, 10000*[0], 10)
print(walks)

In [None]:
# Scratch test of the compiled random-walk routine on a CSR copy of `adj`.
# NOTE(review): `adj` is not assigned by any active code in this notebook —
# confirm which matrix is meant before re-running.
from _walker import random_walks as _random_walks
from scipy import sparse

# Convert to CSR and pull out its three arrays, cast to the fixed-width dtypes
# handed to the walker (uint32 index arrays, float32 weights).
A = sparse.csr_matrix(adj)
indptr = A.indptr.astype(np.uint32)
indices = A.indices.astype(np.uint32)
weights = A.data.astype(np.float32)

# Presumably (start nodes, then two integer walk parameters) — the meaning of
# the trailing 5 and 6 is not visible here; verify against _walker.
_random_walks(indptr, indices, weights, [100,100,100], 5, 6)

In [None]:
# Build a Traj2Vec model and train it for 200 epochs.
# NOTE(review): `data` and `adj` are not assigned by any active code in this
# notebook (`data` only appears in a commented-out line of the loading cell),
# so this cell fails on a fresh kernel — confirm the intended dataset/matrix.
# NOTE(review): 'cuda:2' is chosen whenever any GPU is available, which
# presumably requires at least three GPUs on the machine.
device = torch.device('cuda:2' if torch.cuda.is_available() else 'cpu')
traj2vec = Traj2VecModel(
            data,
            network,
            adj,
            device=device,
            emb_dim=128,
            walk_length=30,
            context_size=5,
            walks_per_node=25,
            num_neg=10,
        )
traj2vec.train(epochs=200)

In [None]:
# Persist the Traj2Vec state dict to "modelt.pt" in the current working
# directory. Depends on `traj2vec` from the training cell above.
torch.save(traj2vec.state_dict(), "modelt.pt")

In [None]:
# Drop the existing node features and apply the OneHotDegree(128) transform.
# NOTE(review): `data` is not assigned by any active code in this notebook.
# The cell also rebinds `data` to the transformed object, so re-running it
# transforms the already-transformed data again.
data.x = None
data = T.OneHotDegree(128)(data)

In [None]:
# Use the first GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# precalc adj matrices: the model is constructed without an `adj` argument and
# is not bound to a name, so the cell only displays it.
GTCModel(
    data_rest,
    device,
    network,
    trajectory,
    k=6,
    bidirectional=False,
    add_self_loops=True,
)

In [4]:
models = []

# Prefer the second GPU, but only when it actually exists.
# torch.cuda.is_available() is also true on single-GPU machines, where
# 'cuda:1' is an invalid device ordinal; fall back to the first GPU, then CPU.
if torch.cuda.device_count() > 1:
    device = torch.device('cuda:1')
elif torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# GTC model on the road-classification dataset with the adjacency matrix
# loaded from traj_adj_k_2.gz.
model = GTCModel(data_roadclf, device, network, adj=adj_bi)
# model2 = GTCModel(data_roadclf, device, network, trajectory, adj=adj_for)
# model2 = GTNModel(data2, device, network, trajectory, load_traj_adj_path="./gtn_precalc_adj/traj_adj_k_1.gz", norm=True)
# model3 = GAEModel(data2, device=device, encoder=GCNEncoder, emb_dim=128, layers=1)
# model4 = GAEModel(data2, device=device, encoder=GCNEncoder, emb_dim=128, layers=1)
# model5 = Node2VecModel(data_roadclf, device=device, q=4, p=1)
#model6 = Traj2VecModel(data_roadclf, network, adj, device=device, emb_dim=128, walk_length=30, context_size=5, walks_per_node=25, num_neg=10)

# models.extend([(model, 5000), (model2, 5000)]) # (model3, 5000), (model4, 5000)

In [20]:
# Sanity check: display the shape of the feature tensor stored at
# model.train_data.x (presumably nodes x features — confirm).
model.train_data.x.shape

torch.Size([11331, 33])

In [5]:
# Earlier experiment (disabled): train one GTN model per k and collect them.
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# for k in [1]:
#     model = GTNModel(data, device, network, trajectory, load_traj_adj_path="./traj_adj_k_{}.gz".format(k))
#     model.train(epochs=1000)
#     models.append(model)

# Train the GTC model built in the previous cell for 20000 epochs.
model.train(epochs=20000)

Epoch: 1000, avg_loss: 1.108349953532219
Epoch: 2000, avg_loss: 1.0935820901989937
Epoch: 3000, avg_loss: 1.0865380076964697
Epoch: 4000, avg_loss: 1.0817484531402588
Epoch: 5000, avg_loss: 1.0784950746774673
Epoch: 6000, avg_loss: 1.076286903699239
Epoch: 7000, avg_loss: 1.0746554876736232
Epoch: 8000, avg_loss: 1.0734245615601539
Epoch: 9000, avg_loss: 1.0724296962155235
Epoch: 10000, avg_loss: 1.0716193663477898
Epoch: 11000, avg_loss: 1.0709444018602372
Epoch: 12000, avg_loss: 1.0703609810769559
Epoch: 13000, avg_loss: 1.0698584491748075
Epoch: 14000, avg_loss: 1.0693966476832117
Epoch: 15000, avg_loss: 1.0689894316752753
Epoch: 16000, avg_loss: 1.0686044336780907
Epoch: 17000, avg_loss: 1.0682511512321584
Epoch: 18000, avg_loss: 1.0679153449866507
Epoch: 19000, avg_loss: 1.067601022362709


In [8]:
# Fetch the embedding matrix of the trained model and display its shape.
# Note that `z` is rebound later in the notebook to a concatenation of the GTC
# and Traj2Vec embeddings, so its meaning depends on which cell ran last.
z = model.load_emb()
z.shape

(11331, 128)

In [None]:
# `models` is initialised to an empty list and the only call that fills it is
# commented out, so on a fresh run this prints [].
print(models)

In [6]:
# Save the trained model via its save_model helper, passing the
# "../model_states/gtc/" directory (file naming is decided by save_model).
model.save_model(path="../model_states/gtc/")

In [None]:
# Load stored node2vec / traj2vec checkpoints and build layer-normalised copies
# of their embeddings (z4 from z2, z5 from z3).
# NOTE(review): `model5` and `model6` are only created in commented-out lines
# of the model-construction cell, so this cell fails on a fresh kernel.
from torch_geometric.nn.norm import LayerNorm
# load node2vec emb
model5.load_model("../model_states/node2vec/model_base.pt")
z2 = model5.load_emb()
model6.load_model("../model_states/traj2vec/model_base.pt")
z3 = model6.load_emb()

# The norm is sized from z3's width and applied to z2 as well — assumes both
# embeddings have the same dimension; TODO confirm.
norm = LayerNorm(z3.shape[1], affine=False)
z4 = norm(torch.Tensor(z2)).detach().cpu().numpy()
z5 = norm(torch.Tensor(z3)).detach().cpu().numpy()

In [7]:
# Load the stored checkpoints and collect the embedding matrices that the
# downstream probe cells compare.
# NOTE(review): the output saved with this cell is a size-mismatch RuntimeError
# raised while loading the GAE state dict (checkpoint first-layer weight
# [256, 33] vs. [256, 21] in the model built here) — presumably the checkpoint
# was trained on a dataset with a different feature count; confirm that
# data_roadclf matches it.
gae = GAEModel(data_roadclf, device=device, encoder=GCNEncoder, emb_dim=128, layers=2)
gae.load_model("../model_states/gaegcn/model_base.pt")
gae_emb = gae.load_emb()

gtc = GTCModel(data_roadclf, device, network, adj=adj_bi)
gtc.load_model("../model_states/gtc/model_base.pt")
# gtc2 = GTCModel(data_rest, device, network, adj=adj_bi_3)
# gtc2.load_model("../model_states/gtc/model_base_k3_20k.pt")

t2v = Traj2VecModel(data_rest, network, adj_bi, device=device)
t2v.load_model("../model_states/traj2vec/model_base.pt")

# Combined representation: GTC and Traj2Vec embeddings concatenated along the
# feature axis.
z = np.concatenate([gtc.load_emb(), t2v.load_emb()], axis=1)
# z2 = np.concatenate([gtc2.load_emb(), t2v.load_emb()], axis=1)

# Random baseline with the same shape as z. Drawn from a seeded generator so
# the baseline scores are reproducible across kernel restarts (the global,
# unseeded np.random state gave a different baseline on every run).
rand_emb = np.random.default_rng(88).standard_normal(z.shape)

RuntimeError: Error(s) in loading state_dict for GAE:
	size mismatch for encoder.layers.0.lin.weight: copying a param with shape torch.Size([256, 33]) from checkpoint, the shape in current model is torch.Size([256, 21]).

In [54]:
# Eyeball the GTC and Traj2Vec embedding matrices, separated by a dashed rule.
print(gtc.load_emb(), "--------", t2v.load_emb(), sep="\n")

[[-0.03461973 -0.7446656  -0.36518294 ... -0.12940633 -0.19541745
  -0.41113168]
 [-0.06902651 -0.30286184 -0.42417783 ... -0.11377949 -0.24691841
  -0.15814777]
 [-0.10037328 -0.67797256 -0.45901316 ... -0.14502342 -0.143866
  -0.5151305 ]
 ...
 [-0.03662838  0.23067358  0.02284841 ...  0.09844947 -0.02313899
   0.28437442]
 [-0.52729726 -0.24934813  0.35677475 ... -0.5402291  -0.09656639
   0.44079298]
 [-0.52729726 -0.24934813  0.35677475 ... -0.5402291  -0.09656639
   0.44079298]]
--------
[[ 0.14195527  0.71312606  0.27853405 ...  0.08516737 -0.62053263
   0.17841183]
 [ 0.00569221  0.5116822   0.06596196 ...  0.01285282 -0.41922888
   0.14586426]
 [ 0.00964291  0.76392955  0.12494162 ... -0.02320814 -0.6379421
   0.17545395]
 ...
 [ 0.00233091  0.13886695  0.01223819 ...  0.01058747 -0.02673449
  -0.24037123]
 [-0.40296662 -0.1827692   0.13607268 ...  0.56599444  0.04516564
  -0.09180278]
 [-0.44287586 -0.19512364  0.13986318 ...  0.57609206  0.10952294
  -0.09739541]]


In [12]:
from sklearn.model_selection import train_test_split

# 90/10 split of the trajectory dataset with a fixed random_state.
# Only `test` is consumed by a later visible cell (the next-location task);
# `train` is not referenced again in this notebook.
train, test = train_test_split(trajectory, test_size=0.1, random_state=69)

In [13]:
from sklearn import model_selection
from sklearn import linear_model
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer

# Road-type classification probe: for each embedding matrix, fit a multinomial
# logistic regression on the "highway_enc" label of every line-graph node and
# print the mean macro-F1 over 5 cross-validation folds.

# Hold-out indices; not used by the cross-validated loop below but kept for
# ad-hoc single-split experiments.
idxs = np.arange(len(network.line_graph.nodes))
train_idx, test_idx = model_selection.train_test_split(idxs, test_size=0.2, random_state=69)

# Labels in line-graph node order, so row i of an embedding matches y[i]
# (assumes load_emb() returns rows in that same order — TODO confirm).
y = np.array([network.gdf_edges.loc[n]["highway_enc"] for n in network.line_graph.nodes])

# The scorer does not depend on the embedding, so it is built once.
scorer = make_scorer(metrics.f1_score, average="macro")

# Order of the printed scores: concatenated GTC+Traj2Vec, GTC only, GAE,
# random baseline.
eva = [z, gtc.load_emb(), gae_emb, rand_emb]
for X in eva:
    lm = linear_model.LogisticRegression(multi_class="multinomial", max_iter=1000)
    print(np.mean(cross_val_score(estimator=lm, X=X, y=y, scoring=scorer, cv=5)))

0.4652928742256364
0.34928147803563026
0.25823137610760594
0.05440272810234651


In [52]:
from sklearn import metrics
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer
from sklearn.neural_network import MLPRegressor

# Mean-speed regression probe: for each embedding matrix, fit a one-hidden-layer
# MLP on the per-segment "avg_speed" column and print the mean MAE over 5
# cross-validation folds.

# Re-read the unnormalised speed features and order the rows by the position of
# their (u, v, key) index in the line graph, so y lines up with embedding rows.
# NOTE(review): rows whose key is not a line-graph node get a NaN idx and sort
# last; this assumes the CSV covers exactly the line-graph nodes — confirm.
tf = pd.read_csv("../../datasets/trajectories/Porto/speed_features_unnormalized.csv")
tf = tf.set_index(["u", "v", "key"])
map_id = {j: i for i, j in enumerate(network.line_graph.nodes)}
tf["idx"] = tf.index.map(map_id)
tf = tf.sort_values(by="idx", axis=0)

# Build the target without mutating `tf`: the previous in-place fillna on the
# extracted column is a chained in-place operation that may or may not write
# through to the frame depending on the pandas version.
y = tf["avg_speed"].fillna(0).round(2).values

# The scorer does not depend on the embedding, so it is built once.
scorer = make_scorer(metrics.mean_absolute_error)

# Order of the printed scores: concatenated GTC+Traj2Vec, GTC only, GAE,
# random baseline.
eva = [z, gtc.load_emb(), gae_emb, rand_emb] # z, z2, gtc.load_emb(), gtc2.load_emb(), gae_emb, rand_emb]
for X in eva:
    # (1024,) is a one-element tuple; the former "(1024)" was just the int 1024.
    decoder = MLPRegressor(hidden_layer_sizes=(1024,), random_state=88, max_iter=30)
    print(np.mean(cross_val_score(estimator=decoder, X=X, y=y, scoring=scorer, cv=5)))

12.68308312660881
13.492779042323273
14.231388514448602
17.7632361509869


In [39]:
# Import the single task factory this cell uses instead of star-importing the
# whole task_loader module, so the origin of the name is explicit and nothing
# else is silently pulled into the notebook namespace.
from evaluation.tasks.task_loader import init_meanspeed
from evaluation.evaluation import Evaluation
from models import ConcateAdapterModel

# Run the "meanspeed" task through the Evaluation harness for two concatenated
# model pairs.
# NOTE(review): this rebinds `eva` and `model`, which other cells use for a
# list of embeddings and for the GTC model respectively.
eva = Evaluation()
eva.register_task("meanspeed", init_meanspeed(None, network, 88))

# NOTE(review): `gtc2` is only created in commented-out lines of the
# checkpoint-loading cell, so this fails on a fresh kernel unless those lines
# are re-enabled.
model = ConcateAdapterModel(None, None, models=[gtc, t2v], aggregator="concate")
model2 = ConcateAdapterModel(None, None, models=[gtc2, t2v], aggregator="concate")

eva.register_model("concat", model)
eva.register_model("concat2", model2)
res = eva.run()

print(res)

[[-0.07796444 -0.55997896 -0.34868354 ...  0.08516737 -0.62053263
   0.17841183]
 [-0.17300586 -0.313582   -0.3508929  ...  0.01285282 -0.41922888
   0.14586426]
 [-0.08473617 -0.48305732 -0.47893226 ... -0.02320814 -0.6379421
   0.17545395]
 ...
 [-0.03662838  0.23067358  0.02284841 ...  0.01058747 -0.02673449
  -0.24037123]
 [-0.52729726 -0.24934813  0.35677475 ...  0.56599444  0.04516564
  -0.09180278]
 [-0.52729726 -0.24934813  0.35677475 ...  0.57609206  0.10952294
  -0.09739541]]


Current task: 100%|██████████| 1/1 [02:10<00:00, 130.97s/it]

[('meanspeed',                 MSE        MAE       RMSE
concat   366.677423  12.915396  19.113661
concat2  369.236217  12.931941  19.173809)]





In [80]:
# Travel-time-estimation probe on the full trajectory dataset, reporting MSE
# and MAE for each embedding matrix.
travel_time_est = TravelTimeEstimation(
    traj_dataset=trajectory, network=network, device=device,
    batch_size=128, epochs=3, seed=88,
)

# Register both error metrics; each gets its own empty kwargs dict.
for metric_name, metric_fn in (
    ("MSE", metrics.mean_squared_error),
    ("MAE", metrics.mean_absolute_error),
):
    travel_time_est.register_metric(name=metric_name, metric_func=metric_fn, args={})

# NOTE(review): `z2` and `gtc2` are only defined in commented-out code or in
# cells that depend on undefined names, so this list cannot be built on a
# fresh kernel as written.
eva = [z, z2, gtc.load_emb(), gtc2.load_emb(), gae_emb, rand_emb]
for X in eva:
    print(travel_time_est.evaluate(X))

{'MSE': 9949.570608700878, 'MAE': 71.89226948661805}
{'MSE': 11646.49069475799, 'MAE': 81.10363915457725}
{'MSE': 10092.045623252046, 'MAE': 72.39778632850647}


In [18]:
from sklearn import metrics

# Next-location-prediction probe. It is given the 10% `test` split of the
# trajectories as its dataset and reports normalised accuracy for each
# embedding matrix.
nextlocation_pred = NextLocationPrediciton(
    traj_dataset=test, network=network, device=device,
    batch_size=512, epochs=10, seed=88,
)
nextlocation_pred.register_metric(
    name="accuracy", metric_func=metrics.accuracy_score, args={"normalize": True}
)

# Order of the printed results: concatenated GTC+Traj2Vec, GTC only, GAE,
# random baseline.
eva = [z, gtc.load_emb(), gae_emb, rand_emb]
for X in eva:
    result = nextlocation_pred.evaluate(X)
    print(result)

Pandas Apply:   0%|          | 0/108096 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/108096 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/46328 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/46328 [00:00<?, ?it/s]

                                                 

Average training loss in episode 0: 93.13604711136728


                                                 

Average training loss in episode 1: 60.42360982355082


                                                 

Average training loss in episode 2: 42.60375679663892


                                                 

Average training loss in episode 3: 33.21575438301518


                                                 

Average training loss in episode 4: 27.45381045791338


                                                 

Average training loss in episode 5: 23.018663941689258


                                                 

Average training loss in episode 6: 19.585921386502825


                                                 

Average training loss in episode 7: 17.329670883574575


                                                 

Average training loss in episode 8: 15.814587629066324


                                                 

Average training loss in episode 9: 14.090249111067575
{'accuracy': 0.6325116560179589}


                                                 

Average training loss in episode 0: 99.54299696436468


                                                 

Average training loss in episode 1: 76.37419421717806


                                                 

Average training loss in episode 2: 59.97953197191347


                                                 

Average training loss in episode 3: 49.218008239314244


                                                 

Average training loss in episode 4: 41.48823459193392


                                                 

Average training loss in episode 5: 36.14445158220687


                                                 

Average training loss in episode 6: 31.431394756964917


                                                 

Average training loss in episode 7: 28.38374802751361


                                                 

Average training loss in episode 8: 25.533734420560442


                                                 

Average training loss in episode 9: 23.22743212501958
{'accuracy': 0.5691374546710413}


                                                 

Average training loss in episode 0: 95.28560181383817


                                                 

Average training loss in episode 1: 65.81340795193078


                                                 

Average training loss in episode 2: 48.93731500517647


                                                 

Average training loss in episode 3: 39.55925354867611


                                                 

Average training loss in episode 4: 33.625449108627606


                                                 

Average training loss in episode 5: 29.306134313907265


                                                 

Average training loss in episode 6: 26.167263624803077


                                                 

Average training loss in episode 7: 23.8469948858585


                                                 

Average training loss in episode 8: 21.672283620204567


                                                 

Average training loss in episode 9: 20.182108267298286
{'accuracy': 0.5863192885512002}


                                                 

Average training loss in episode 0: 71.99475444937652


                                                 

Average training loss in episode 1: 41.86305915184741


                                                 

Average training loss in episode 2: 32.94744706603716


                                                 

Average training loss in episode 3: 28.265848897538095


                                                 

Average training loss in episode 4: 25.153848009289437


                                                 

Average training loss in episode 5: 23.122095629854023


                                                 

Average training loss in episode 6: 22.05950561559425


                                                 

Average training loss in episode 7: 20.81006624563685


                                                 

Average training loss in episode 8: 19.88060889603957


                                                 

Average training loss in episode 9: 19.166528063000374
{'accuracy': 0.6014721118977724}
