In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys

module_path = os.path.abspath(os.path.join('../..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from generator import RoadNetwork, Trajectory
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import pandas as pd
import networkx as nx
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch_geometric.transforms as T

from models import GTNModel, GAEModel, Node2VecModel, GCNEncoder, Traj2Vec
from evaluation.tasks import TravelTimeEstimation

In [2]:
network = RoadNetwork()
network.load("../../osm_data/porto")
trajectory = Trajectory("../../datasets/trajectories/Porto/road_segment_map_final.csv", nrows=1000).generate_TTE_datatset()

traj_features = pd.read_csv("../../datasets/trajectories/Porto/speed_features_unnormalized.csv")
traj_features.set_index(["u", "v", "key"], inplace=True)
traj_features["util"] = (traj_features["util"] - traj_features["util"].min()) / (traj_features["util"].max() - traj_features["util"].min())  # min max normalization
traj_features["avg_speed"] = (traj_features["avg_speed"] - traj_features["avg_speed"].min()) / (traj_features["avg_speed"].max() - traj_features["avg_speed"].min())  # min max normalization
traj_features.fillna(0, inplace=True)

data = network.generate_road_segment_pyg_dataset(traj_data=traj_features.copy(), drop_labels=["highway_enc"])
data2 = network.generate_road_segment_pyg_dataset(traj_data=traj_features.copy(), drop_labels=["highway_enc"], include_coords=True)

Pandas Apply:   0%|          | 0/1000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1000 [00:00<?, ?it/s]


  df["x"] = df.geometry.centroid.x / 100  # normalize to -2/2

  df["y"] = df.geometry.centroid.y / 100  # normalize to -1/1


In [28]:
m = Traj2Vec.map_traj_to_node_ids(trajectory["seg_seq"].values, network, 10)

100000it [00:04, 23826.95it/s]


In [29]:
print(m[-1])

[2844. 2845. 2847. 1476. 1479. 1481. 1486. 1494. 1499. 1505.]


In [4]:
adj = np.loadtxt("./gtn_precalc_adj/traj_adj_k_1_False_no_selfloops_smoothed.gz")

In [146]:
print(adj[108, 130:140])

[0.         0.         0.         0.         0.         0.
 0.83439517 0.00781896 0.15778587 0.        ]


In [153]:
walks = Traj2Vec.traj_walk(adj, 5, 10000*[0], 10)
print(walks)

[[ 0  2  6 16  9]
 [ 0  2  6 16  9]
 [ 0  2  6 16  9]
 ...
 [ 0  2  6 16  9]
 [ 0  2  6 16  9]
 [ 0  2  6 16  9]]


In [147]:
from _walker import random_walks as _random_walks
from scipy import sparse

A = sparse.csr_matrix(adj)
indptr = A.indptr.astype(np.uint32)
indices = A.indices.astype(np.uint32)
weights = A.data.astype(np.float32)

_random_walks(indptr, indices, weights, [100,100,100], 5, 6)

array([[100, 108, 136,  78,  69,  74],
       [100, 108, 136,  78,  69,  74],
       [100, 108, 136,  78,  69,  75],
       [100, 108, 136,  78,  69,  75],
       [100, 108, 136,  78,  69,  73],
       [100, 108, 136,  78,  69,  76],
       [100, 108, 138,  89,  68,  69],
       [100, 108, 136,  78,  69,  75],
       [100, 108, 136,  78,  69,  76],
       [100, 108, 136,  78,  69,  75],
       [100, 108, 136,  78,  69,  74],
       [100, 108, 136,  78,  69,  75],
       [100, 108, 136,  78,  69,  75],
       [100, 108, 136,  78,  69,  74],
       [100, 108, 136,  78,  69,  75]], dtype=uint32)

In [15]:
device = torch.device('cuda:2' if torch.cuda.is_available() else 'cpu')
traj2vec = Traj2Vec(
            data,
            network,
            adj,
            embedding_dim=128,
            walk_length=30,
            context_size=5,
            walks_per_node=25,
            num_negative_samples=10,
        ).to(device)
loader = traj2vec.loader(batch_size=128, shuffle=True, num_workers=4)
optimizer = torch.optim.SparseAdam(list(traj2vec.parameters()), lr=0.01)

In [16]:
avg_loss = 0
for e in range(50):
    traj2vec.train()
    total_loss = 0
    for pos_rw, neg_rw in loader:
        optimizer.zero_grad()
        loss = traj2vec.loss(pos_rw.to(device), neg_rw.to(device))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    avg_loss += total_loss / len(loader)
    if e > 0 and e % 5 == 0:
        print("Epoch: {}, avg_loss: {}".format(e, avg_loss / e))

Epoch: 5, avg_loss: 2.1407772922783757
Epoch: 10, avg_loss: 1.4511424391457204


In [None]:
data.x = None
data = T.OneHotDegree(128)(data)

In [44]:
device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
# precalc adj matrices
GTNModel(data, device, network, trajectory, k=1, bidirectional=False, add_self_loops=False)

100%|██████████| 1544234/1544234 [11:21<00:00, 2267.04it/s]


<models.gtn.GTNModel at 0x7f8dd1e89f40>

In [31]:
models = []
device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
model = GTNModel(data2, device, network, trajectory, load_traj_adj_path="./gtn_precalc_adj/traj_adj_k_1.gz")
model2 = GAEModel(data2, device=device, encoder=GCNEncoder, emb_dim=128, layers=1)
model3 = GTNModel(data2, device, network, trajectory, load_traj_adj_path="./gtn_precalc_adj/traj_adj_k_1.gz")
model4 = GAEModel(data2, device=device, encoder=GCNEncoder, emb_dim=128, layers=1)
model5 = Node2VecModel(data, device=device, q=4, p=1)

models.extend([(model, 5000), (model2, 5000), (model3, 5000), (model4, 5000)])

In [None]:
model.train_data.edge_weight.shape

In [5]:
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
models = []
# for k in [1]:
#     model = GTNModel(data, device, network, trajectory, load_traj_adj_path="./traj_adj_k_{}.gz".format(k))
#     model.train(epochs=1000)
#     models.append(model)

model3.train(epochs=50)
model.train(epochs=5000)
model2.train(epochs=5000)
models.extend([model, model2, model3])

Epoch: 20, avg_loss: 1.1346867181277005
Epoch: 40, avg_loss: 0.9276655710647613
Epoch: 1000, avg_loss: 1.0282853301167487
Epoch: 2000, avg_loss: 1.0185488188266754


KeyboardInterrupt: 

In [None]:
z = model.model(model.train_data.x, model.train_data.edge_traj_index, model.train_data.edge_weight)
z.shape

In [None]:
print(models)

In [None]:
model.save_model(path="../model_states/gtn/test")

In [21]:
# load node2vec emb
model5.load_model("../model_states/node2vec/model.pt")
z2 = model5.load_emb()

In [24]:
from sklearn import model_selection
from sklearn import linear_model
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer

n2v = models[-1]
idxs = np.arange(len(network.line_graph.nodes))
train_idx, test_idx = model_selection.train_test_split(idxs, test_size=0.2, random_state=69)
y = np.array([network.gdf_edges.loc[n]["highway_enc"] for n in network.line_graph.nodes])

for m, e in models:
    m.train(epochs=e)
    z = np.concatenate((m.load_emb(), z2), axis=1)
    X = z # embedding for each node

    X_train, X_test, y_train, y_test = X[train_idx], X[test_idx], y[train_idx], y[test_idx]

    lm = linear_model.LogisticRegression(multi_class="multinomial", max_iter=1000)
    lm.fit(X_train, y_train)
    scorer = make_scorer(metrics.f1_score, average="macro")
    print(np.mean(cross_val_score(estimator=lm, X=X_test, y=y_test, scoring=scorer, cv=5)))
#print(metrics.classification_report(y_test, lm.predict(X_test)))

Epoch: 1000, avg_loss: 1.004720325767994
Epoch: 2000, avg_loss: 1.0040359412431716
Epoch: 3000, avg_loss: 1.0038682837287585
Epoch: 4000, avg_loss: 1.0037380935698748




0.47076854460100853
Epoch: 500, avg_loss: 1.1095781285762787
Epoch: 1000, avg_loss: 1.084949031829834
Epoch: 1500, avg_loss: 1.0755179046789805
Epoch: 2000, avg_loss: 1.070435450911522
Epoch: 2500, avg_loss: 1.0669233219146728
Epoch: 3000, avg_loss: 1.064109513839086
Epoch: 3500, avg_loss: 1.0617583968298776
Epoch: 4000, avg_loss: 1.0598457372486592
Epoch: 4500, avg_loss: 1.058286092042923




0.46090861267519745


In [25]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer

tf = pd.read_csv("../../datasets/trajectories/Porto/speed_features_unnormalized.csv")
tf.set_index(["u", "v", "key"], inplace=True)
map_id = {j: i for i, j in enumerate(network.line_graph.nodes)}
tf["idx"] = tf.index.map(map_id)
tf.sort_values(by="idx", axis=0, inplace=True)

idxs = np.arange(len(network.line_graph.nodes))
train_idx, test_idx = model_selection.train_test_split(idxs, test_size=0.2, random_state=69)

y = tf["avg_speed"]
y.fillna(0, inplace=True)
y = y.round(2)
y = y.values

for m, e in models:
    m.train(epochs=e)
    z = np.concatenate((m.load_emb(), z2), axis=1)
    
    decoder = linear_model.LinearRegression(fit_intercept=True)
    X = z

    X_train, X_test, y_train, y_test = X[train_idx], X[test_idx], y[train_idx], y[test_idx]

    decoder.fit(X_train, y_train)
    scorer = make_scorer(metrics.mean_absolute_error)
    print(np.mean(cross_val_score(estimator=decoder, X=X_test, y=y_test, scoring=scorer, cv=5)))

Epoch: 1000, avg_loss: 1.0043522388935089
Epoch: 2000, avg_loss: 1.0037227797210218
Epoch: 3000, avg_loss: 1.0034920635620752
Epoch: 4000, avg_loss: 1.0033518871963023
5.99349780632178
Epoch: 500, avg_loss: 1.0476357510089875
Epoch: 1000, avg_loss: 1.0465151098966599
Epoch: 1500, avg_loss: 1.0461387006441751
Epoch: 2000, avg_loss: 1.0458801525235175
Epoch: 2500, avg_loss: 1.0457478744506836
Epoch: 3000, avg_loss: 1.0456344858407973
Epoch: 3500, avg_loss: 1.0455226311343058
Epoch: 4000, avg_loss: 1.0454531234800815
Epoch: 4500, avg_loss: 1.0453718082374996
12.711964787733887


In [32]:
travel_time_est = TravelTimeEstimation(
    traj_dataset=trajectory,
    network=network,
    device=device,
    batch_size=128,
    epochs=5,
    seed=88,
)
travel_time_est.register_metric(
    name="MSE", metric_func=metrics.mean_squared_error, args={}
)
travel_time_est.register_metric(
    name="MAE", metric_func=metrics.mean_absolute_error, args={}
)

for i, (m, e) in enumerate(models):
    m.train(epochs=e)
    z = m.load_emb()
    if i >= 2:
        z = np.concatenate((z, z2), axis=1)

    print(travel_time_est.evaluate(z))

Epoch: 1000, avg_loss: 1.045148916721344
Epoch: 2000, avg_loss: 1.03021847063303
Epoch: 3000, avg_loss: 1.0241406110127766
Epoch: 4000, avg_loss: 1.0199422906637192




Average training loss in episode 0: 50864.55742421875
Average training loss in episode 1: 11063.43037890625
Average training loss in episode 2: 10701.5946421875
Average training loss in episode 3: 10436.82530234375
Average training loss in episode 4: 10404.15393515625
{'MSE': 10890.384121063082, 'MAE': 77.89907491512298}
Epoch: 500, avg_loss: 1.1090960187911987
Epoch: 1000, avg_loss: 1.084807618021965
Epoch: 1500, avg_loss: 1.075443676551183
Epoch: 2000, avg_loss: 1.0704160868525505
Epoch: 2500, avg_loss: 1.066892796421051
Epoch: 3000, avg_loss: 1.0641022146145502
Epoch: 3500, avg_loss: 1.061758977310998
Epoch: 4000, avg_loss: 1.0598157776892185
Epoch: 4500, avg_loss: 1.0582583843072255




Average training loss in episode 0: 54722.13613359375
Average training loss in episode 1: 11236.265759375
Average training loss in episode 2: 10853.59340703125
Average training loss in episode 3: 10688.203209375
Average training loss in episode 4: 10539.04138359375
{'MSE': 10031.258017285565, 'MAE': 72.28723308506012}
Epoch: 1000, avg_loss: 1.0441334484815596
Epoch: 2000, avg_loss: 1.0296621508598327
Epoch: 3000, avg_loss: 1.0237164240280787
Epoch: 4000, avg_loss: 1.0195346966534853




Average training loss in episode 0: 50588.34421953125
Average training loss in episode 1: 10549.6311484375
Average training loss in episode 2: 10186.04148515625
Average training loss in episode 3: 10002.2188078125
Average training loss in episode 4: 9819.9819453125
{'MSE': 9690.250556952038, 'MAE': 69.61260110340119}
Epoch: 500, avg_loss: 1.1103951952457427
Epoch: 1000, avg_loss: 1.0852545927762984
Epoch: 1500, avg_loss: 1.0757227824529012
Epoch: 2000, avg_loss: 1.070557082772255
Epoch: 2500, avg_loss: 1.0670235564231874
Epoch: 3000, avg_loss: 1.0641985115210215
Epoch: 3500, avg_loss: 1.0618425526959556
Epoch: 4000, avg_loss: 1.0599095394313336
Epoch: 4500, avg_loss: 1.058354919195175




Average training loss in episode 0: 51748.0354640625
Average training loss in episode 1: 10670.295603125
Average training loss in episode 2: 10200.783571875
Average training loss in episode 3: 9998.93963671875
Average training loss in episode 4: 9882.82310546875
{'MSE': 9565.640754853855, 'MAE': 71.05846968574524}
