In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys

# Put the directory two levels above the working directory on sys.path
# (once), presumably so the project-local imports below resolve.
module_path = os.path.abspath(os.path.join(os.pardir, os.pardir))
if module_path not in sys.path:
    sys.path.append(module_path)

from generator import RoadNetwork, Trajectory
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import pandas as pd
import networkx as nx
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch_geometric.transforms as T

from models import GTNModel, GTCModel, GAEModel, Node2VecModel, GCNEncoder, Traj2VecModel
from evaluation.tasks import TravelTimeEstimation, NextLocationPrediciton

In [2]:
# Load the Porto road network and the first 1000 rows of the map-matched
# trajectory file.
network = RoadNetwork()
network.load("../../osm_data/porto")
trajectory = Trajectory(
    "../../datasets/trajectories/Porto/road_segment_map_final.csv",
    nrows=1000,
).generate_TTE_datatset()

# Per-edge speed features, indexed by the (u, v, key) edge identifier.
traj_features = pd.read_csv("../../datasets/trajectories/Porto/speed_features_unnormalized.csv")
traj_features.set_index(["u", "v", "key"], inplace=True)
# Min-max normalization of both feature columns; NaNs are filled with 0
# only afterwards, so they do not influence the min/max.
for feature_col in ("util", "avg_speed"):
    col_min = traj_features[feature_col].min()
    col_max = traj_features[feature_col].max()
    traj_features[feature_col] = (traj_features[feature_col] - col_min) / (col_max - col_min)
traj_features.fillna(0, inplace=True)

# One PyG dataset per downstream task. Each call gets its own copy of the
# trajectory features.
# data = network.generate_road_segment_pyg_dataset(drop_labels=["highway_enc"])
# Road-type classification: "highway_enc" dropped, no trajectory features.
data_roadclf = network.generate_road_segment_pyg_dataset(
    include_coords=True, drop_labels=["highway_enc"], traj_data=None
)
# Mean-speed task: "avg_speed" dropped from the labels.
data_meanspeed = network.generate_road_segment_pyg_dataset(
    include_coords=True, drop_labels=["avg_speed"], traj_data=traj_features.copy()
)
# Remaining tasks: nothing dropped.
data_rest = network.generate_road_segment_pyg_dataset(
    include_coords=True, traj_data=traj_features.copy()
)

Pandas Apply:   0%|          | 0/1000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1000 [00:00<?, ?it/s]

In [None]:
# NOTE(review): `Traj2Vec` is not imported anywhere in this notebook (only
# `Traj2VecModel` is) — confirm the intended class/import before running.
seg_sequences = trajectory["seg_seq"].values
m = Traj2Vec.map_traj_to_node_ids(seg_sequences, network, 10)

In [None]:
# Inspect the last entry of the mapping result computed in the previous cell.
print(m[-1])

In [4]:
# Load a precomputed adjacency matrix from a gzipped text file (presumably
# the trajectory-based adjacency for k=1, judging by the file name).
# The trailing note names the file variant the author used for traj2vec.
adj = np.loadtxt("./gtn_precalc_adj/traj_adj_k_1.gz") # for traj2vec 'traj_adj_k_1_False_no_selfloops_smoothed'

In [None]:
# Spot-check: row 108, columns 130..139 of the loaded adjacency matrix.
print(adj[108, 130:140])

In [None]:
# NOTE(review): `Traj2Vec` is not imported anywhere in this notebook (only
# `Traj2VecModel` is) — confirm the intended class/import before running.
# The third argument is a list of 10000 zeros (presumably start node ids —
# confirm against the traj_walk signature).
zero_list = [0] * 10000
walks = Traj2Vec.traj_walk(adj, 5, zero_list, 10)
print(walks)

In [None]:
from _walker import random_walks as _random_walks
from scipy import sparse

# Convert the dense adjacency matrix to CSR and hand its raw arrays to the
# compiled walker: uint32 index arrays, float32 edge weights.
A = sparse.csr_matrix(adj)
indptr, indices = (csr_part.astype(np.uint32) for csr_part in (A.indptr, A.indices))
weights = A.data.astype(np.float32)

# The result is shown as the cell output.
_random_walks(indptr, indices, weights, [100,100,100], 5, 6)

In [None]:
# Train a Traj2Vec embedding model on the road-classification dataset.
# The original cell passed `data`, which is only defined in a commented-out
# line above and therefore raises NameError on a fresh kernel;
# `data_roadclf` is the dataset the identical Traj2VecModel construction
# further down (model6) uses.
device = torch.device('cuda:2' if torch.cuda.is_available() else 'cpu')
traj2vec = Traj2VecModel(
            data_roadclf,
            network,
            adj,
            device=device,
            emb_dim=128,
            walk_length=30,
            context_size=5,
            walks_per_node=25,
            num_neg=10,
        )
traj2vec.train(epochs=200)

In [None]:
# Persist the trained traj2vec parameters to "modelt.pt" in the working directory.
torch.save(traj2vec.state_dict(), "modelt.pt")

In [None]:
# Drop the node features and apply the OneHotDegree(128) transform in their place.
# NOTE(review): `data` is not defined in any visible cell (its assignment
# is commented out in the data-loading cell) — confirm which dataset is
# meant. This cell also mutates `data` in place, so it is not idempotent.
data.x = None
data = T.OneHotDegree(128)(data)

In [3]:
# Use the second GPU when CUDA is available, otherwise fall back to CPU.
device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
# precalc adj matrices
# NOTE(review): the instance is not kept; presumably the constructor
# computes and stores the k=6 adjacency matrices as a side effect — confirm.
GTCModel(data_rest, device, network, trajectory, k=6, bidirectional=False, add_self_loops=True)

100%|██████████| 1544234/1544234 [14:12<00:00, 1810.84it/s]


<models.gtn.GTCModel at 0x7f8ac013bd30>

In [10]:
# `models` holds (model, epochs) pairs; the evaluation cells below train
# each model for its epoch count before extracting embeddings.
models = []
device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')

# Model under evaluation, built on the preloaded adjacency matrix.
model = GTCModel(data_roadclf, device, network, trajectory, adj=adj)
# model2 = GTNModel(data2, device, network, trajectory, load_traj_adj_path="./gtn_precalc_adj/traj_adj_k_1.gz", norm=True)
# model3 = GAEModel(data2, device=device, encoder=GCNEncoder, emb_dim=128, layers=1)
# model4 = GAEModel(data2, device=device, encoder=GCNEncoder, emb_dim=128, layers=1)

# Baselines; a later cell loads their saved weights and concatenates their
# embeddings with the main model's.
model5 = Node2VecModel(data_roadclf, device=device, q=4, p=1)
model6 = Traj2VecModel(
    data_roadclf,
    network,
    adj,
    device=device,
    emb_dim=128,
    walk_length=30,
    context_size=5,
    walks_per_node=25,
    num_neg=10,
)

models.append((model, 5000)) # (model3, 5000), (model4, 5000)

In [9]:
# Sanity check: shape of the model's training feature matrix.
model.train_data.x.shape

torch.Size([11331, 21])

In [6]:
# Earlier experiment kept for reference: one GTNModel trained per
# precomputed adjacency file.
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# for k in [1]:
#     model = GTNModel(data, device, network, trajectory, load_traj_adj_path="./traj_adj_k_{}.gz".format(k))
#     model.train(epochs=1000)
#     models.append(model)

# Train `model` for 20000 epochs.
model.train(epochs=20000)

Epoch: 1000, avg_loss: 1.062049215912819
Epoch: 2000, avg_loss: 1.0509561000466348
Epoch: 3000, avg_loss: 1.0451859891017279
Epoch: 4000, avg_loss: 1.0410418401658534
Epoch: 5000, avg_loss: 1.0383177030563355
Epoch: 6000, avg_loss: 1.0364838276108106
Epoch: 7000, avg_loss: 1.0352009556804385
Epoch: 8000, avg_loss: 1.0342311831712723
Epoch: 9000, avg_loss: 1.0334484394656287
Epoch: 10000, avg_loss: 1.032840489625931
Epoch: 11000, avg_loss: 1.0323191492991013
Epoch: 12000, avg_loss: 1.0318746157089869
Epoch: 13000, avg_loss: 1.0314973780742058
Epoch: 14000, avg_loss: 1.031178311262812
Epoch: 15000, avg_loss: 1.0309027838945388
Epoch: 16000, avg_loss: 1.0306575378105045
Epoch: 17000, avg_loss: 1.0304378623191048
Epoch: 18000, avg_loss: 1.0302513183818922
Epoch: 19000, avg_loss: 1.030081497418253


In [None]:
# Run the wrapped network once on the training graph and show the shape
# of the result.
train_data = model.train_data
z = model.model(
    train_data.x,
    train_data.edge_traj_index,
    train_data.edge_weight,
)
z.shape

In [None]:
# Show the registered (model, epochs) pairs.
print(models)

In [7]:
# Save the model state under the GTC directory of the model-state folder.
model.save_model(path="../model_states/gtc/")

[autoreload of models.gtn failed: Traceback (most recent call last):
  File "/home/pheinemeyer/miniconda3/envs/road/lib/python3.9/site-packages/IPython/extensions/autoreload.py", line 257, in check
    superreload(m, reload, self.old_objects)
  File "/home/pheinemeyer/miniconda3/envs/road/lib/python3.9/site-packages/IPython/extensions/autoreload.py", line 455, in superreload
    module = reload(module)
  File "/home/pheinemeyer/miniconda3/envs/road/lib/python3.9/importlib/__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 613, in _exec
  File "<frozen importlib._bootstrap_external>", line 850, in exec_module
  File "<frozen importlib._bootstrap>", line 228, in _call_with_frames_removed
  File "/dstore/home/pheinemeyer/Road-Network-Embedding-Generator/models/gtn.py", line 69, in <module>
    class PositionalEncoding(nn.Module):
  File "/dstore/home/pheinemeyer/Road-Network-Embedding-Generator/models/gtn.py", line 83, in Posi

In [11]:
from torch_geometric.nn.norm import LayerNorm

# Restore the saved node2vec / traj2vec weights and pull their embeddings.
# The evaluation cells below depend on z2..z5 from this cell.
model5.load_model("../model_states/node2vec/model.pt")
z2 = model5.load_emb()
model6.load_model("../model_states/traj2vec/model.pt")
z3 = model6.load_emb()

# Layer-normalized variants (no learnable affine parameters), converted
# back to numpy arrays.
# NOTE(review): the norm is sized from z3 but also applied to z2 —
# confirm both embeddings have the same width.
norm = LayerNorm(z3.shape[1], affine=False)
z4, z5 = (
    norm(torch.Tensor(raw_emb)).detach().cpu().numpy()
    for raw_emb in (z2, z3)
)

FileNotFoundError: [Errno 2] No such file or directory: '../model_states/node2vec/model.pt'

In [None]:
# Shape of the traj2vec embedding loaded in the previous cell.
print(z3.shape)

In [12]:
from sklearn import model_selection
from sklearn import linear_model
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer

# Road-type classification probe: predict "highway_enc" from each
# embedding variant with a multinomial logistic regression.
# Requires z2 / z3 from the embedding-loading cell.
idxs = np.arange(len(network.line_graph.nodes))
train_idx, test_idx = model_selection.train_test_split(idxs, test_size=0.2, random_state=69)
# Labels in line-graph node order, so row i of an embedding pairs with y[i].
y = np.array([network.gdf_edges.loc[n]["highway_enc"] for n in network.line_graph.nodes])

# Loop-invariant, so built once instead of once per embedding variant.
scorer = make_scorer(metrics.f1_score, average="macro")

for m, e in models:
    # Trains the model for `e` epochs each time this cell runs.
    m.train(epochs=e)
    zn = m.load_emb()
    # Model embedding alone, and concatenated with node2vec / traj2vec.
    zcn = np.concatenate((zn, z2), axis=1)
    zct = np.concatenate((zn, z3), axis=1)
    # zcnn = np.concatenate((zn, z4), axis=1)
    # zctn = np.concatenate((zn, z5), axis=1)
    # X = z # embedding for each node
    eva = [zn, zcn, zct]
    for X in eva:
        X_train, X_test, y_train, y_test = X[train_idx], X[test_idx], y[train_idx], y[test_idx]

        lm = linear_model.LogisticRegression(multi_class="multinomial", max_iter=1000)
        lm.fit(X_train, y_train)
        # NOTE(review): cross_val_score clones and refits the estimator on
        # folds of the *test* split, so the printed macro-F1 is a 5-fold CV
        # score over the 20% split, not the score of the fit above —
        # confirm this is the intended protocol.
        print(np.mean(cross_val_score(estimator=lm, X=X_test, y=y_test, scoring=scorer, cv=5)))
    #print(metrics.classification_report(y_test, lm.predict(X_test)))

Epoch: 1000, avg_loss: 1.061252117872238
Epoch: 2000, avg_loss: 1.0504281715750694
Epoch: 3000, avg_loss: 1.0447946486473083
Epoch: 4000, avg_loss: 1.0407389248609542


NameError: name 'z2' is not defined

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer

# Mean-speed regression probe: predict each segment's average speed from
# each embedding variant with a linear regression.
# Requires z2..z5 from the embedding-loading cell and the sklearn modules
# imported in the road-type classification cell.
tf = pd.read_csv("../../datasets/trajectories/Porto/speed_features_unnormalized.csv")
tf.set_index(["u", "v", "key"], inplace=True)
# Order the feature rows by line-graph node position so that row i lines
# up with embedding row i.
# NOTE(review): assumes one feature row per line-graph node — confirm.
map_id = {j: i for i, j in enumerate(network.line_graph.nodes)}
tf["idx"] = tf.index.map(map_id)
tf.sort_values(by="idx", axis=0, inplace=True)

idxs = np.arange(len(network.line_graph.nodes))
train_idx, test_idx = model_selection.train_test_split(idxs, test_size=0.2, random_state=69)

# Built as a non-mutating chain: the original filled NaNs in place on a
# column selected from `tf`, which can silently modify `tf` depending on
# pandas copy semantics.
y = tf["avg_speed"].fillna(0).round(2).values

# Loop-invariant, so built once instead of once per embedding variant.
scorer = make_scorer(metrics.mean_absolute_error)

for m, e in models:
    # Trains the model for `e` epochs each time this cell runs.
    m.train(epochs=e)

    zn = m.load_emb()
    # Model embedding alone, and concatenated with raw / layer-normalized
    # node2vec and traj2vec embeddings.
    zcn = np.concatenate((zn, z2), axis=1)
    zct = np.concatenate((zn, z3), axis=1)
    zcnn = np.concatenate((zn, z4), axis=1)
    zctn = np.concatenate((zn, z5), axis=1)
    # X = z # embedding for each node
    eva = [zn, zcn, zct, zcnn, zctn]
    for X in eva:
        decoder = linear_model.LinearRegression(fit_intercept=True)

        X_train, X_test, y_train, y_test = X[train_idx], X[test_idx], y[train_idx], y[test_idx]

        decoder.fit(X_train, y_train)
        # NOTE(review): cross_val_score clones and refits the estimator on
        # folds of the *test* split, so the printed MAE is a 5-fold CV
        # score over the 20% split, not the error of the fit above —
        # confirm this is the intended protocol.
        print(np.mean(cross_val_score(estimator=decoder, X=X_test, y=y_test, scoring=scorer, cv=5)))

In [None]:
# Travel-time estimation task on the trajectory sample, reporting MSE and MAE.
# Requires z2 / z3 from the embedding-loading cell and `metrics` from the
# road-type classification cell.
travel_time_est = TravelTimeEstimation(
    traj_dataset=trajectory,
    network=network,
    device=device,
    batch_size=128,
    epochs=5,
    seed=88,
)
for metric_name, metric_func in (
    ("MSE", metrics.mean_squared_error),
    ("MAE", metrics.mean_absolute_error),
):
    travel_time_est.register_metric(name=metric_name, metric_func=metric_func, args={})

for i, (m, e) in enumerate(models):
    # Trains the model for `e` epochs each time this cell runs.
    m.train(epochs=e)
    zn = m.load_emb()
    # Model embedding alone, and concatenated with node2vec / traj2vec.
    zcn = np.concatenate([zn, z2], axis=1)
    zct = np.concatenate([zn, z3], axis=1)
    # X = z # embedding for each node
    eva = [zn, zcn, zct]
    for X in eva:
        task_result = travel_time_est.evaluate(X)
        print(task_result)

In [None]:
# Next-location prediction task on the trajectory sample, reporting accuracy.
# Requires z2..z5 from the embedding-loading cell and `metrics` from the
# road-type classification cell.
task_kwargs = dict(
    traj_dataset=trajectory,
    network=network,
    device=device,
    batch_size=256,
    epochs=5,
    seed=88,
)
nextlocation_pred = NextLocationPrediciton(**task_kwargs)

nextlocation_pred.register_metric(
    name="accuracy",
    metric_func=metrics.accuracy_score,
    args={"normalize": True},
)

for i, (m, e) in enumerate(models):
    # Trains the model for `e` epochs each time this cell runs.
    m.train(epochs=e)
    zn = m.load_emb()
    # Model embedding alone, and concatenated with raw / layer-normalized
    # node2vec and traj2vec embeddings.
    zcn = np.concatenate([zn, z2], axis=1)
    zct = np.concatenate([zn, z3], axis=1)
    zcnn = np.concatenate([zn, z4], axis=1)
    zctn = np.concatenate([zn, z5], axis=1)
    # X = z # embedding for each node
    eva = [zn, zcn, zct, zcnn, zctn]
    for X in eva:
        task_result = nextlocation_pred.evaluate(X)
        print(task_result)