In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys

# Put the directory two levels above the working directory on the import path
# (once only), so the imports that follow can be resolved from there.
module_path = os.path.abspath('../..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from generator import RoadNetwork, Trajectory
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import pandas as pd
import networkx as nx
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch_geometric.transforms as T

from models import GTNModel, GTCModel, GAEModel, Node2VecModel, GCNEncoder, Traj2VecModel
from evaluation.tasks import TravelTimeEstimation, NextLocationPrediciton

In [2]:
# --- Road network and trajectory dataset -----------------------------------
network = RoadNetwork()
network.load("../../osm_data/porto")
trajectory = Trajectory(
    "../../datasets/trajectories/Porto/road_segment_map_final.csv",
    nrows=10000000,
).generate_TTE_datatset()

# --- Per-segment trajectory features, indexed by (u, v, key) ---------------
traj_features = pd.read_csv(
    "../../datasets/trajectories/Porto/speed_features_unnormalized.csv"
).set_index(["u", "v", "key"])

# Min-max normalization of both feature columns; remaining NaNs are replaced
# by 0 after the scaling step.
for feature_name in ("util", "avg_speed"):
    column = traj_features[feature_name]
    lowest, highest = column.min(), column.max()
    traj_features[feature_name] = (column - lowest) / (highest - lowest)
traj_features = traj_features.fillna(0)

# --- PyG datasets -----------------------------------------------------------
# data = network.generate_road_segment_pyg_dataset(drop_labels=["highway_enc"])
# data_roadclf:   coordinates included, "highway_enc" dropped, no trajectory data
data_roadclf = network.generate_road_segment_pyg_dataset(
    include_coords=True,
    drop_labels=["highway_enc"],
    traj_data=None,
)
# data_meanspeed: coordinates included, "avg_speed" dropped, built from a copy
#                 of the normalized trajectory features
data_meanspeed = network.generate_road_segment_pyg_dataset(
    include_coords=True,
    drop_labels=["avg_speed"],
    traj_data=traj_features.copy(),
)
# data_rest:      coordinates included, nothing dropped, no trajectory data
data_rest = network.generate_road_segment_pyg_dataset(
    include_coords=True,
    traj_data=None,
)

Pandas Apply:   0%|          | 0/1544234 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1544234 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1544234 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1544234 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1544234 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# Read the precomputed trajectory adjacency matrix stored in traj_adj_k_3.gz.
# Original note: for traj2vec 'traj_adj_k_1_False_no_selfloops_smoothed'
adj_path = "./gtn_precalc_adj/traj_adj_k_3.gz"
adj_bi = np.loadtxt(adj_path)
# adj_for = np.loadtxt("./gtn_precalc_adj/traj_adj_k_1_False.gz")

In [None]:
# Spot-check row 108, columns 130-139 of the loaded adjacency matrix.
# The original indexed `adj`, which no cell defines (NameError on a fresh
# kernel); the matrix loaded in this notebook is bound to `adj_bi`.
print(adj_bi[108, 130:140])

In [None]:
# Generate walks over the loaded adjacency matrix and print them.
# The original referenced `Traj2Vec` and `adj`; neither name is imported or
# assigned anywhere in this notebook, so the cell raised NameError.
# NOTE(review): assumes `traj_walk` is exposed on the imported `Traj2VecModel`
# class and that `adj_bi` is the intended matrix — confirm against models.py.
walks = Traj2VecModel.traj_walk(adj_bi, 5, 10000*[0], 10)
print(walks)

In [None]:
from _walker import random_walks as _random_walks
from scipy import sparse

# Convert the adjacency matrix to CSR and cast its three arrays to the dtypes
# handed to the walker extension.
# The original built the matrix from `adj`, which is never defined in this
# notebook; the loaded adjacency matrix is `adj_bi`.
A = sparse.csr_matrix(adj_bi)
indptr = A.indptr.astype(np.uint32)
indices = A.indices.astype(np.uint32)
weights = A.data.astype(np.float32)

# Last expression of the cell: the returned walks are displayed as output.
_random_walks(indptr, indices, weights, [100,100,100], 5, 6)

In [None]:
device = torch.device('cuda:2' if torch.cuda.is_available() else 'cpu')

# Train a Traj2Vec model.
# The original passed `data` and `adj`, which no live cell defines (NameError
# on a fresh kernel). `data_roadclf` and `adj_bi` are the inputs used by the
# other Traj2VecModel construction in this notebook.
traj2vec = Traj2VecModel(
    data_roadclf,
    network,
    adj_bi,
    device=device,
    emb_dim=128,
    walk_length=30,
    context_size=5,
    walks_per_node=25,
    num_neg=10,
)
traj2vec.train(epochs=200)

In [None]:
# Write the traj2vec state dict to "modelt.pt" in the working directory.
traj2vec_state = traj2vec.state_dict()
torch.save(traj2vec_state, "modelt.pt")

In [None]:
# Clear the existing node features and pass the dataset through the
# OneHotDegree(128) transform from torch_geometric.transforms.
# NOTE(review): `data` is not assigned by any live cell in this notebook (its
# only assignment is a commented-out line in the data-loading cell), so this
# cell raises NameError on a fresh kernel — confirm which dataset it should
# operate on. It also mutates `data` in place before rebinding it.
data.x = None
data = T.OneHotDegree(128)(data)

In [None]:
use_gpu = torch.cuda.is_available()
device = torch.device('cuda:1' if use_gpu else 'cpu')
# precalc adj matrices -- the instance is not bound to a name; it is the
# cell's last expression.
GTCModel(
    data_rest,
    device,
    network,
    trajectory,
    k=6,
    bidirectional=False,
    add_self_loops=True,
)

In [54]:
# List reserved for (model, epochs) pairs; nothing is appended in this cell.
models = []

if torch.cuda.is_available():
    device = torch.device('cuda:1')
else:
    device = torch.device('cpu')

# GTC model on the full-feature dataset with the precomputed adjacency matrix.
model = GTCModel(data_rest, device, network, trajectory, adj=adj_bi)

# Alternative model configurations (disabled):
# model2 = GTCModel(data_roadclf, device, network, trajectory, adj=adj_for)
# model2 = GTNModel(data2, device, network, trajectory, load_traj_adj_path="./gtn_precalc_adj/traj_adj_k_1.gz", norm=True)
# model3 = GAEModel(data2, device=device, encoder=GCNEncoder, emb_dim=128, layers=1)
# model4 = GAEModel(data2, device=device, encoder=GCNEncoder, emb_dim=128, layers=1)
# model5 = Node2VecModel(data_roadclf, device=device, q=4, p=1)
#model6 = Traj2VecModel(data_roadclf, network, adj, device=device, emb_dim=128, walk_length=30, context_size=5, walks_per_node=25, num_neg=10)

# models.extend([(model, 5000), (model2, 5000)]) # (model3, 5000), (model4, 5000)

In [20]:
# Display the shape of the feature matrix the model trains on.
train_features = model.train_data.x
train_features.shape

torch.Size([11331, 33])

In [55]:
# Earlier GTN sweep over k (disabled):
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# for k in [1]:
#     model = GTNModel(data, device, network, trajectory, load_traj_adj_path="./traj_adj_k_{}.gz".format(k))
#     model.train(epochs=1000)
#     models.append(model)

# Train the current model for 5000 epochs.
n_epochs = 5000
model.train(epochs=n_epochs)

Epoch: 1000, avg_loss: 1.067290608048439
Epoch: 2000, avg_loss: 1.0501294767856597
Epoch: 3000, avg_loss: 1.0425132358074187
Epoch: 4000, avg_loss: 1.0379318472743035


In [56]:
# Fetch the embedding produced by the trained model and display its shape.
z = model.load_emb()
z.shape

(11331, 128)

In [None]:
# Print the current contents of the `models` list.
print(models)

In [57]:
# Save the trained model under the GTC model-state directory.
gtc_state_dir = "../model_states/gtc/"
model.save_model(path=gtc_state_dir)

In [None]:
from torch_geometric.nn.norm import LayerNorm

# `model5` and `model6` are not created by any executed cell, so the original
# cell raised NameError on a fresh kernel. Build them here before restoring
# their saved states. `adj_bi` is the adjacency matrix this notebook loads and
# also passes to its other Traj2VecModel.
model5 = Node2VecModel(data_roadclf, device=device, q=4, p=1)
model6 = Traj2VecModel(
    data_roadclf,
    network,
    adj_bi,
    device=device,
    emb_dim=128,
    walk_length=30,
    context_size=5,
    walks_per_node=25,
    num_neg=10,
)

# load node2vec emb
model5.load_model("../model_states/node2vec/model_base.pt")
z2 = model5.load_emb()
# load traj2vec emb
model6.load_model("../model_states/traj2vec/model_base.pt")
z3 = model6.load_emb()

# Layer-normalized copies (no learnable affine parameters) of both embeddings,
# converted back to NumPy arrays.
norm = LayerNorm(z3.shape[1], affine=False)
z4 = norm(torch.Tensor(z2)).detach().cpu().numpy()
z5 = norm(torch.Tensor(z3)).detach().cpu().numpy()

In [82]:
# GAE baseline embedding, restored from its saved state.
gae = GAEModel(data_rest, device=device, encoder=GCNEncoder, emb_dim=128, layers=2)
gae.load_model("../model_states/gaegcn/model_base.pt")
gae_emb = gae.load_emb()

# GTC and Traj2Vec models, each restored from its saved state.
gtc = GTCModel(data_rest, device, network, trajectory, adj=adj_bi)
gtc.load_model("../model_states/gtc/model_k3.pt")
t2v = Traj2VecModel(data_roadclf, network, adj_bi, device=device)
t2v.load_model("../model_states/traj2vec/model_base.pt")

# Embedding under evaluation: sum of the GTC and Traj2Vec embeddings.
z = gtc.load_emb() + t2v.load_emb()

# Random baseline of the same shape. Drawn from a seeded local generator so
# the baseline scores are reproducible across kernel restarts without touching
# NumPy's global random state (the seed value itself is arbitrary).
rand_emb = np.random.default_rng(88).standard_normal(z.shape)

In [83]:
from sklearn import model_selection
from sklearn import linear_model
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer

# Road-type classification: predict each segment's "highway_enc" label from
# its embedding and report the mean macro-F1 over 5-fold cross-validation.
# The former hold-out split and the per-embedding train/test slices were never
# used (the fit on them was commented out), so they have been removed.

# One label per line-graph node, in line-graph node order.
y = np.array([network.gdf_edges.loc[n]["highway_enc"] for n in network.line_graph.nodes])

# Estimator and scorer do not depend on the embedding, so build them once;
# cross_val_score fits a fresh clone of the estimator for every fold.
lm = linear_model.LogisticRegression(multi_class="multinomial", max_iter=1000)
scorer = make_scorer(metrics.f1_score, average="macro")

# Printed in this order: combined embedding, GAE embedding, random baseline.
eva = [z, gae_emb, rand_emb]
for X in eva:
    print(np.mean(cross_val_score(estimator=lm, X=X, y=y, scoring=scorer, cv=5)))

0.7891448585793378
0.6101262337702813
0.05217859890304014


In [84]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer

# Mean-speed regression: predict each segment's average speed from its
# embedding and report the mean MAE over 5-fold cross-validation.

# Load the unnormalized speed features and order the rows like the line-graph
# nodes, so that row i of the target corresponds to row i of every embedding.
tf = pd.read_csv("../../datasets/trajectories/Porto/speed_features_unnormalized.csv")
tf = tf.set_index(["u", "v", "key"])
map_id = {j: i for i, j in enumerate(network.line_graph.nodes)}
tf["idx"] = tf.index.map(map_id)
tf = tf.sort_values(by="idx", axis=0)

# Target: average speed, missing values set to 0, rounded to two decimals.
# Built as a non-mutating chain instead of an in-place fillna on a column
# pulled out of the frame.
y = tf["avg_speed"].fillna(0).round(2).to_numpy()
assert len(y) == len(map_id), (
    "target length does not match the number of line-graph nodes"
)

# The unused hold-out split was removed; decoder and scorer are independent of
# the embedding and are created once (cross_val_score clones the estimator).
decoder = linear_model.LinearRegression(fit_intercept=True)
scorer = make_scorer(metrics.mean_absolute_error)

# Printed in this order: combined embedding, GAE embedding, random baseline.
eva = [z, gae_emb, rand_emb]
for X in eva:
    print(np.mean(cross_val_score(estimator=decoder, X=X, y=y, scoring=scorer, cv=5)))

14.800262631844811
13.979628785756239
15.661938068991324


In [80]:
# Travel-time estimation task, scored with MSE and MAE.
travel_time_est = TravelTimeEstimation(
    traj_dataset=trajectory,
    network=network,
    device=device,
    batch_size=128,
    epochs=3,
    seed=88,
)

for metric_name, metric_func in (
    ("MSE", metrics.mean_squared_error),
    ("MAE", metrics.mean_absolute_error),
):
    travel_time_est.register_metric(name=metric_name, metric_func=metric_func, args={})

# Evaluated in this order: combined embedding, GAE embedding, random baseline.
eva = [z, gae_emb, rand_emb]
for X in eva:
    tte_scores = travel_time_est.evaluate(X)
    print(tte_scores)

{'MSE': 9949.570608700878, 'MAE': 71.89226948661805}
{'MSE': 11646.49069475799, 'MAE': 81.10363915457725}
{'MSE': 10092.045623252046, 'MAE': 72.39778632850647}


In [81]:
# Next-location prediction task, scored with normalized accuracy.
nextlocation_pred = NextLocationPrediciton(
    traj_dataset=trajectory,
    network=network,
    device=device,
    batch_size=256,
    epochs=3,
    seed=88,
)

accuracy_args = {"normalize": True}
nextlocation_pred.register_metric(
    name="accuracy",
    metric_func=metrics.accuracy_score,
    args=accuracy_args,
)

# Evaluated in this order: combined embedding, GAE embedding, random baseline.
eva = [z, gae_emb, rand_emb]
for X in eva:
    nlp_scores = nextlocation_pred.evaluate(X)
    print(nlp_scores)

Pandas Apply:   0%|          | 0/80000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/80000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/20000 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/20000 [00:00<?, ?it/s]

Average training loss in episode 0: 104.00620584213695
Average training loss in episode 1: 90.98275513085314
Average training loss in episode 2: 73.96600678172736
{'accuracy': 0.2464}
Average training loss in episode 0: 106.01926556181984
Average training loss in episode 1: 91.70118247815215
Average training loss in episode 2: 76.58374203812961
{'accuracy': 0.2223}
Average training loss in episode 0: 96.02736768631128
Average training loss in episode 1: 57.71753060094084
Average training loss in episode 2: 39.92700451250655
{'accuracy': 0.49315}
