In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys

# Put the directory two levels above the notebook on the import path (once).
# The project imports that follow in this cell (`generator`, `models`) are
# presumably resolved through this entry -- confirm against the repo layout.
module_path = os.path.abspath('../..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from generator import RoadNetwork
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import pandas as pd
import networkx as nx
import numpy as np
from tqdm import tqdm

from models import GAEModel, GCNEncoder, GATEncoder

In [2]:
# `city` picks the OSM network directory and is passed as `dataset=` when the
# graph dataset is generated; `city_traj` is only referenced by the
# commented-out trajectory-feature path visible in this notebook.
city = city_traj = "hanover"

In [3]:
# Load the road network stored under ../../osm_data/<city>.
osm_dir = f"../../osm_data/{city}"
network = RoadNetwork()
network.load(osm_dir)
# df = pd.read_csv("../datasets/trajectories/Porto/road_segment_map_final.csv", sep=";", usecols=["id", "cpath"])
# traj_features = pd.read_csv(f"../../datasets/trajectories/{city_traj}/speed_features_unnormalized.csv")
# traj_features.set_index(["u", "v", "key"], inplace=True)
# traj_features["util"] = (traj_features["util"] - traj_features["util"].min()) / (traj_features["util"].max() - traj_features["util"].min())  # min max normalization
# traj_features["avg_speed"] = (traj_features["avg_speed"] - traj_features["avg_speed"].min()) / (traj_features["avg_speed"].max() - traj_features["avg_speed"].min())  # min max normalization
# traj_features.fillna(0, inplace=True)

In [4]:
# Pin this process to GPU 3, but only when that device actually exists:
# torch.cuda.set_device raises on CPU-only machines and on hosts that expose
# fewer than four GPUs, which previously aborted the notebook right here.
gpu_index = 3
cuda_ready = torch.cuda.is_available() and torch.cuda.device_count() > gpu_index
if cuda_ready:
    torch.cuda.set_device(gpu_index)
# Displayed as the cell output, as before.
torch.cuda.is_available()

True

In [14]:
# Build the road-segment graph dataset for `city` with coordinates included.
# No trajectory data is passed; the original trailing note suggests
# `traj_features` is the intended value for `traj_data` when it is available.
data = network.generate_road_segment_pyg_dataset(
    traj_data=None,  # traj_features
    include_coords=True,
    dataset=city,
)

In [15]:
# Display the shape of the node feature matrix `data.x`.
data.x.shape

torch.Size([18576, 36])

In [None]:
# for training without features
# data.x = None

In [18]:
from torch_geometric.data import Data
import torch_geometric.transforms as T

# create pyg dataset
# Prefer GPU 3, but only if the host really exposes it. Asking for 'cuda:3'
# merely because *some* CUDA device is available fails on machines with fewer
# than four GPUs, so fall back to the first GPU and finally to the CPU.
gpu_index = 3
if torch.cuda.is_available() and torch.cuda.device_count() > gpu_index:
    device = torch.device(f'cuda:{gpu_index}')
elif torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

transform = T.Compose([
    # T.OneHotDegree(128), # training without features
    T.ToDevice(device),
])
# Move the dataset to the selected device before handing it to the model.
data = transform(data)

# Graph auto-encoder with a GCN encoder producing 128-dimensional embeddings.
model = GAEModel(data, device=device, encoder=GCNEncoder, emb_dim=128)
model.train(epochs=5000)
# model.save_model(path="../model_states/gaegcn/")
# model.save_emb(path="../model_states/gaegcn/")


Epoch: 500, avg_loss: 1.063704967021942
Epoch: 1000, avg_loss: 1.0332980356812478
Epoch: 1500, avg_loss: 1.0161029305855434
Epoch: 2000, avg_loss: 1.005911487519741
Epoch: 2500, avg_loss: 0.9972751693964005
Epoch: 3000, avg_loss: 0.9958636118570964
Epoch: 3500, avg_loss: 0.9986427420037134
Epoch: 4000, avg_loss: 0.9949082068800926
Epoch: 4500, avg_loss: 0.9901180153422885


In [20]:
# Embed every node with the encoder. This is inference only, so skip autograd
# bookkeeping instead of keeping a graph alive for the whole embedding matrix.
# NOTE(review): if the encoder uses dropout/batch-norm, it may also need to be
# switched to eval mode first -- confirm against GAEModel.
with torch.no_grad():
    z = model.model.encode(data.x, data.edge_index)
z.shape

torch.Size([18576, 128])

In [19]:
# Persist the trained model under ../model_states/gaegcn/.
# NOTE(review): the other data paths in this notebook are relative to "../../";
# confirm that "../" is the intended location here. This cell was also executed
# before the embedding cell above it (see the execution counts).
model.save_model(path="../model_states/gaegcn/")

In [21]:
from sklearn import model_selection
from sklearn import linear_model
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# Features: the embedding of each node, detached and copied to host memory.
X = z.detach().cpu().numpy()

# Targets: the `highway_enc` value of every line-graph node, looked up in the
# edge table in node iteration order.
edge_table = network.gdf_edges
y = np.array([edge_table.loc[node]["highway_enc"] for node in network.line_graph.nodes])

# mask = ((y==11) | (y==10) | (y==9) | (y==4) | (y==1) | (y==2) | (y==12) | (y==7)) # remove uncommon tags
# X = X[~mask, :]
# y = y[~mask]
# print(np.unique(y, return_counts=True))

# Hold out 20% of the samples for evaluation (fixed seed for repeatability).
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Report the size of every split.
for caption, part in (
    ('X_train dimension= ', X_train),
    ('X_test dimension= ', X_test),
    ('y_train dimension= ', y_train),
    ('y_test dimension= ', y_test),
):
    print(caption, part.shape)

X_train dimension=  (14860, 128)
X_test dimension=  (3716, 128)
y_train dimension=  (14860,)
y_test dimension=  (3716,)


In [22]:
# Linear probe: logistic regression on the embeddings.
# The explicit `multi_class="multinomial"` argument is dropped because it is
# deprecated in recent scikit-learn releases; with the default lbfgs solver a
# multi-class target is already fitted as a multinomial model.
lm = linear_model.LogisticRegression(max_iter=1000)
lm.fit(X_train, y_train)

# Some rare classes never get predicted; report their undefined precision as 0
# explicitly instead of relying on the warning fallback.
y_pred = lm.predict(X_test)
print(metrics.classification_report(y_test, y_pred, zero_division=0))

              precision    recall  f1-score   support

           0       0.88      0.84      0.86       170
           1       0.50      0.25      0.33         4
           2       0.93      0.86      0.89        29
           3       0.63      0.61      0.62        31
           4       0.00      0.00      0.00         1
           5       0.95      0.95      0.95      2720
           6       1.00      1.00      1.00         2
           7       0.71      0.76      0.73       224
           8       0.80      0.73      0.76        11
           9       0.82      0.82      0.82       425
          10       0.00      0.00      0.00         1
          11       0.65      0.75      0.70        20
          12       0.71      0.67      0.69        45
          13       0.72      0.64      0.68        33

    accuracy                           0.91      3716
   macro avg       0.67      0.63      0.65      3716
weighted avg       0.91      0.91      0.91      3716



In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer

# Probe: predict each segment's average speed from its embedding.
# Read the speed features of the configured trajectory city (`city_traj`) so
# they belong to the same road network the embeddings were computed on; the
# previously hard-coded Porto file cannot line up with another city's graph.
tf = pd.read_csv(f"../../datasets/trajectories/{city_traj}/speed_features_unnormalized.csv")
tf.set_index(["u", "v", "key"], inplace=True)

# Order the feature rows like the line-graph nodes (= rows of the embedding).
map_id = {j: i for i, j in enumerate(network.line_graph.nodes)}
tf["idx"] = tf.index.map(map_id)
# Rows whose (u, v, key) is not a node of this graph get no position; drop them.
tf = tf.dropna(subset=["idx"]).sort_values(by="idx", axis=0)

decoder = linear_model.LinearRegression(fit_intercept=True)
X = z.detach().cpu().numpy()
if len(tf) != X.shape[0]:
    # Without a one-to-one match, X and y rows would be silently misaligned.
    raise ValueError(
        f"speed features cover {len(tf)} segments but there are {X.shape[0]} embeddings; "
        "rows of X and y would not line up"
    )
y = tf["avg_speed"].fillna(0).round(2)

X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size= 0.2, random_state = 1)

print('X_train dimension= ', X_train.shape)
print('X_test dimension= ', X_test.shape)
print('y_train dimension= ', y_train.shape)
print('y_test dimension= ', y_test.shape)

# Cross-validate on the training split only. cross_val_score refits clones of
# the estimator, so running it on the test split (as before) both ignored the
# fitted decoder and trained on held-out data.
scorer = make_scorer(metrics.mean_absolute_error)
cv_mae = cross_val_score(estimator=decoder, X=X_train, y=y_train, scoring=scorer, cv=5)
print('CV MAE (train)= ', np.mean(cv_mae))

# Final check: fit on the full training split, score on the untouched test split.
decoder.fit(X_train, y_train)
print('Test MAE= ', metrics.mean_absolute_error(y_test, decoder.predict(X_test)))

In [None]:
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T

# Reference experiment on Cora: normalise the features, keep everything on the
# CPU and split the edges into train/validation/test link sets.
device = torch.device('cpu')
link_split = T.RandomLinkSplit(
    num_val=0.05,
    num_test=0.1,
    is_undirected=True,
    split_labels=True,
    add_negative_train_samples=False,
)
transform = T.Compose([T.NormalizeFeatures(), T.ToDevice(device), link_split])

# The transform is applied on access, so indexing yields the three splits.
dataset = Planetoid(root=".", name="Cora", transform=transform)
t, v, te = dataset[0]
t

In [None]:
from torch_geometric.utils import train_test_split_edges

# Compare the two edge-splitting utilities on the road-network dataset.
device = torch.device('cpu')
transform = T.Compose([
    T.NormalizeFeatures(),
    T.ToDevice(device),
    T.RandomLinkSplit(num_val=0.05, num_test=0.1, is_undirected=True,
                      split_labels=True, add_negative_train_samples=False)
])
print(data)

# Both utilities run on clones so that `data` itself stays usable:
# train_test_split_edges clears `edge_index` on the object it receives, which
# would break the encoding cell above when it is re-run.
# NOTE(review): `is_undirected=True` assumes an undirected graph -- confirm
# this holds for the road-segment graph.
train_data, val_data, test_data = transform(data.clone())  # keep the splits instead of discarding them
test = train_test_split_edges(data.clone())

print(test)