In [8]:
%load_ext autoreload
%autoreload 2

import os
import sys

# Put the parent directory on the import path (once) so that the local
# modules imported below can be resolved from inside this notebook folder.
module_path = os.path.abspath('..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from generator import RoadNetwork
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import pandas as pd
import networkx as nx
import numpy as np
from tqdm import tqdm

from models import GAEModel

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
# Instantiate the road network and populate it from the given location.
network = RoadNetwork()
network.load("../osm_data/porto")

# Read only the `id` and `cpath` columns of the semicolon-separated CSV.
# NOTE(review): `df` is rebound to a copy of `network.gdf_edges` in the
# feature-matrix cell before this frame is used anywhere in the visible
# notebook -- confirm whether this read is still needed.
df = pd.read_csv(
    "../datasets/trajectories/Porto/road_segment_map_final.csv",
    usecols=["id", "cpath"],
    sep=";",
)

In [10]:
# Build the integer edge index of the road network's line graph.
LG = network.line_graph

# Give every line-graph node a consecutive integer position.
map_id = {node: position for position, node in enumerate(LG.nodes)}

# Translate both endpoints of every edge into those integer positions.
edge_list = nx.to_pandas_edgelist(LG)
edge_list = edge_list.assign(
    sidx=edge_list["source"].map(map_id),
    tidx=edge_list["target"].map(map_id),
)

# Transpose the two endpoint columns so the tensor has one row of sources
# and one row of targets.
edge_index = torch.tensor(
    edge_list[["sidx", "tidx"]].to_numpy().T,
    dtype=torch.long,
).contiguous()

# Sanity check: the tensor should hold exactly one column per graph edge.
print(edge_index.shape, len(LG.edges))

torch.Size([2, 26617]) 26617


In [11]:
from sklearn.impute import KNNImputer

# Build the node feature matrix: one row per entry of `network.gdf_edges`,
# ordered by the integer position that `map_id` assigns to its index label,
# so rows line up with the node positions used in `edge_index`.
df = (
    network.gdf_edges.copy()
    .assign(idx=lambda frame: frame.index.map(map_id))
    .sort_values(by="idx")
    # Drop identifiers, geometry and free-text columns that are not used as
    # numeric features (the helper `idx` column is dropped again as well).
    .drop(columns=["osmid", "fid", "geometry", "highway", "idx", "name", "ref", "access", "area", "width"])
    .reset_index(drop=True)
)

# Turn the tag columns into 0/1 flags: missing -> 0, listed tag values -> 1.
df["bridge"] = df["bridge"].fillna(0).replace(["yes", "viaduct", "['yes', 'viaduct']", "cantilever"], 1)
df["tunnel"] = df["tunnel"].fillna(0).replace(["yes", "building_passage", "culvert"], 1)
df["junction"] = df["junction"].fillna(0).replace(["roundabout", "circular"], 1)

# Keep the first alphanumeric token of each value and convert it to a number.
# `expand=False` yields a Series (not a one-column frame); `pd.to_numeric`
# raises right here on a non-numeric token instead of deep inside the imputer.
df["lanes"] = pd.to_numeric(df["lanes"].str.extract(r"(\w+)", expand=False))
df["maxspeed"] = pd.to_numeric(df["maxspeed"].str.extract(r"(\w+)", expand=False))

# Fill the remaining gaps from the 2 nearest rows in feature space.
imputer = KNNImputer(n_neighbors=2)
imputed = imputer.fit_transform(df)
# KNNImputer can drop columns that are entirely empty; fail loudly rather
# than read the imputed values from shifted column positions.
assert imputed.shape == df.shape, "imputer changed the number of columns"
# Look the columns up by name instead of hard-coding their positions, so a
# change in the column layout cannot silently write the wrong feature.
df["lanes"] = imputed[:, df.columns.get_loc("lanes")].astype(int)
df["maxspeed"] = imputed[:, df.columns.get_loc("maxspeed")].astype(int)

features = torch.tensor(df.to_numpy(dtype=np.double), dtype=torch.double)


In [12]:
# Sanity check: display whether PyTorch reports an available CUDA device.
torch.cuda.is_available()

True

In [18]:
import torch_geometric.transforms as T
from torch_geometric.data import Data

# Prefer the GPU when PyTorch can see one, otherwise stay on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Wrap features and connectivity in a PyG `Data` object, then normalise the
# features and move the result to `device`.
data = Data(x=features, edge_index=edge_index)
transform = T.Compose([T.NormalizeFeatures(), T.ToDevice(device)])
data = transform(data)

# Train the graph auto-encoder on the full graph, then persist the model
# state and the embeddings.
# NOTE(review): 128 is presumably the embedding size (the encoder output in
# the next cell has 128 columns) -- confirm against GAEModel.
model = GAEModel(data.x.shape[1], 128, device=device)
model.train(train_data=data, epochs=3000)
model.save_model(save_name="gae_3000e.pt", path="../model_states/gae/")
model.save_emb(path="../model_states/gae/")


Epoch: 100, avg_loss: 1.2743199396133422
Epoch: 200, avg_loss: 1.2199022823572159
Epoch: 300, avg_loss: 1.1905180243651072
Epoch: 400, avg_loss: 1.16825299680233
Epoch: 500, avg_loss: 1.1495556945800782
Epoch: 600, avg_loss: 1.1358440444866815
Epoch: 700, avg_loss: 1.1249172048909324
Epoch: 800, avg_loss: 1.115951492935419
Epoch: 900, avg_loss: 1.1079810954464806
Epoch: 1000, avg_loss: 1.1009827965497971
Epoch: 1100, avg_loss: 1.0950275325775147
Epoch: 1200, avg_loss: 1.089638578792413
Epoch: 1300, avg_loss: 1.084645246909215
Epoch: 1400, avg_loss: 1.0801280229432242
Epoch: 1500, avg_loss: 1.075972119172414
Epoch: 1600, avg_loss: 1.0720443864166738
Epoch: 1700, avg_loss: 1.0683366833714878
Epoch: 1800, avg_loss: 1.0649564906292492
Epoch: 1900, avg_loss: 1.0616580911372837
Epoch: 2000, avg_loss: 1.0585656130313874
Epoch: 2100, avg_loss: 1.0557288302694048
Epoch: 2200, avg_loss: 1.052943977686492
Epoch: 2300, avg_loss: 1.0503115304915802
Epoch: 2400, avg_loss: 1.0478244746476413
Epoch: 2

In [12]:
# Compute the embedding of every node with the trained encoder. This is pure
# inference, so disable autograd to avoid building (and keeping alive) a
# computation graph for the whole network.
# NOTE(review): if the encoder contains dropout/batch-norm, it may also need
# to be switched to eval mode first -- confirm against GAEModel.
with torch.no_grad():
    z = model.model.encode(data.x, data.edge_index)
z.shape

torch.Size([11331, 128])

In [21]:
from sklearn import model_selection
from sklearn import linear_model
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# Features: the learned embedding of each node, as a NumPy array on the CPU.
X = z.detach().cpu().numpy()
# Labels: the `highway_enc` value of each node, in line-graph node order.
y = np.array([network.gdf_edges.loc[node]["highway_enc"] for node in network.line_graph.nodes])

# Discard every sample whose label is one of the uncommon tags.
mask = np.isin(y, [1, 2, 4, 7, 9, 10, 11, 12])
X, y = X[~mask, :], y[~mask]
print(np.unique(y, return_counts=True))

# Hold out 20% of the remaining samples for testing (fixed seed).
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2, random_state=1
)

print('X_train dimension= ', X_train.shape)
print('X_test dimension= ', X_test.shape)
print('y_train dimension= ', y_train.shape)
print('y_test dimension= ', y_test.shape)

(array([0, 3, 5, 6, 8]), array([1680,  577, 5832, 1451, 1213]))
X_train dimension=  (8602, 128)
X_test dimension=  (2151, 128)
y_train dimension=  (8602,)
y_test dimension=  (2151,)


In [22]:
# Fit a multinomial logistic regression on the training embeddings and
# report per-class precision/recall/F1 on the held-out split.
lm = linear_model.LogisticRegression(max_iter=1000, multi_class="multinomial").fit(X_train, y_train)
print(metrics.classification_report(y_true=y_test, y_pred=lm.predict(X_test)))

              precision    recall  f1-score   support

           0       0.83      0.81      0.82       343
           3       0.70      0.60      0.64       121
           5       0.76      0.88      0.81      1135
           6       0.68      0.36      0.47       305
           8       0.75      0.71      0.73       247

    accuracy                           0.76      2151
   macro avg       0.74      0.67      0.70      2151
weighted avg       0.76      0.76      0.75      2151



In [7]:
import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid

# Scratch experiment on Cora: normalise features, keep everything on the CPU
# and split the edges into train/validation/test sets for link prediction.
device = torch.device('cpu')
transform = T.Compose([
    T.NormalizeFeatures(),
    T.ToDevice(device),
    T.RandomLinkSplit(
        num_val=0.05,
        num_test=0.1,
        is_undirected=True,
        split_labels=True,
        add_negative_train_samples=False,
    ),
])
dataset = Planetoid(root=".", name="Cora", transform=transform)

# With the link split in the pipeline, indexing yields three `Data` objects;
# display the training one.
t, v, te = dataset[0]
t

Data(x=[2708, 1433], edge_index=[2, 8976], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], pos_edge_label=[4488], pos_edge_label_index=[2, 4488])

In [21]:
from torch_geometric.utils import train_test_split_edges

# Try both edge-splitting utilities on the road-network graph.
device = torch.device('cpu')
transform = T.Compose([
    T.NormalizeFeatures(),
    T.ToDevice(device),
    T.RandomLinkSplit(num_val=0.05, num_test=0.1, is_undirected=True,
                      split_labels=True, add_negative_train_samples=False)
])
print(data)
# NOTE(review): the result of the transform pipeline is discarded here --
# confirm whether the split it returns was meant to be kept.
transform(data)
# `train_test_split_edges` rewrites the object it receives (the printed
# result no longer carries `edge_index`). Split a clone so that `data` stays
# intact for the cells that encode with `data.edge_index`.
test = train_test_split_edges(data.clone())

print(test)

Data(x=[11331, 8], edge_index=[2, 26617])




Data(x=[11331, 8], val_pos_edge_index=[2, 699], test_pos_edge_index=[2, 1399], train_pos_edge_index=[2, 23788], train_neg_adj_mask=[11331, 11331], val_neg_edge_index=[2, 699], test_neg_edge_index=[2, 1399])
