In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys

# Put the directory two levels above the working directory on the import
# path (presumably where the project-local `generator` and `models`
# packages imported below live -- confirm against the repo layout).
module_path = os.path.abspath('../..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from generator import RoadNetwork
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import pandas as pd
import networkx as nx
import numpy as np
from tqdm import tqdm

from models import GAEModel, GCNEncoder, GATEncoder

In [6]:
# Load the Porto road network from the OSM export.
network = RoadNetwork()
network.load("../../osm_data/porto")

# Alternative input kept for reference (map-matched trajectories):
# df = pd.read_csv("../datasets/trajectories/Porto/road_segment_map_final.csv", sep=";", usecols=["id", "cpath"])

# Per-edge trajectory statistics, keyed by the (u, v, key) edge identifier.
# `util` and `avg_speed` are min-max scaled; any NaN left in the frame
# afterwards is replaced by 0.
traj_features = (
    pd.read_csv("../../datasets/trajectories/Porto/speed_features_unnormalized.csv")
    .set_index(["u", "v", "key"])
    .assign(
        util=lambda df: (df["util"] - df["util"].min())
        / (df["util"].max() - df["util"].min()),
        avg_speed=lambda df: (df["avg_speed"] - df["avg_speed"].min())
        / (df["avg_speed"].max() - df["avg_speed"].min()),
    )
    .fillna(0)
)

In [4]:
# Pin CUDA work to GPU index 1, but only when that device actually exists.
# Calling set_device(1) unconditionally raises on CPU-only hosts and on
# machines with a single GPU; in those cases the default device is kept.
if torch.cuda.is_available() and torch.cuda.device_count() > 1:
    torch.cuda.set_device(1)

# DataFrame.info() writes its summary to stdout and returns None, so it is
# called directly -- wrapping it in print() only appends a stray "None".
traj_features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11331 entries, 0 to 11330
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   u          11331 non-null  int64  
 1   v          11331 non-null  int64  
 2   key        11331 non-null  int64  
 3   id         11331 non-null  int64  
 4   util       11331 non-null  float64
 5   avg_speed  11331 non-null  float64
dtypes: float64(2), int64(4)
memory usage: 531.3 KB
None


In [7]:
# Build the PyTorch Geometric dataset for the road segments, handing the
# normalised trajectory statistics to the generator as `traj_data`.
# NOTE(review): how `traj_data` ends up in `data.x` is decided inside
# RoadNetwork.generate_road_segment_pyg_dataset -- not visible here.
data = network.generate_road_segment_pyg_dataset(traj_data=traj_features)

In [11]:
# Inspect the `x` attribute of the generated dataset (by PyG convention the
# node feature matrix).
data.x

In [13]:
# For training without node features: remove them from the dataset.
# NOTE(review): this overwrites `data` in place, so the features are gone
# for every later cell until the dataset is generated again.
data.x = None

TypeError: empty() received an invalid combination of arguments - got (), but expected one of:
 * (tuple of ints size, *, tuple of names names, torch.memory_format memory_format, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
 * (tuple of ints size, *, torch.memory_format memory_format, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)


In [12]:
from torch_geometric.data import Data
import torch_geometric.transforms as T

# create pyg dataset
# Use the GPU when CUDA is usable, otherwise stay on the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

# Two-step pipeline: one-hot encode the node degree (upper bound 30), then
# move the dataset to `device`.
degree_one_hot = T.OneHotDegree(30)
to_device = T.ToDevice(device)
transform = T.Compose([degree_one_hot, to_device])

# NOTE(review): `data` is rebound to the transformed dataset, so running this
# cell twice applies the transform twice.
data = transform(data)
print(data.x)

# Model training / persistence, currently disabled:
#model = GAEModel(data, device=device, encoder=GCNEncoder, emb_dim=128)
#model.train(epochs=10000)
# model.save_model(path="../../model_states/gaegcn/")
# model.save_emb(path="../../model_states/gaegcn/")


AttributeError: 'NoneType' object has no attribute 'shape'

In [84]:
# Compute the embeddings by running the wrapped encoder on the node features
# and the edge index.
# NOTE(review): `model` is not created by any active code in this notebook --
# the only GAEModel construction visible is commented out -- so this cell
# depends on leftover kernel state.
z = model.model.encode(data.x, data.edge_index)
# Show the embedding dimensions.
z.shape

torch.Size([11331, 128])

In [6]:
# Load previously saved embeddings from disk instead of re-encoding.
# NOTE(review): relies on an existing `model` object; the return type of
# load_emb is not visible here -- confirm it is array-like.
z = model.load_emb("../../model_states/gaegcn/embedding.out")

In [9]:
from sklearn import model_selection
from sklearn import linear_model
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# Feature matrix: one embedding row per node of the line graph.
# `z` may come straight from the encoder (a torch tensor, possibly on the GPU
# and attached to the autograd graph) or from `load_emb`. scikit-learn cannot
# consume the former, so tensors are detached and copied to host memory;
# anything else is passed through unchanged.
X = z.detach().cpu().numpy() if isinstance(z, torch.Tensor) else z

# Target: the encoded highway type of each road segment, looked up in the
# edge table in line-graph node order so that rows of X and y line up.
# train simple classifier on 80% of data with cross validation
y = np.array([network.gdf_edges.loc[n]["highway_enc"] for n in network.line_graph.nodes])

# Optional filtering of rare classes, currently disabled:
# mask = ((y==11) | (y==10) | (y==9) | (y==4) | (y==1) | (y==2) | (y==12) | (y==7)) # remove uncommon tags
# X = X[~mask, :]
# y = y[~mask]
# print(np.unique(y, return_counts=True))

# Fixed random_state keeps the 80/20 split reproducible across runs.
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size= 0.2, random_state = 1)

print('X_train dimension= ', X_train.shape)
print('X_test dimension= ', X_test.shape)
print('y_train dimension= ', y_train.shape)
print('y_test dimension= ', y_test.shape)

X_train dimension=  (9064, 128)
X_test dimension=  (2267, 128)
y_train dimension=  (9064,)
y_test dimension=  (2267,)


In [10]:
# Fit a multinomial logistic regression on the training embeddings and
# report per-class precision / recall / F1 on the held-out split.
lm = linear_model.LogisticRegression(
    max_iter=1000,
    multi_class="multinomial",
)
lm.fit(X_train, y_train)
y_pred = lm.predict(X_test)
print(metrics.classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.83      0.80      0.81       309
           1       0.58      0.39      0.47        18
           2       0.58      0.68      0.63        31
           3       0.78      0.75      0.77       133
           4       0.62      0.62      0.62        13
           5       0.87      0.88      0.87      1175
           6       0.74      0.72      0.73       301
           7       0.55      0.60      0.57        20
           8       0.70      0.76      0.73       221
           9       1.00      0.60      0.75         5
          10       0.75      0.75      0.75         4
          11       0.00      0.00      0.00         3
          12       0.88      0.68      0.77        34

    accuracy                           0.81      2267
   macro avg       0.68      0.63      0.65      2267
weighted avg       0.81      0.81      0.81      2267



In [7]:
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T

# Reference experiment on Cora: normalise features, keep everything on the
# CPU, then split the edges for link prediction (5% validation, 10% test,
# no negative samples added to the training split).
device = torch.device('cpu')
link_split = T.RandomLinkSplit(
    num_val=0.05,
    num_test=0.1,
    is_undirected=True,
    split_labels=True,
    add_negative_train_samples=False,
)
transform = T.Compose([T.NormalizeFeatures(), T.ToDevice(device), link_split])

# The dataset is stored in / downloaded to the current directory.
dataset = Planetoid(".", "Cora", transform=transform)

# With the link split in the pipeline, item 0 is a (train, val, test) triple.
t, v, te = dataset[0]
t

Data(x=[2708, 1433], edge_index=[2, 8976], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], pos_edge_label=[4488], pos_edge_label_index=[2, 4488])

In [21]:
from torch_geometric.utils import train_test_split_edges

# Same link-prediction pipeline as used for Cora, applied to the road graph.
device = torch.device('cpu')
transform = T.Compose([
    T.NormalizeFeatures(),
    T.ToDevice(device),
    T.RandomLinkSplit(num_val=0.05, num_test=0.1, is_undirected=True,
                      split_labels=True, add_negative_train_samples=False)
])
print(data)

# RandomLinkSplit hands back a (train, val, test) triple instead of updating
# its input, so the result has to be kept -- a bare `transform(data)` call
# throws the split away.
train_data, val_data, test_data = transform(data)

# train_test_split_edges rewrites the object it is given (edge_index is
# replaced by the *_pos_edge_index / *_neg_edge_index attributes) and returns
# that same object. Split a clone so `data` keeps its edge_index for the
# other cells that read it.
test = train_test_split_edges(data.clone())

print(test)

Data(x=[11331, 8], edge_index=[2, 26617])




Data(x=[11331, 8], val_pos_edge_index=[2, 699], test_pos_edge_index=[2, 1399], train_pos_edge_index=[2, 23788], train_neg_adj_mask=[11331, 11331], val_neg_edge_index=[2, 699], test_neg_edge_index=[2, 1399])
