In [11]:
%load_ext autoreload
%autoreload 2

import os
import sys

# Make the repository root (two directories above this notebook's working
# directory) importable, so the project-local imports below resolve.
module_path = os.path.abspath(os.path.join(os.pardir, os.pardir))
if module_path not in sys.path:
    # Guarded so that re-running the cell does not add duplicate entries.
    sys.path.append(module_path)

from generator import RoadNetwork
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import pandas as pd
import networkx as nx
import numpy as np
from tqdm import tqdm

from models import GAEModel, GCNEncoder, GATEncoder

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Load the road network from the local Porto OSM extract.
network = RoadNetwork()
network.load("../../osm_data/porto")
# df = pd.read_csv("../datasets/trajectories/Porto/road_segment_map_final.csv", sep=";", usecols=["id", "cpath"])

# Per-edge trajectory features, indexed by the (u, v, key) edge identifier.
traj_features = pd.read_csv(
    "../../datasets/trajectories/Porto/speed_features_unnormalized.csv"
).set_index(["u", "v", "key"])

# Min-max scale each feature column to [0, 1]. min()/max() skip NaNs, so the
# missing values survive the scaling and are zero-filled afterwards.
for feature in ("util", "avg_speed"):
    col_min = traj_features[feature].min()
    col_max = traj_features[feature].max()
    traj_features[feature] = (traj_features[feature] - col_min) / (col_max - col_min)
traj_features = traj_features.fillna(0)

In [4]:
# Pin this process to GPU 2 only when that device actually exists; on a
# CPU-only host, or one with fewer than three GPUs, set_device(2) would raise.
if torch.cuda.is_available() and torch.cuda.device_count() > 2:
    torch.cuda.set_device(2)

# DataFrame.info() writes the summary to stdout itself and returns None, so it
# is called bare: wrapping it in print() emits an extra "None" line.
traj_features.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 11331 entries, (25503936, 4722746638, 0) to (9709007543, 415754684, 0)
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   id         11331 non-null  int64  
 1   util       11331 non-null  float64
 2   avg_speed  11331 non-null  float64
dtypes: float64(2), int64(1)
memory usage: 662.5 KB
None


In [15]:
# Build the PyG dataset for the road segments. Trajectory features are left out
# here (traj_data=None); pass traj_features instead to include them.
# "highway_enc" is dropped from the inputs; it is the label predicted further
# down, so presumably this avoids leaking the target (confirm in RoadNetwork).
data = network.generate_road_segment_pyg_dataset(
    traj_data=None,  # traj_features
    include_coords=True,
    drop_labels=["highway_enc"],
)

In [18]:
# Sanity check: dimensions of the input feature matrix handed to the encoder.
data.x.shape

torch.Size([11331, 21])

In [None]:
# To train without input features, uncomment the next line and enable T.OneHotDegree in the transform cell.
# data.x = None

In [19]:
from torch_geometric.data import Data
import torch_geometric.transforms as T

# create pyg dataset
# Use GPU 2 only when it really exists. Checking is_available() alone would
# still select 'cuda:2' on a host with one or two GPUs and fail at transfer time.
device = torch.device(
    'cuda:2' if torch.cuda.is_available() and torch.cuda.device_count() > 2 else 'cpu'
)
transform = T.Compose([
    # T.OneHotDegree(128), # training without features
    T.ToDevice(device),
])
# Move the dataset onto the selected device before it is handed to the model.
data = transform(data)
print(data.x.shape)

# Graph auto-encoder with a GAT encoder and 128-dimensional embeddings.
model = GAEModel(data, device=device, encoder=GATEncoder, emb_dim=128)
model.train(epochs=20000)
# model.save_model(path="../model_states/gaegcn/")
# model.save_emb(path="../model_states/gaegcn/")


torch.Size([11331, 21])
Epoch: 500, avg_loss: 0.979432095170021
Epoch: 1000, avg_loss: 0.9389632423520088
Epoch: 1500, avg_loss: 0.9185743187268575
Epoch: 2000, avg_loss: 0.9061920402050019
Epoch: 2500, avg_loss: 0.8975091202259063
Epoch: 3000, avg_loss: 0.8909486925403277
Epoch: 3500, avg_loss: 0.8855186921698707
Epoch: 4000, avg_loss: 0.8809734211564064
Epoch: 4500, avg_loss: 0.8770406175454457
Epoch: 5000, avg_loss: 0.8735740212321281
Epoch: 5500, avg_loss: 0.8704851040189916
Epoch: 6000, avg_loss: 0.8676321873366832
Epoch: 6500, avg_loss: 0.865113131413093
Epoch: 7000, avg_loss: 0.8627527216076851
Epoch: 7500, avg_loss: 0.8605937406539917
Epoch: 8000, avg_loss: 0.8586001276895404
Epoch: 8500, avg_loss: 0.8567462715751984
Epoch: 9000, avg_loss: 0.8550302124089665
Epoch: 9500, avg_loss: 0.8534289087559047
Epoch: 10000, avg_loss: 0.8519359570860863
Epoch: 10500, avg_loss: 0.850536069205829
Epoch: 11000, avg_loss: 0.8492084951129827
Epoch: 11500, avg_loss: 0.8479676373523215
Epoch: 120

In [None]:
# Compute the embeddings for inference only. Running the encoder under no_grad
# avoids building and retaining an autograd graph for a tensor that is only
# ever detached and converted to numpy afterwards.
# NOTE(review): if the encoder uses dropout, confirm the model is in eval mode
# here so that the embeddings are deterministic.
with torch.no_grad():
    z = model.model.encode(data.x, data.edge_index)
z.shape

In [20]:
# Save the trained GAE model through GAEModel.save_model.
# NOTE(review): this path goes up one directory ("../") while the data and
# embedding paths in this notebook go up two ("../../") - confirm the target.
model.save_model(
    path="../model_states/gaegat/",
)

In [None]:
# Replace z with an embedding read from disk via GAEModel.load_emb.
# NOTE(review): this loads the "gaegcn" embedding although the model built in
# this notebook uses GATEncoder and is saved under "gaegat" - confirm that this
# is the file the downstream evaluation is meant to use.
z = model.load_emb("../../model_states/gaegcn/embedding.out")

In [None]:
from sklearn import model_selection
from sklearn import linear_model
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# Inputs: the learned embedding, one row per node.
X = z.detach().cpu().numpy()
# Targets: the "highway_enc" value of every road segment, collected in the
# iteration order of the line-graph nodes.
# (the classifier is trained on 80% of the data, 20% is held out)
y = np.array([
    network.gdf_edges.loc[segment]["highway_enc"]
    for segment in network.line_graph.nodes
])

# mask = ((y==11) | (y==10) | (y==9) | (y==4) | (y==1) | (y==2) | (y==12) | (y==7)) # remove uncommon tags
# X = X[~mask, :]
# y = y[~mask]
# print(np.unique(y, return_counts=True))

# Fixed random_state keeps the 80/20 split reproducible across runs.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

# Report the shape of every split.
for caption, split in (
    ('X_train dimension= ', X_train),
    ('X_test dimension= ', X_test),
    ('y_train dimension= ', y_train),
    ('y_test dimension= ', y_test),
):
    print(caption, split.shape)

In [None]:
# Multinomial logistic regression on the embeddings; fit() returns the
# estimator itself, so construction and fitting are chained.
# NOTE(review): recent scikit-learn releases deprecate the multi_class
# argument - confirm against the installed version.
lm = linear_model.LogisticRegression(
    multi_class="multinomial",
    max_iter=1000,
).fit(X_train, y_train)

# Per-class precision / recall / F1 on the held-out split.
print(
    metrics.classification_report(
        y_true=y_test,
        y_pred=lm.predict(X_test),
    )
)

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer

# Regression probe: predict each road segment's average speed from its
# embedding with a plain linear model and report the mean absolute error.
tf = pd.read_csv(
    "../../datasets/trajectories/Porto/speed_features_unnormalized.csv"
).set_index(["u", "v", "key"])

# Order the feature rows like network.line_graph.nodes so that row i of y
# belongs to the same segment as row i of X.
# NOTE(review): this assumes the rows of z follow the line-graph node order -
# confirm against how the embedding was produced.
map_id = {j: i for i, j in enumerate(network.line_graph.nodes)}
tf["idx"] = tf.index.map(map_id)
# An edge that is unknown to the line graph would get a NaN position and
# silently misalign targets and embeddings, so fail loudly instead.
assert tf["idx"].notna().all(), "speed features contain edges missing from the line graph"
tf = tf.sort_values(by="idx", axis=0)

decoder = linear_model.LinearRegression(fit_intercept=True)
X = z.detach().cpu().numpy()
# Build the target without in-place mutation of a column pulled from the frame.
y = tf["avg_speed"].fillna(0).round(2)
assert len(X) == len(y), "embedding rows and speed targets differ in length"

X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size= 0.2, random_state = 1)

print('X_train dimension= ', X_train.shape)
print('X_test dimension= ', X_test.shape)
print('y_train dimension= ', y_train.shape)
print('y_test dimension= ', y_test.shape)

decoder.fit(X_train, y_train)
scorer = make_scorer(metrics.mean_absolute_error)
# cross_val_score clones and refits the estimator on every fold, so it never
# uses the decoder fitted above. It is therefore run on the training split,
# and the held-out split is scored exactly once with the fitted decoder.
print('cv MAE (train split)= ', np.mean(cross_val_score(estimator=decoder, X=X_train, y=y_train, scoring=scorer, cv=5)))
print('held-out MAE= ', metrics.mean_absolute_error(y_test, decoder.predict(X_test)))

In [None]:
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T

# Reference experiment on the Cora citation graph, kept on the CPU.
device = torch.device('cpu')

# Normalise the features, move the data to the device, then split the edges
# into train / validation / test sets for link prediction.
transform = T.Compose([
    T.NormalizeFeatures(),
    T.ToDevice(device),
    T.RandomLinkSplit(
        num_val=0.05,
        num_test=0.1,
        is_undirected=True,
        split_labels=True,
        add_negative_train_samples=False,
    ),
])

# The transform runs on access, so indexing the dataset yields the three splits.
dataset = Planetoid(root=".", name="Cora", transform=transform)
t, v, te = dataset[0]
t

In [None]:
from torch_geometric.utils import train_test_split_edges
# Compare the two edge-splitting utilities on the road-network dataset (CPU).
device = torch.device('cpu')
transform = T.Compose([
    T.NormalizeFeatures(),
    T.ToDevice(device),
    T.RandomLinkSplit(num_val=0.05, num_test=0.1, is_undirected=True,
                      split_labels=True, add_negative_train_samples=False)
])
print(data)

# RandomLinkSplit hands back the (train, val, test) splits as its return
# value, so keep them instead of discarding the result.
train_data, val_data, test_data = transform(data)

# train_test_split_edges rewrites the object it receives (edge_index is
# replaced by the split-specific attributes). Work on a clone so that `data`
# stays intact and earlier cells can still be re-run.
test = train_test_split_edges(data.clone())

print(test)