In [1]:
import sys
import os

# Add the parent folder (the project root) to the module search path, so
# that the `src` package can be imported from this notebook.
sys.path.append(os.pardir)

In [2]:
# Settings for autoreloading: load the IPython `autoreload` extension and
# select mode 2, so that edits made to the imported modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
from src.utils.seed import set_random_seed

# Fix the random seed up front so that the operations are deterministic.
SEED = 42

set_random_seed(SEED)

In [4]:
import torch

# Use the GPU for training and querying the model when PyTorch can see
# one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

print(f'The selected device is: "{DEVICE}"')

The selected device is: "cuda"


# Loading the Data

In [5]:
import os

# Folder holding the PeMS-Bay data, relative to the notebook location.
BASE_DATA_DIR = os.path.join(os.pardir, 'data', 'pems-bay')

In [6]:
import pickle
# Load the pickled scaler object from the processed data folder.
# NOTE(review): unpickling executes code stored in the file; only load
# files that come from a trusted source.
scaler_path = os.path.join(BASE_DATA_DIR, 'processed', 'scaler.pkl')
with open(scaler_path, 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

In [7]:
from src.spatial_temporal_gnn.model import SpatialTemporalGNN
from src.explanation.navigator.model import Navigator
from src.data.data_extraction import get_adjacency_matrix

# Get the adjacency matrix
adj_matrix_structure = get_adjacency_matrix(
    os.path.join(BASE_DATA_DIR, 'raw', 'adj_mx_pems_bay.pkl'))

# Get the header of the adjacency matrix, the node indices and the
# matrix itself.
header, node_ids_dict, adj_matrix = adj_matrix_structure

# Get the STGNN and load the checkpoints.
spatial_temporal_gnn = SpatialTemporalGNN(9, 1, 12, 12, adj_matrix, DEVICE, 64)

stgnn_checkpoints_path = os.path.join('..', 'models', 'checkpoints',
                                      'st_gnn_pems_bay.pth')

# Remap the stored tensors onto the selected device: without
# `map_location`, a checkpoint saved from a GPU cannot be loaded when
# DEVICE is 'cpu'.
stgnn_checkpoints = torch.load(stgnn_checkpoints_path, map_location=DEVICE)
spatial_temporal_gnn.load_state_dict(stgnn_checkpoints['model_state_dict'])

# Set the STGNN in evaluation mode (the trailing `;` hides the cell output).
spatial_temporal_gnn.eval();

# Get the Navigator and load the checkpoints.
navigator = Navigator(DEVICE)

# NOTE(review): this notebook works on the PeMS-Bay data, yet the Navigator
# checkpoint file is named after METR-LA - confirm that this is the
# intended checkpoint.
navigator_checkpoints_path = os.path.join('..', 'models', 'checkpoints',
                                          'navigator_metr_la.pth')

# Same device remapping as for the STGNN checkpoint.
navigator_checkpoints = torch.load(navigator_checkpoints_path,
                                   map_location=DEVICE)
navigator.load_state_dict(navigator_checkpoints['model_state_dict'])

# Set the Navigator in evaluation mode (the trailing `;` hides the cell
# output).
navigator.eval();



In [9]:
from src.data.data_extraction import get_locations_dataframe

# Build the dataframe with the latitude and longitude of each sensor from
# the raw locations file, which is read as having no header row.
locations_csv_path = os.path.join(
    BASE_DATA_DIR, 'raw', 'graph_sensor_locations_pems_bay.csv')
locations_df = get_locations_dataframe(locations_csv_path, has_header=False)

In [10]:
# Get the node positions dictionary by swapping the keys and the values
# of `node_ids_dict`.
node_pos_dict = {
    position: node_id for node_id, position in node_ids_dict.items()}

In [11]:
import pickle

# Get the data scaler from its pickle file.
# NOTE(review): unpickling executes code stored in the file; only load
# files that come from a trusted source.
scaler_file_path = os.path.join(BASE_DATA_DIR, 'processed', 'scaler.pkl')
with open(scaler_file_path, 'rb') as scaler_handle:
    scaler = pickle.load(scaler_handle)

In [12]:
import os
import numpy as np

# Folder containing the explained data.
explained_dir = os.path.join(BASE_DATA_DIR, 'explained')

# Get the input and target values of the explained data.
x_test = np.load(os.path.join(explained_dir, 'x_test.npy'))
y_test = np.load(os.path.join(explained_dir, 'y_test.npy'))

# Get the matching time information of the inputs and the targets.
x_test_time = np.load(os.path.join(explained_dir, 'x_test_time.npy'))
y_test_time = np.load(os.path.join(explained_dir, 'y_test_time.npy'))

In [13]:
from src.utils.config import MPH_TO_KMH_FACTOR

# Turn the speeds from miles per hour into kilometers per hour. Only the
# feature at index 0 of the inputs is rescaled, while the targets are
# rescaled as a whole.
# NOTE(review): running this cell again applies the factor once more to
# both arrays.
x_test[..., 0] = np.multiply(x_test[..., 0], MPH_TO_KMH_FACTOR)
y_test = np.multiply(y_test, MPH_TO_KMH_FACTOR)

In [14]:
import pickle

# Load the pickled node locations structure.
# NOTE(review): unpickling executes code stored in the file; only load
# files that come from a trusted source.
node_info_path = os.path.join(BASE_DATA_DIR, 'structured', 'node_locations.pkl')
with open(node_info_path, 'rb') as node_info_file:
    node_info = pickle.load(node_info_file)

In [15]:
# Index of the explained instance to inspect.
i = 5

# Values and time information of the selected instance.
sample_x = x_test[i]
sample_y = y_test[i]
sample_x_time = x_test_time[i]
sample_y_time = y_test_time[i]

In [16]:
# The unpacking requires `y_test` to have exactly four axes; only the sizes
# of the second and third ones are kept.
# NOTE(review): presumably these are the timesteps and nodes axes, as the
# names suggest - confirm the axis order.
_, n_timesteps, n_nodes, _ = y_test.shape

In [17]:
from src.explanation.clustering.clustering import (
    get_adjacency_distance_matrix,
)

# Distance matrix obtained from the adjacency matrix and the number of
# timesteps.
adj_distance_matrix = get_adjacency_distance_matrix(
    adj_matrix,
    n_timesteps,
)

In [18]:
from src.explanation.clustering.clustering import (
    get_temporal_distance_matrix,
)

# Distance matrix obtained from the number of nodes and of timesteps.
temporal_distance_matrix = get_temporal_distance_matrix(
    n_nodes,
    n_timesteps,
)

In [19]:
# Clustering parameters, set to the best values found by the grid search.
SPEED_DISTANCE_WEIGHT, N_CLUSTERS = 3, 4

In [20]:
from src.explanation.clustering.clustering import get_explanation_clusters

In [21]:
# Cluster the first feature of the selected input sample, using the two
# distance matrices and the parameters set above.
clusters_x = get_explanation_clusters(
    sample_x[..., :1],
    adj_distance_matrix,
    temporal_distance_matrix,
    SPEED_DISTANCE_WEIGHT,
    N_CLUSTERS,
)

In [22]:
#clusters_x

In [23]:
# Disabled alternative clustering call, kept as a bare string literal so
# that it is never executed (the cell only displays the string).
# NOTE(review): `EPS` and `MIN_SAMPLES` are not defined in the cells visible
# here - define them before re-enabling this code.
'''from src.explanation.clustering.clustering import get_clusters

clusters_x = get_clusters(
    sample_x[..., :1],
    adj_distance_matrix,
    temporal_distance_matrix,
    eps=EPS,
    min_samples=MIN_SAMPLES,
    remove_zeros=True)'''

'from src.explanation.clustering.clustering import get_clusters\n\nclusters_x = get_clusters(\n    sample_x[..., :1],\n    adj_distance_matrix,\n    temporal_distance_matrix,\n    eps=EPS,\n    min_samples=MIN_SAMPLES,\n    remove_zeros=True)'

In [24]:
# Show the indices of the non-zero entries of the sample, one array per axis.
np.nonzero(sample_x)

(array([ 0,  0,  0, ..., 11, 11, 11], dtype=int64),
 array([  0,   0,   0, ..., 323, 324, 324], dtype=int64),
 array([0, 1, 3, ..., 3, 1, 3], dtype=int64))

In [25]:
# Switch to the object dtype, so that the cluster ids can later be replaced
# by string labels.
clusters_x = clusters_x.astype(dtype=object)

In [26]:
# Show the distinct cluster ids found in the sample.
np.unique(clusters_x)

array([-1, 0, 1, 2, 3], dtype=object)

In [27]:
# Print, for each cluster id, the mean of the first input feature over the
# entries assigned to that cluster.
first_feature = sample_x[..., :1]
for cluster_id in np.unique(clusters_x):
    in_cluster = clusters_x == cluster_id
    print(np.mean(first_feature[in_cluster]))

0.0
98.59773069473685
103.61876297142858
85.89203040000001
102.998016


In [28]:
# Replace, in place, each cluster id by its string label: a blank for the
# id -1 and 'cluster <id>' for every other id.
# NOTE(review): running this cell again relabels the string labels
# themselves, since the array is modified in place.
for cluster_id in np.unique(clusters_x):
    label = ' ' if cluster_id == -1 else f'cluster {cluster_id}'
    clusters_x[clusters_x == cluster_id] = label

In [29]:
# Label the strictly positive entries of the target sample as 'target'
# and every other entry with a blank, in an array of object dtype.
clusters_y = np.where(sample_y > 0, 'target', ' ').astype(object)

In [30]:
#sample_x.s

In [42]:
from src.explanation.clustering.analyisis import (
    get_node_values_with_clusters_and_location_dataframe,
)

# Build the dataframe from the first input feature, its cluster labels, the
# node positions, the sensor locations and the input time information.
df = get_node_values_with_clusters_and_location_dataframe(
    sample_x[..., 0:1],
    clusters_x,
    node_pos_dict,
    locations_df,
    sample_x_time,
)

In [43]:
# Show the distinct cluster labels stored in the dataframe.
df['cluster'].unique()

array(['cluster 0', ' ', 'cluster 3', 'cluster 2', 'cluster 1'],
      dtype=object)

In [44]:
# Icon name associated with each cluster label.
icons = dict([
    (' ', 'cancel'),
    ('cluster 0', 'star'),
    ('cluster 1', 'circle'),
    ('cluster 2', 'heart'),
    ('cluster 3', 'play'),
    ('cluster 4', 'pause'),
    ('target', 'certified'),
])

In [45]:
# Add the icon of each row by looking up its cluster label; a label that
# is missing from `icons` raises a KeyError.
df['icon'] = df['cluster'].apply(
    lambda cluster_label: icons[cluster_label])

In [46]:
from src.data.data_analysis import show_kepler_map

# Font size 27
# Show the dataframe on the Kepler map.
show_kepler_map(df)

KeplerGl(data={'data':      sensor_id   latitude   longitude    cluster       speed  \
0       400001  37.3640…

In [47]:
from src.data.data_analysis import show_kepler_map

# Show the same dataframe on the Kepler map once more.
show_kepler_map(df)

KeplerGl(data={'data':      sensor_id   latitude   longitude    cluster       speed  \
0       400001  37.3640…

In [48]:
# Disabled snippet, kept as a bare string literal whose display is
# suppressed by the trailing `;`: it would dump `m.config` to a JSON file.
# NOTE(review): `m` is not assigned in the cells visible here - bind the
# map object to `m` before re-enabling this code.
'''# Save m config
import json

with open('../config/kepler/visualization_test_x_clusters.json', 'w') as f:
    json.dump(m.config, f)''';

In [49]:
from src.explanation.clustering.analyisis import (
    get_node_values_with_clusters_and_location_dataframe,
)

# Build the dataframe from the target sample, its labels, the node
# positions, the sensor locations and the target time information.
df = get_node_values_with_clusters_and_location_dataframe(
    sample_y,
    clusters_y,
    node_pos_dict,
    locations_df,
    sample_y_time,
)

In [50]:
# Show the distinct labels stored in the targets dataframe.
df['cluster'].unique()

array([' ', 'target'], dtype=object)

In [51]:
# Add the icon of each row by looking up its label; a label that is
# missing from `icons` raises a KeyError.
df['icon'] = df['cluster'].apply(
    lambda target_label: icons[target_label])

In [52]:
from src.data.data_analysis import show_kepler_map

# Show the targets dataframe on the Kepler map.
show_kepler_map(df)

KeplerGl(data={'data':      sensor_id   latitude   longitude cluster  speed            datetime  \
0       400…

In [28]:
# m

In [None]:
# Disabled snippet, kept as a bare string literal whose display is
# suppressed by the trailing `;`: it would dump `m.config` to a JSON file.
# NOTE(review): `m` is not assigned in the cells visible here - bind the
# map object to `m` before re-enabling this code.
'''# Save m config
import json

with open('../config/kepler/visualization_test_y.json', 'w') as f:
    json.dump(m.config, f)''';