In [1]:
import sys
import os

# Make the parent directory importable so that the project's `src`
# package can be resolved from this notebook.
sys.path.append(os.pardir)

In [2]:
# Enable IPython's autoreload extension in mode 2 so that edits made to
# the imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
from src.utils.seed import set_random_seed

# Fix the seed through the project helper so that the stochastic steps of
# this notebook are repeatable across runs.
# NOTE(review): which generators get seeded (random / NumPy / PyTorch) is
# decided inside src.utils.seed — confirm there.
SEED = 42
set_random_seed(SEED)

In [4]:
import torch

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'
print(f'The selected device is: "{DEVICE}"')

The selected device is: "cuda"


# Loading the Data

In [5]:
import os

# Root folder of the `metr-la` data, relative to the notebook location.
BASE_DATA_DIR = os.path.join(os.pardir, 'data', 'metr-la')

In [6]:
import pickle
# Load the pickled data scaler stored alongside the processed data.
scaler_path = os.path.join(BASE_DATA_DIR, 'processed', 'scaler.pkl')
with open(scaler_path, 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

In [7]:
from src.spatial_temporal_gnn.model import SpatialTemporalGNN
from src.explanation.navigator.model import Navigator
from src.data.data_extraction import get_adjacency_matrix

# Get the adjacency matrix structure stored with the raw data.
adj_matrix_structure = get_adjacency_matrix(
    os.path.join(BASE_DATA_DIR, 'raw', 'adj_mx_metr_la.pkl'))

# Get the header of the adjacency matrix, the node indices and the
# matrix itself.
header, node_ids_dict, adj_matrix = adj_matrix_structure

# Get the STGNN and load the checkpoints.
# NOTE(review): the positional hyper-parameters (9, 1, 12, 12, ..., 64)
# have to match the architecture stored in the checkpoint; their meaning
# is defined by the SpatialTemporalGNN signature.
spatial_temporal_gnn = SpatialTemporalGNN(9, 1, 12, 12, adj_matrix, DEVICE, 64)

stgnn_checkpoints_path = os.path.join('..', 'models', 'checkpoints',
                                      'st_gnn_metr_la.pth')

# `map_location` remaps the stored tensors onto the selected device, so a
# checkpoint saved from a GPU run can still be loaded when DEVICE is 'cpu'.
stgnn_checkpoints = torch.load(stgnn_checkpoints_path, map_location=DEVICE)
spatial_temporal_gnn.load_state_dict(stgnn_checkpoints['model_state_dict'])

# Set the STGNN in evaluation mode (the trailing `;` hides the cell echo).
spatial_temporal_gnn.eval();

# Get the Navigator and load the checkpoints.
navigator = Navigator(DEVICE)

navigator_checkpoints_path = os.path.join('..', 'models', 'checkpoints',
                                          'navigator_metr_la.pth')

navigator_checkpoints = torch.load(navigator_checkpoints_path,
                                   map_location=DEVICE)
navigator.load_state_dict(navigator_checkpoints['model_state_dict'])

# Set the Navigator in evaluation mode (the trailing `;` hides the cell echo).
navigator.eval();



In [8]:
from src.data.data_extraction import get_locations_dataframe

# Read the latitude and longitude of each sensor into a dataframe.
locations_csv_path = os.path.join(
    BASE_DATA_DIR, 'raw', 'graph_sensor_locations_metr_la.csv')
locations_df = get_locations_dataframe(locations_csv_path, has_header=True)

In [9]:
# Invert `node_ids_dict`: every value of the original mapping becomes a key
# that maps back to the key it was stored under.
node_pos_dict = {index: node_id for node_id, index in node_ids_dict.items()}

In [10]:
import pickle

# Get the data scaler.
# NOTE(review): this repeats the scaler load already performed in an earlier
# cell of this notebook (same file, same variable); one of the two cells
# can be dropped.
with open(os.path.join(BASE_DATA_DIR, 'processed', 'scaler.pkl'), 'rb') as f:
    scaler = pickle.load(f)

In [24]:
import os
import numpy as np

# Folder holding the explained instances.
explained_dir = os.path.join(BASE_DATA_DIR, 'explained')

# Get the explained data followed by its time information, loading the
# four arrays in the same order as they are unpacked.
x_test, y_test, x_test_time, y_test_time = (
    np.load(os.path.join(explained_dir, file_name))
    for file_name in (
        'x_test.npy', 'y_test.npy', 'x_test_time.npy', 'y_test_time.npy'))

In [25]:
import pickle

# Load the pickled content of `structured/node_locations.pkl`.
# NOTE(review): `node_info` is not referenced by any of the later visible
# cells — confirm it is still needed.
with open(os.path.join(BASE_DATA_DIR, 'structured', 'node_locations.pkl'), 'rb') as f:
    node_info = pickle.load(f)

In [57]:
# Index of the explained instance that is visualised in this notebook.
i = 3

# Pick the input, the target and their time information for that instance.
sample_x = x_test[i]
sample_y = y_test[i]
sample_x_time = x_test_time[i]
sample_y_time = y_test_time[i]

In [58]:
# The targets are unpacked as a 4-D array and only the two middle axes are
# kept (presumably samples x time steps x nodes x features — confirm).
_, n_timesteps, n_nodes, _ = y_test.shape

In [59]:
from src.explanation.clustering.clustering import (
    get_adjacency_distance_matrix)

# Spatial distance matrix derived from the adjacency matrix for the given
# number of time steps (presumably one row/column per (time step, node)
# pair — confirm in src.explanation.clustering.clustering).
adj_distance_matrix = get_adjacency_distance_matrix(adj_matrix, n_timesteps)

In [60]:
from src.explanation.clustering.clustering import (
    get_temporal_distance_matrix)

# Temporal distance matrix built from the number of nodes and time steps
# (presumably one row/column per (time step, node) pair — confirm in
# src.explanation.clustering.clustering).
temporal_distance_matrix = get_temporal_distance_matrix(n_nodes, n_timesteps)

In [61]:
# Set the best parameters based on the results of the grid search.
# NOTE(review): EPS and MIN_SAMPLES are only referenced by the disabled
# `get_clusters(..., eps=EPS, min_samples=MIN_SAMPLES, ...)` call that is
# kept as a string literal further down; the `get_clusters` function defined
# in this notebook does not take them.

EPS = .5
MIN_SAMPLES = 2

In [79]:
from sklearn_extra.cluster import KMedoids
from scipy.spatial.distance import cdist

def get_clusters(
    instance: np.ndarray,
    adj_distance_matrix: np.ndarray,
    temporal_distance_matrix: np.ndarray,
    speed_distance_weight: float = 2,
    n_clusters: int = 5,
    ) -> np.ndarray:
    """
    Get the clusters of the given instance using the K-Medoids algorithm.

    Every (time step, node) entry of the instance is treated as a sample.
    The distance between two samples is the sum of their normalized speed
    distance (scaled by `speed_distance_weight`), their spatial distance
    and their temporal distance. Entries whose value is 0 are left out of
    the clustering and are labelled -1.

    Parameters
    ----------
    instance : ndarray
        The spatial-temporal graph instance to cluster, with shape
        (n_timesteps, n_nodes, 1).
    adj_distance_matrix : ndarray
        The adjacency matrix of the nodes in the graph measured in distance
        between 0 and 1, with one row and one column per entry of the
        flattened instance.
    temporal_distance_matrix : ndarray
        The matrix measuring the distance between the time steps
        of the nodes in the graph between 0 and 1, with the same shape
        as `adj_distance_matrix`.
    speed_distance_weight : float, optional
        The weight of the speed distance in the clustering process,
        by default 2.
    n_clusters : int, optional
        The number of clusters to form, by default 5. It is capped to the
        number of non-zero entries of the instance.

    Returns
    -------
    ndarray
        The integer cluster labels with shape (n_timesteps, n_nodes, 1);
        entries that were excluded from the clustering are set to -1.
    """
    n_timesteps, n_nodes, _ = instance.shape

    # Flatten the instance to a column vector (one row per entry).
    reshaped_instance = instance.reshape(-1, 1)

    # Start from a cluster vector filled with the dummy label -1.
    clusters_vector = np.full(reshaped_instance.shape, -1, dtype=int)

    # Only the entries that are present in the instance (non-zero values)
    # take part in the clustering.
    non_zeros = np.where(reshaped_instance != 0)[0]
    if non_zeros.size == 0:
        # Nothing to cluster: every entry keeps the dummy label.
        return clusters_vector.reshape(n_timesteps, n_nodes, 1)

    # Compute the distance matrix between the speed of the nodes in the graph.
    speed_distance_matrix = cdist(
        reshaped_instance, reshaped_instance, 'euclidean')
    # Normalize the distance matrix between 0 and 1. The maximum is 0 when
    # all the values are equal; the matrix is then already all zeros and
    # dividing would only produce NaNs.
    max_speed_distance = np.max(speed_distance_matrix)
    if max_speed_distance > 0:
        speed_distance_matrix /= max_speed_distance

    # Compute the weighted distance matrix between the nodes in the graph
    # in terms of speed and the spatial and temporal distances.
    distance_matrix = speed_distance_matrix * speed_distance_weight +\
        adj_distance_matrix + temporal_distance_matrix

    # Set the distance between nodes that are not connected (spatial
    # distance equal to 1) to an unreachable value.
    distance_matrix[adj_distance_matrix == 1] = 1_000

    # Reduce the distance matrix by solely considering the entries that are
    # present in the instance.
    distance_matrix = distance_matrix[np.ix_(non_zeros, non_zeros)]

    # Compute the clusters with K-Medoids on the precomputed distances.
    # The number of clusters cannot exceed the number of samples.
    kmedoid = KMedoids(
        metric='precomputed',
        n_clusters=min(n_clusters, non_zeros.size),
        max_iter=100_000,
        init='k-medoids++')
    clusters = kmedoid.fit_predict(distance_matrix)

    # Write the labels back at the positions of the clustered entries.
    clusters_vector[non_zeros, 0] = clusters

    # Reshape the clusters array to have the same shape as the instance.
    return clusters_vector.reshape(n_timesteps, n_nodes, 1)

In [80]:
# Cluster the first feature channel of the selected input instance
# (the values that `get_clusters` compares as speed).
clusters_x = get_clusters(sample_x[..., 0:1], adj_distance_matrix, temporal_distance_matrix)



In [64]:
#clusters_x

In [65]:
# Disabled alternative: call to the project's `get_clusters` (eps /
# min_samples variant), wrapped in a string literal so it is never executed.
# Without a trailing `;` the notebook echoes the string as the cell output.
'''from src.explanation.clustering.clustering import get_clusters

clusters_x = get_clusters(
    sample_x[..., :1],
    adj_distance_matrix,
    temporal_distance_matrix,
    eps=EPS,
    min_samples=MIN_SAMPLES,
    remove_zeros=True)'''

'from src.explanation.clustering.clustering import get_clusters\n\nclusters_x = get_clusters(\n    sample_x[..., :1],\n    adj_distance_matrix,\n    temporal_distance_matrix,\n    eps=EPS,\n    min_samples=MIN_SAMPLES,\n    remove_zeros=True)'

In [81]:
# Inspect the indices of the non-zero entries of the selected input instance.
sample_x.nonzero()

(array([ 0,  0,  0, ..., 11, 11, 11], dtype=int64),
 array([  0,   0,   1, ..., 205, 206, 206], dtype=int64),
 array([1, 2, 1, ..., 2, 1, 2], dtype=int64))

In [82]:
# Switch to an object array so that the integer labels can be replaced by
# strings in place.
clusters_x = clusters_x.astype(object)

In [83]:
# List the distinct labels; -1 is the dummy label of the entries that were
# left out of the clustering.
np.unique(clusters_x)

array([-1, 0, 1, 2, 3, 4], dtype=object)

In [84]:
# Replace the integer cluster ids by the string labels used in the plots:
# -1 (entry left out of the clustering) becomes a blank and k becomes
# 'cluster k'.
for c in np.unique(clusters_x):
    # Labels that are already strings come from a previous run of this
    # cell; skipping them keeps the cell re-runnable instead of producing
    # labels such as 'cluster cluster 0'.
    if isinstance(c, str):
        continue
    if c == -1:
        clusters_x[clusters_x == c] = ' '
    else:
        clusters_x[clusters_x == c] = f'cluster {c}'

In [85]:
# Label the entries of the target with a positive value as 'target' and
# every other entry with a blank, stored as an object array of strings.
clusters_y = np.where(sample_y > 0, 'target', ' ').astype(object)

In [86]:
#sample_x.s

In [87]:
from src.explanation.clustering.analyisis import get_node_values_with_clusters_and_location_dataframe

# Assemble the visualisation dataframe from the values of the input
# instance, their cluster labels, the sensor locations and the time
# information.
df = get_node_values_with_clusters_and_location_dataframe(
    sample_x[..., 0:1],
    clusters_x,
    node_pos_dict,
    locations_df,
    sample_x_time)

In [88]:
# Check which cluster labels ended up in the dataframe.
df['cluster'].unique()

array([' ', 'cluster 1', 'cluster 0', 'cluster 2', 'cluster 4',
       'cluster 3'], dtype=object)

In [92]:
# Icon name assigned to each label that can appear in the `cluster` column.
icons = {
    # Entries without a cluster.
    ' ': 'cancel',
    # Clusters of the input instance.
    'cluster 0': 'star',
    'cluster 1': 'circle',
    'cluster 2': 'heart',
    'cluster 3': 'play',
    'cluster 4': 'pause',
    # Entries of the target instance.
    'target': 'certified',
}

In [93]:
# Look up the icon of every row from its cluster label (an unknown label
# raises a KeyError).
df['icon'] = df['cluster'].apply(icons.__getitem__)

In [94]:
from src.data.data_analysis import show_kepler_map

# Show the input instance on the map with the stored Kepler configuration.
# Note kept from the original cell: font size 27 (presumably the label size
# to use in the map — confirm).
kepler_config_x = '../config/kepler/metr-la/visualization_test_x.json'
show_kepler_map(df, config_file_path=kepler_config_x)

KeplerGl(config={'version': 'v1', 'config': {'visState': {'filters': [{'dataId': ['data'], 'id': 'r4gzjf87n', …

In [95]:
from src.data.data_analysis import show_kepler_map

# Show the same dataframe with the Kepler configuration stored for the
# cluster view.
kepler_config_x_clusters = '../config/kepler/metr-la/visualization_test_x_clusters.json'
show_kepler_map(df, config_file_path=kepler_config_x_clusters)

KeplerGl(config={'version': 'v1', 'config': {'visState': {'filters': [{'dataId': ['data'], 'id': 'r4gzjf87n', …

In [105]:
# Disabled snippet (string literal, output suppressed by the trailing `;`):
# it would dump the `config` of an object `m` to a JSON file.
# NOTE(review): `m` is not defined in the visible cells, and the target path
# lacks the `metr-la` sub-folder used by the configuration files that the
# map cells load — confirm both before re-enabling.
'''# Save m config
import json

with open('../config/kepler/visualization_test_x_clusters.json', 'w') as f:
    json.dump(m.config, f)''';

In [106]:
from src.explanation.clustering.analyisis import (
    get_node_values_with_clusters_and_location_dataframe)

# Assemble the visualisation dataframe for the target instance.
# Note that this rebinds `df`, replacing the dataframe that was built for
# the input instance.
df = get_node_values_with_clusters_and_location_dataframe(
    sample_y,
    clusters_y,
    node_pos_dict,
    locations_df,
    sample_y_time)

In [340]:
# Check which labels ended up in the target dataframe.
df['cluster'].unique()

array([' ', 'target'], dtype=object)

In [341]:
# Look up the icon of every row from its label (an unknown label raises a
# KeyError).
df['icon'] = df['cluster'].apply(icons.__getitem__)

In [342]:
from src.data.data_analysis import show_kepler_map

# Show the target instance on the map with its stored Kepler configuration.
kepler_config_y = '../config/kepler/metr-la/visualization_test_y.json'
show_kepler_map(df, config_file_path=kepler_config_y)

KeplerGl(config={'version': 'v1', 'config': {'visState': {'filters': [{'dataId': ['data'], 'id': 'qxv5h0is9', …

In [28]:
# m

In [None]:
# Disabled snippet (string literal, output suppressed by the trailing `;`):
# it would dump the `config` of an object `m` to a JSON file.
# NOTE(review): `m` is not defined in the visible cells, and the target path
# lacks the `metr-la` sub-folder used by the configuration files that the
# map cells load — confirm both before re-enabling.
'''# Save m config
import json

with open('../config/kepler/visualization_test_y.json', 'w') as f:
    json.dump(m.config, f)''';