In [1]:
import sys
import os

# Make the parent directory (the project root) importable from this notebook.
sys.path.append(os.pardir)

In [2]:
# Settings for autoreloading.
%load_ext autoreload
%autoreload 2

In [3]:
from src.utils.seed import set_random_seed

# Set the random seed for deterministic operations.
SEED = 42
set_random_seed(SEED)

In [4]:
import torch

# Pick the device used for training and querying the model: the GPU when
# CUDA is available, the CPU otherwise.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'
print(f'The selected device is: "{DEVICE}"')

The selected device is: "cuda"


# Loading the Data

In [5]:
import os

BASE_DATA_DIR = os.path.join('..', 'data', 'metr-la')

In [6]:
import pickle
# Load the pickled data scaler stored in the processed data directory.
# NOTE: unpickling can execute arbitrary code, so `scaler.pkl` must come
# from a trusted source.
with open(os.path.join(BASE_DATA_DIR, 'processed', 'scaler.pkl'), 'rb') as f:
    scaler = pickle.load(f)

In [7]:
from src.spatial_temporal_gnn.model import SpatialTemporalGNN
from src.explanation.navigator.model import Navigator
from src.data.data_extraction import get_adjacency_matrix

# Get the adjacency matrix
adj_matrix_structure = get_adjacency_matrix(
    os.path.join(BASE_DATA_DIR, 'raw', 'adj_mx_metr_la.pkl'))

# Get the header of the adjacency matrix, the node indices and the
# matrix itself.
header, node_ids_dict, adj_matrix = adj_matrix_structure

# Get the STGNN and load the checkpoints.
# NOTE(review): the positional hyper-parameters (9, 1, 12, 12, 64) presumably
# have to match the ones the checkpoint was trained with — confirm against
# the SpatialTemporalGNN signature.
spatial_temporal_gnn = SpatialTemporalGNN(9, 1, 12, 12, adj_matrix, DEVICE, 64)

stgnn_checkpoints_path = os.path.join('..', 'models', 'checkpoints',
                                      'st_gnn_metr_la.pth')

# `map_location` remaps the stored tensors onto DEVICE, so a checkpoint saved
# on a GPU can still be loaded on a CPU-only machine.
stgnn_checkpoints = torch.load(stgnn_checkpoints_path, map_location=DEVICE)
spatial_temporal_gnn.load_state_dict(stgnn_checkpoints['model_state_dict'])

# Set the STGNN in evaluation mode.
spatial_temporal_gnn.eval();

# Get the Navigator and load the checkpoints.
navigator = Navigator(DEVICE)

navigator_checkpoints_path = os.path.join('..', 'models', 'checkpoints',
                                          'navigator_metr_la.pth')

# Same device remapping as for the STGNN checkpoint.
navigator_checkpoints = torch.load(navigator_checkpoints_path,
                                   map_location=DEVICE)
navigator.load_state_dict(navigator_checkpoints['model_state_dict'])

# Set the Navigator in evaluation mode.
navigator.eval();



In [8]:
from src.data.data_extraction import get_locations_dataframe

# Get the dataframe containing the latitude and longitude of each sensor.
locations_df = get_locations_dataframe(
    os.path.join(BASE_DATA_DIR, 'raw', 'graph_sensor_locations_metr_la.csv'),
    has_header=True)

In [9]:
# Invert `node_ids_dict` (node ID -> position) so that each node position
# maps back to its node ID.
node_pos_dict = {
    position: sensor_id for sensor_id, position in node_ids_dict.items()
}

In [10]:
import pickle

# Get the data scaler.
# NOTE(review): this repeats the load of the same `scaler.pkl` performed in an
# earlier cell; `scaler` is simply rebound to a freshly unpickled object.
with open(os.path.join(BASE_DATA_DIR, 'processed', 'scaler.pkl'), 'rb') as f:
    scaler = pickle.load(f)

In [11]:
import os
import numpy as np

# Load the test-split input (`x_test`) and target (`y_test`) arrays from the
# processed data directory.
x_test = np.load(os.path.join(BASE_DATA_DIR, 'processed', 'x_test.npy'))
y_test = np.load(os.path.join(BASE_DATA_DIR, 'processed', 'y_test.npy'))

# Load the matching time information of the test inputs and targets.
x_test_time = np.load(os.path.join(BASE_DATA_DIR, 'processed', 'x_test_time.npy'))
y_test_time = np.load(os.path.join(BASE_DATA_DIR, 'processed', 'y_test_time.npy'))

In [26]:
MPH_TO_KMH_FACTOR = 1.609344

# Take the first test sample. `x_test[0]` and `y_test[0]` are views into the
# full arrays, so they are copied before being modified: without the copy the
# scaling below would be written back into `x_test`/`y_test` and would be
# applied again on every re-run of this cell.
sample_x, sample_y = x_test[0].copy(), y_test[0].copy()
# The time arrays are only read, so no copy is needed.
sample_x_time, sample_y_time = x_test_time[0], y_test_time[0]

# Scale feature 0 (presumably the speed in mph, going by the constant name —
# confirm) to km/h.
sample_x[:, :, 0] = sample_x[:, :, 0] * MPH_TO_KMH_FACTOR
sample_y[:, :, 0] = sample_y[:, :, 0] * MPH_TO_KMH_FACTOR

In [27]:
sample_x.shape

(12, 207, 9)

In [28]:
_, n_timesteps, n_nodes, _ = y_test.shape

In [29]:
# Rebuild the node position -> node ID mapping by inverting `node_ids_dict`.
# NOTE(review): the same mapping is already built in an earlier cell.
node_pos_dict = {
    position: sensor_id for sensor_id, position in node_ids_dict.items()
}

In [33]:
import pandas as pd
from typing import Dict, Optional

def get_node_values_with_location_dataframe(
    node_values: np.ndarray,
    node_pos_dict: Dict[int, str],
    locations_df: pd.DataFrame,
    time_values: Optional[np.ndarray] = None
    ) -> pd.DataFrame:
    """
    Build a long-format dataframe pairing each node value with the
    location of its node.

    One row is produced for every (time step, node) pair. Each row holds
    the node ID, the latitude and longitude of the node, the first feature
    of the node (stored in the `speed` column) and the time reference of
    the row.

    Parameters
    ----------
    node_values : ndarray
        The array of node values, iterated as (time step, node, feature).
        Only the feature at index 0 of each node is kept.
    node_pos_dict : { int: str }
        The dictionary mapping the position of each node in `node_values`
        to the corresponding node ID.
    locations_df : DataFrame
        The dataframe containing the location of each node. It is read
        through its `sensor_id`, `latitude` and `longitude` columns.
    time_values : ndarray, optional
        The array of time references, indexed as [time step, node].
        If not provided, the index of the time step is used instead.

    Returns
    -------
    DataFrame
        The resulting dataframe with the columns `sensor_id`, `latitude`,
        `longitude`, `speed` and `datetime`, ordered by time step first
        and by node position second.

    Raises
    ------
    KeyError
        If a node position is missing from `node_pos_dict`.
    IndexError
        If a node ID has no matching row in `locations_df`.
    """
    sensor_ids, latitudes, longitudes = [], [], []
    speeds, time_references = [], []

    # The ID and coordinates only depend on the node, not on the time step:
    # resolve them once per node instead of scanning `locations_df` for
    # every (time step, node) pair.
    node_location_cache = {}

    for time_idx, node_matrix in enumerate(node_values):
        for node_idx, node_features in enumerate(node_matrix):
            if time_values is not None:
                # Get the timestamp from the time values array.
                time_reference = time_values[time_idx, node_idx]
            else:
                # Fall back to the position of the time step.
                time_reference = time_idx

            if node_idx not in node_location_cache:
                # Get the node ID from the node position dictionary.
                node_id = node_pos_dict[node_idx]
                # Get the latitude and longitude of the node.
                node_rows = locations_df.loc[
                    locations_df['sensor_id'] == node_id]
                node_location_cache[node_idx] = (
                    node_id,
                    node_rows.latitude.values[0],
                    node_rows.longitude.values[0])

            node_id, latitude, longitude = node_location_cache[node_idx]

            sensor_ids.append(node_id)
            latitudes.append(latitude)
            longitudes.append(longitude)
            speeds.append(node_features[0])
            time_references.append(time_reference)

    # Build the dataframe from the collected columns.
    df = pd.DataFrame({
        'sensor_id': sensor_ids,
        'latitude': latitudes,
        'longitude': longitudes,
        'speed': speeds,
        'datetime': time_references
    })

    return df


# Build the per-node dataframe of the first target sample.
df = get_node_values_with_location_dataframe(
    node_values=sample_y,
    node_pos_dict=node_pos_dict,
    locations_df=locations_df,
    time_values=sample_y_time)

In [34]:
df

Unnamed: 0,sensor_id,latitude,longitude,speed,datetime
0,773869,34.15497,-118.31829,108.027216,2012-06-04 05:45:00
1,767541,34.11621,-118.23799,102.796848,2012-06-04 05:45:00
2,767542,34.11641,-118.23819,110.843568,2012-06-04 05:45:00
3,717447,34.07248,-118.26772,87.105744,2012-06-04 05:45:00
4,717446,34.07142,-118.26572,105.814368,2012-06-04 05:45:00
...,...,...,...,...,...
2479,717592,34.14604,-118.22430,104.607360,2012-06-04 06:40:00
2480,717595,34.14163,-118.18290,110.329472,2012-06-04 06:40:00
2481,772168,34.16542,-118.47985,69.559424,2012-06-04 06:40:00
2482,718141,34.15133,-118.37456,107.468416,2012-06-04 06:40:00


In [35]:
from src.data.data_analysis import show_kepler_map

# Render `df` on a Kepler map. `None` is passed as the second argument
# (presumably meaning that no saved configuration is applied — confirm
# against `show_kepler_map`).
# Font size 27
# NOTE(review): the "Font size 27" remark above is not used by the code
# below; it looks like a manual setting for the rendered map — confirm or
# remove.
show_kepler_map(
    df,None)

KeplerGl(data={'data':      sensor_id  latitude  longitude       speed            datetime
0       773869  34.…

In [32]:
from src.data.data_analysis import show_kepler_map

# NOTE(review): this cell repeats the `show_kepler_map(df, None)` call of the
# previous cell verbatim; one of the two can probably be dropped.
# Font size 27
show_kepler_map(
    df,None)

KeplerGl(data={'data':      sensor_id  latitude  longitude       speed            datetime
0       773869  34.…

In [24]:
from src.data.data_analysis import show_kepler_map

show_kepler_map(
    df, config_file_path='../config/kepler/metr-la/visualization_test_x_clusters.json')

KeplerGl(config={'version': 'v1', 'config': {'visState': {'filters': [{'dataId': ['data'], 'id': 'r4gzjf87n', …

In [25]:
# Disabled snippet kept as a string literal, so it is never executed: it
# would dump the configuration of a map object `m` to a JSON file.
# NOTE(review): `m` is not defined in the visible cells, and the target path
# lacks the `metr-la` sub-folder used by the cell that loads
# `visualization_test_x_clusters.json` — fix both before re-enabling.
'''# Save m config
import json

with open('../config/kepler/visualization_test_x_clusters.json', 'w') as f:
    json.dump(m.config, f)''';

In [26]:
from src.explanation.clustering.analyisis import (
    get_node_values_with_clusters_and_location_dataframe)

# Rebuild `df`, this time including the cluster information of the target
# sample (this replaces the dataframe built earlier without clusters).
# NOTE(review): `clusters_y` is not defined in any visible cell, so this
# line raises a NameError on a fresh kernel — add the cell that computes it.
df = get_node_values_with_clusters_and_location_dataframe(sample_y, clusters_y, node_pos_dict, locations_df, sample_y_time)

In [27]:
from src.data.data_analysis import show_kepler_map

show_kepler_map(
    df, config_file_path='../config/kepler/metr-la/visualization_test_y.json')

KeplerGl(config={'version': 'v1', 'config': {'visState': {'filters': [{'dataId': ['data'], 'id': 'qxv5h0is9', …

In [28]:
# m

In [None]:
# Disabled snippet kept as a string literal, so it is never executed: it
# would dump the configuration of a map object `m` to a JSON file.
# NOTE(review): `m` is not defined in the visible cells, and the target path
# lacks the `metr-la` sub-folder used by the cell that loads
# `visualization_test_y.json` — fix both before re-enabling.
'''# Save m config
import json

with open('../config/kepler/visualization_test_y.json', 'w') as f:
    json.dump(m.config, f)''';