In [25]:
import sys
import os

# Make the root folder of the project importable (the path is relative to
# the current working directory), so that the `src` package can be found.
# The membership check keeps the cell idempotent: re-running it does not
# append the same entry to `sys.path` again.
if os.pardir not in sys.path:
    sys.path.append(os.pardir)

In [26]:
# Settings for autoreloading

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [27]:
# Folder holding the Kepler configuration files of the Metr-LA maps.
BASE_CONFIG_DIR = os.path.join('..', 'config', 'kepler', 'metr-la')

# Folder holding the raw Metr-LA dataset files.
BASE_DATA_DIR = os.path.join('..', 'data', 'metr-la', 'raw')

# 1 Dataset Download

First, the *Metr-LA* dataset is downloaded, along with the dataset containing the latitude and longitude information of each sensor.

In [28]:
from src.data.data_extraction import get_node_values_dataframe

# Load the values recorded by each node at the different timestamps.
# NOTE(review): `turn_in_kmph=True` presumably converts the speed readings
# to km/h; confirm against `get_node_values_dataframe`.
metr_la_h5_path = os.path.join(BASE_DATA_DIR, 'metr-la.h5')
node_values_df = get_node_values_dataframe(metr_la_h5_path,
                                           turn_in_kmph=True)

In [29]:
# Preview the first five rows of the node values dataframe.
node_values_df.head(n=5)

Unnamed: 0,773869,767541,767542,717447,717446,717445,773062,767620,737529,717816,...,772167,769372,774204,769806,717590,717592,717595,772168,718141,769373
2012-03-01 00:00:00,103.60152,108.831888,108.027216,98.974656,107.62488,110.6424,104.808528,108.027216,95.957136,100.986336,...,73.42632,105.412032,103.802688,106.906423,107.62488,95.5548,111.044736,95.353632,111.044736,99.57816
2012-03-01 00:05:00,100.852224,110.329472,105.322624,100.494592,103.71328,109.614208,104.60736,104.60736,92.447872,101.92512,...,81.540096,112.452912,107.2896,94.236032,99.779328,98.3488,103.71328,89.944448,110.150656,101.187504
2012-03-01 00:10:00,102.998016,102.59568,96.56064,94.951296,107.021376,106.61904,103.802688,103.400352,102.796848,105.210864,...,71.012304,111.044736,90.927936,95.353632,109.63656,100.584,105.6132,98.773488,112.424174,99.779328
2012-03-01 00:15:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2012-03-01 00:20:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [30]:
# Report the (rows, columns) dimensions of the node values dataframe.
node_values_shape = node_values_df.shape
print('Shape of the node value dataframe:', node_values_shape)

Shape of the node value dataframe: (34272, 207)


In [31]:
from src.data.data_extraction import get_locations_dataframe

# Load the latitude and longitude of each sensor from the CSV file, which
# is read as having a header row.
sensor_locations_path = os.path.join(
    BASE_DATA_DIR, 'graph_sensor_locations_metr_la.csv')
locations_df = get_locations_dataframe(sensor_locations_path,
                                       has_header=True)

In [32]:
from src.spatial_temporal_gnn.model import SpatialTemporalGNN
from src.data.data_extraction import get_adjacency_matrix

# Load the adjacency matrix structure stored for the Metr-LA graph.
# NOTE(review): the source is a `.pkl` file, presumably unpickled inside
# `get_adjacency_matrix`; only load files from a trusted origin.
adj_matrix_path = os.path.join(BASE_DATA_DIR, 'adj_mx_metr_la.pkl')
adj_matrix_structure = get_adjacency_matrix(adj_matrix_path)

# Split the structure into its three parts: the header, the node indices
# and the adjacency matrix itself.
(header, node_ids_dict, adj_matrix) = adj_matrix_structure


In [34]:
# Preview the first five rows of the sensor locations dataframe.
locations_df.head(n=5)

Unnamed: 0_level_0,sensor_id,latitude,longitude
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,773869,34.15497,-118.31829
1,767541,34.11621,-118.23799
2,767542,34.11641,-118.23819
3,717447,34.07248,-118.26772
4,717446,34.07142,-118.26572


In [35]:
# Report the (rows, columns) dimensions of the locations dataframe.
locations_shape = locations_df.shape
print('Shape of the locations dataframe:', locations_shape)

Shape of the locations dataframe: (207, 3)


# 2 Spatial Analysis

The spatial network of sensors covering the city is examined. The network covers the area of central *Los Angeles* and includes the *Glendale*, *Burbank* and *La Cañada Flintridge* areas. To the west, it extends up to the *Sepulveda* zone.

Kepler map settings used for the visualization:

- Style: https://raw.githubusercontent.com/heshan0131/kepler.gl-data/master/style/basic.json
- Radius: 15

In [36]:
from src.data.data_analysis import show_kepler_map

# Kepler configuration file describing how the spatial map is drawn.
spatial_map_config_path = os.path.join(BASE_CONFIG_DIR, 'spatial-map.json')

print('Metr-LA spatial map:')
# The call is kept as the last expression of the cell so that its result
# (presumably the Kepler map widget) is displayed as the cell output.
show_kepler_map(locations_df, spatial_map_config_path)

Metr-LA spatial map:


KeplerGl(config={'version': 'v1', 'config': {'visState': {'layers': [{'id': 'msruhg9', 'type': 'point', 'confi…