In [1]:
import sys
import os

# Make the root folder of the project (the parent directory) importable,
# so that the `src` package can be resolved from this notebook.
sys.path.append(os.pardir)

In [2]:
# Settings for autoreloading: with mode 2, IPython reloads all modules
# before executing each cell, so edits to the project's source files are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
from src.utils.seed import set_random_seed

# Set the random seed for deterministic operations.
# NOTE(review): `set_random_seed` is a project helper; presumably it seeds
# every random number generator in use — confirm in src/utils/seed.
SEED = 42
set_random_seed(SEED)

In [4]:
import torch

# Train and query the model on the GPU when CUDA is available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'
print(f'The selected device is: "{DEVICE}"')

The selected device is: "cuda"


# Loading the Data

In [5]:
import os

# Folder holding the METR-LA data, relative to this notebook.
BASE_DATA_DIR = os.path.join(os.pardir, 'data', 'metr-la')

In [6]:
import pickle
# Load the data scaler stored among the processed data.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
scaler_path = os.path.join(BASE_DATA_DIR, 'processed', 'scaler.pkl')
with open(scaler_path, 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

In [7]:
from src.spatial_temporal_gnn.model import SpatialTemporalGNN
from src.explanation.navigator.model import Navigator
from src.data.data_extraction import get_adjacency_matrix

# Get the adjacency matrix
adj_matrix_structure = get_adjacency_matrix(
    os.path.join(BASE_DATA_DIR, 'raw', 'adj_mx_metr_la.pkl'))

# Get the header of the adjacency matrix, the node indices and the
# matrix itself.
header, node_ids_dict, adj_matrix = adj_matrix_structure

# Get the STGNN and load the checkpoints.
# NOTE(review): the positional hyper-parameters (9, 1, 12, 12, ..., 64)
# must match the ones the checkpoint was trained with; their meaning is
# defined by `SpatialTemporalGNN` and is not visible in this notebook.
spatial_temporal_gnn = SpatialTemporalGNN(9, 1, 12, 12, adj_matrix, DEVICE, 64)

stgnn_checkpoints_path = os.path.join('..', 'models', 'checkpoints',
                                      'st_gnn_metr_la.pth')

# `map_location` remaps the stored tensors onto the selected device, so a
# checkpoint saved from a GPU can still be loaded when DEVICE is 'cpu'.
stgnn_checkpoints = torch.load(stgnn_checkpoints_path, map_location=DEVICE)
spatial_temporal_gnn.load_state_dict(stgnn_checkpoints['model_state_dict'])

# Set the STGNN in evaluation mode.
spatial_temporal_gnn.eval();

# Get the Navigator and load the checkpoints.
navigator = Navigator(DEVICE)

navigator_checkpoints_path = os.path.join('..', 'models', 'checkpoints',
                                          'navigator_metr_la.pth')

# Same device remapping as for the STGNN checkpoint.
navigator_checkpoints = torch.load(navigator_checkpoints_path,
                                   map_location=DEVICE)
navigator.load_state_dict(navigator_checkpoints['model_state_dict'])

# Set the Navigator in evaluation mode.
navigator.eval();



In [8]:
from src.data.data_extraction import get_locations_dataframe

# Read the latitude and longitude of each sensor into a dataframe.
locations_path = os.path.join(
    BASE_DATA_DIR, 'raw', 'graph_sensor_locations_metr_la.csv')
locations_df = get_locations_dataframe(locations_path, has_header=True)

In [9]:
# Get the node positions dictionary by inverting `node_ids_dict`
# (each value of `node_ids_dict` becomes a key mapping back to its key).
# NOTE(review): presumably this maps a node index to its sensor id — confirm
# against `get_adjacency_matrix`.
node_pos_dict = {
    position: sensor_id
    for sensor_id, position in node_ids_dict.items()
}

In [10]:
import pickle

# Get the data scaler (this repeats the load already done in cell In [6]).
# NOTE: unpickling can execute arbitrary code; only load trusted files.
scaler_file_path = os.path.join(BASE_DATA_DIR, 'processed', 'scaler.pkl')
with open(scaler_file_path, 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

In [11]:
import os
import numpy as np

def _load_explainable(file_name: str) -> np.ndarray:
    """Load one `.npy` array from the `explainable` data folder."""
    return np.load(os.path.join(BASE_DATA_DIR, 'explainable', file_name))


# Get the data used for explanations.
x_train = _load_explainable('x_train.npy')
y_train = _load_explainable('y_train.npy')
x_val = _load_explainable('x_val.npy')
y_val = _load_explainable('y_val.npy')
x_test = _load_explainable('x_test.npy')
y_test = _load_explainable('y_test.npy')

# Get the time information of the data used for explanations.
x_train_time = _load_explainable('x_train_time.npy')
y_train_time = _load_explainable('y_train_time.npy')
x_val_time = _load_explainable('x_val_time.npy')
y_val_time = _load_explainable('y_val_time.npy')
x_test_time = _load_explainable('x_test_time.npy')
y_test_time = _load_explainable('y_test_time.npy')

In [12]:
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

# Client for the public Nominatim (OpenStreetMap) geocoding service.
geocoder = Nominatim(user_agent='metr-la')

# The public Nominatim service allows at most one request per second, so
# both wrappers below wait at least one second between consecutive calls.
# When a request keeps failing, the wrappers return None.

# Rate-limited forward geocoding (address -> coordinates).
geocode = RateLimiter(
    geocoder.geocode,
    min_delay_seconds=1,
    return_value_on_exception=None)

# Rate-limited reverse geocoding (coordinates -> address).
# NOTE(review): `get_road` calls `geocoder.reverse` directly and therefore
# bypasses the rate limiting; consider switching it to `reverse_geocode`.
reverse_geocode = RateLimiter(
    geocoder.reverse,
    min_delay_seconds=1,
    return_value_on_exception=None)

In [13]:
import pandas as pd

def get_road(row: pd.Series) -> str:
    """Return the name of the road on which a sensor is located.

    The sensor coordinates are reverse-geocoded through the notebook-level
    Nominatim `geocoder`, which issues one network request per call.

    Parameters
    ----------
    row : Series
        Row providing the `latitude` and `longitude` of the sensor.

    Returns
    -------
    str
        The `road` entry of the address returned by the geocoder.

    Raises
    ------
    ValueError
        If the geocoder returns no result for the coordinates.
    KeyError
        If the returned address does not contain a `road` entry.
    """
    coordinates = (row['latitude'], row['longitude'])
    location = geocoder.reverse(coordinates)

    # `reverse` yields None when nothing is found; fail with the offending
    # coordinates instead of an opaque AttributeError on `.raw`.
    if location is None:
        raise ValueError(
            f'No reverse geocoding result for coordinates {coordinates}.')

    address = location.raw.get('address', {})
    if 'road' not in address:
        raise KeyError(
            f'No road found in the address of coordinates {coordinates}.')
    return address['road']

In [14]:
# Reverse-geocode every sensor (one request per row) and store its road name.
locations_df['road'] = locations_df.apply(get_road, axis='columns')

In [15]:
'''locations_df['location'] = locations_df.apply(
    lambda row: geocoder.reverse((row['latitude'], row['longitude'])).raw,
    axis=1)''';

In [16]:
'''for id, lat, long, road in zip(locations_df['sensor_id'], locations_df['latitude'], locations_df['longitude'], locations_df['road']):
    if road == 'Arroyo Seco Parkway':
        print(id, lat, long)''';

In [17]:
'''# Open the file containing the highway nodes information.
import json

with open(os.path.join(BASE_DATA_DIR, 'raw', 'Street Centerline.geojson'), 'rb') as f:
    highway_nodes = json.load(f)['features']''';

In [18]:
# highway_nodes

In [19]:
# set(v ['address']['road'] for v in locations_df['location'].values)

In [20]:
# locations_df['location'].values[0]

In [21]:
# Distinct road names returned by the geocoder.
set(locations_df['road'].values)

{'Arroyo Seco Parkway',
 'East 4th Street',
 'Foothill Freeway',
 'Glendale Freeway',
 'Golden State Freeway',
 'Hollywood Freeway',
 'San Diego Freeway',
 'Santa Ana Freeway',
 'Sherman Way',
 'US 101',
 'Ventura Freeway'}

Fixing wrongly assigned road names

In [22]:
'''for road in locations_df['road'].values:
    if road == 'Sherman Way':
        print('Wrong point:')
        print(road)''';

In [23]:
'''for v in locations_df['location'].values:
    if v['address']['road'] == 'East 4th Street':
        print('Wrong point:')
        print(v)''';

In [24]:
'''for v in locations_df['location'].values:
    if v['address']['road'] == 'US 101':
        print('Wrong point:')
        print(v)''';

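Looking up the coordinates of these sensors shows that they actually lie on freeways: the sensors assigned to 'East 4th Street' are on the Golden State Freeway, those assigned to 'Sherman Way' are on the San Diego Freeway, and those assigned to 'US 101' are on the Hollywood Freeway. Hence, we manually correct these wrongly assigned road names.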

In [25]:
import pandas as pd


def change_road_name(row: pd.Series) -> str:
    """Return the corrected road name of a sensor row.

    Road names known to be wrongly assigned are replaced by the freeway
    they belong to; any other name is returned unchanged.

    Parameters
    ----------
    row : Series
        Row providing the `road` entry of the sensor.

    Returns
    -------
    str
        The corrected road name.
    """
    # Wrongly assigned road name -> actual freeway.
    corrections = {
        'East 4th Street': 'Golden State Freeway',
        'Sherman Way': 'San Diego Freeway',
        'US 101': 'Hollywood Freeway',
    }
    current_name = row['road']
    return corrections.get(current_name, current_name)

In [26]:
# Replace the wrongly assigned road names with the corrected ones.
locations_df['road'] = locations_df.apply(change_road_name, axis='columns')

In [27]:
'''for v in locations_df:
    print(v)
    if v['road'] == 'East 4th Street':
        v['road'] = 'Golden State Freeway'
    elif v['road'] == 'Sherman Way':
        v['road'] = 'San Diego Freeway'
    elif v['road'] == 'US 101':
        v['road'] = 'Hollywood Freeway''';
    #old_entry = locations_df.loc[i]
    #new_entry = old_entry.copy()
    #new_entry['location'] = v

    #locations_df.loc[i] = old_entry

In [28]:
# Distinct road names after the manual corrections.
set(locations_df['road'].values)

{'Arroyo Seco Parkway',
 'Foothill Freeway',
 'Glendale Freeway',
 'Golden State Freeway',
 'Hollywood Freeway',
 'San Diego Freeway',
 'Santa Ana Freeway',
 'Ventura Freeway'}

In [29]:
'''
for v in locations_df['location'].values:
    if v['address']['road'] == 'East 4th Street':
        print('Wrong point:')
        print(v)
''';

In [30]:
'''from typing import Any, Dict, List, Tuple

def get_road_beginning_coordinates(
    highway_nodes: List[Dict[str, Any]],
    road_name: str,
    entry_road_name: str
    ) -> Tuple[float, float]:
    def is_highway_point_matching(point: Dict[str, Any]) -> bool:
        properties = point['properties']
        entry_point_matches = 'from_st' in properties and \
            properties['from_st'] is not None and \
            entry_road_name.lower() == properties['from_st'].lower()
        exit_point_matches = 'to_st' in properties and \
            properties['to_st'] is not None and \
            road_name.lower() == properties['to_st'].lower()
        return entry_point_matches and exit_point_matches

    for point in highway_nodes:
        if is_highway_point_matching(point):
            properties = point['properties']
            return properties['lat'], properties['lon']
    return None''';

In [31]:
# a = get_road_beginning_coordinates(highway_nodes, 'Pasadena Frwy', 'Alpine St')

In [32]:
# print(a)

The coordinates of the road beginnings were measured using:
* [ArcGIS official website](https://www.arcgis.com/home/webmap/viewer.html?featurecollection=https%3A%2F%2Fgeo.dot.gov%2Fserver%2Frest%2Fservices%2FHosted%2FCalifornia_2018_PR%2FFeatureServer%3Ff%3Djson%26option%3Dfootprints&supportsProjection=true&supportsJSONP=true)

In [33]:
# (latitude, longitude) of the point taken as the beginning of each road.
road_beginnings = dict([
    ('Arroyo Seco Parkway', (34.06261, -118.24863)),
    ('Foothill Freeway', (34.317596, -118.481173)),
    ('Glendale Freeway', (34.207526, -118.215157)),
    ('Golden State Freeway', (34.060422, -118.213057)),
    ('Hollywood Freeway', (34.151159, -118.373795)),
    ('San Diego Freeway', (34.294368, -118.469921)),
    ('Santa Ana Freeway', (34.06312, -118.247073)),
    ('Ventura Freeway', (34.147306, -118.160817)),
])

In [34]:
'''from geopy.distance import distance

coords_1 = (52.2296756, 21.0122287)
coords_2 = (52.406374, 16.9251681)

print(distance(coords_1, coords_2).km)''';

In [37]:
import pandas as pd
from geopy.distance import distance


def get_kilometrage(
    row: pd.Series,
    ) -> float:
    """Return the distance in km between a sensor and its road beginning.

    The value is the point-to-point distance computed by geopy's
    `distance` between the beginning of the road (looked up in
    `road_beginnings`) and the sensor coordinates; it is not measured
    along the road itself.

    Parameters
    ----------
    row : Series
        Row providing the `road`, `latitude` and `longitude` of the sensor.

    Returns
    -------
    float
        The distance in kilometres.

    Raises
    ------
    KeyError
        If the road of the row is not a key of `road_beginnings`.
    """
    start_point = road_beginnings[row['road']]
    sensor_point = (row['latitude'], row['longitude'])
    return distance(start_point, sensor_point).km

In [38]:
# Distance of every sensor from the beginning of its road, in kilometres.
locations_df['kilometrage'] = locations_df.apply(
    get_kilometrage, axis='columns')

In [40]:
# Preview the first ten sensors with their road and kilometrage.
locations_df.head(10)

Unnamed: 0_level_0,sensor_id,latitude,longitude,road,kilometrage
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,773869,34.15497,-118.31829,Ventura Freeway,14.547154
1,767541,34.11621,-118.23799,Glendale Freeway,10.345752
2,767542,34.11641,-118.23819,Glendale Freeway,10.327808
3,717447,34.07248,-118.26772,Hollywood Freeway,13.112986
4,717446,34.07142,-118.26572,Hollywood Freeway,13.329056
5,717445,34.06913,-118.25932,Hollywood Freeway,13.94093
6,773062,34.05368,-118.23369,Santa Ana Freeway,1.619571
7,767620,34.13486,-118.22932,Glendale Freeway,8.165598
8,737529,34.20264,-118.47352,San Diego Freeway,10.1805
9,717816,34.15562,-118.4686,San Diego Freeway,15.391305


In [None]:
'''locations_df['kilometrage'] = locations_df.apply(
    lambda row: geocoder.reverse((row['latitude'], row['longitude'])).raw,
    axis=1)''';

In [None]:
'''import requests

# Define the API key
api_key = YOUR_API_KEY

# Define the starting and ending points for the route
origin = 'San Francisco, California'
destination = 'Cleveland, Ohio'

# Build the URL for the Google Maps Distance Matrix API
url = 'https://maps.googleapis.com/maps/api/distancematrix/json?units=imperial'
url += '&origins=' + origin
url += '&destinations=' + destination
url += '&mode=driving'
url += '&key=' + api_key

# Make the API request and store the response
response = requests.get(url)
data = response.json()

#Extract the driving distance from the response
driving_distance = data['rows'][0]['elements'][0]['distance']['text']

# Print the driving distance
print('The driving distance between {} and {} is {}.'.format(origin, destination, driving_distance))''';

In [None]:
# road_beginnings = {}

# road_beginnings['Arroyo Seco Parkway'] = get_road_beginning_coordinates(highway_nodes, 'Pasadena Frwy', 'Alpine St')

In [None]:
'''
for e in highway_nodes:
    properties = e['properties']
    if ('to_st' in properties and properties['to_st'] is not None and 'pasadena frwy' in properties['to_st'].lower()):
        print(properties)
''';

{'zip_code': '90012', 'cl_node_id': '58045.0', 'from_st': 'STADIUM WAY', 'x': '6489756.63125', 'y': '1847768.44896', 'assetid': '113351.0', 'lon': '-118.23744492', 'tooltip': 'STADIUM WAY at PASADENA FRWY', 'type': None, 'to_st': 'PASADENA FRWY', 'lat': '34.06952458'}
{'zip_code': '90012', 'cl_node_id': '58041.0', 'from_st': 'STADIUM WAY', 'x': '6489886.23646', 'y': '1847913.325', 'assetid': '120397.0', 'lon': '-118.2370181', 'tooltip': 'STADIUM WAY at PASADENA FRWY', 'type': None, 'to_st': 'PASADENA FRWY', 'lat': '34.06992352'}
{'zip_code': '90012', 'cl_node_id': '20438.0', 'from_st': 'SOLANO AVE', 'x': '6491504.3651', 'y': '1849664.18854', 'assetid': '132314.0', 'lon': '-118.2316884', 'tooltip': 'SOLANO AVE at PASADENA FRWY', 'type': None, 'to_st': 'PASADENA FRWY', 'lat': '34.07474501'}
{'zip_code': '90065', 'cl_node_id': '55770.0', 'from_st': 'PASADENA AVE', 'x': '6497242.03177', 'y': '1853895.53594', 'assetid': '145121.0', 'lon': '-118.21277124', 'tooltip': 'PASADENA AVE at PASADEN

In [None]:
'''
node_highway_dict = {}

for row in locations_df.itertuples():
    road = geocoder.reverse((row.latitude, row.longitude)).raw['address']['road']
    node_highway_dict[row.sensor_id] = road
''';

GeocoderUnavailable: HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Max retries exceeded with url: /reverse?lat=34.20264&lon=-118.47352&format=json&addressdetails=1 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x00000223F9B1F040>, 'Connection to nominatim.openstreetmap.org timed out. (connect timeout=1)'))

In [None]:
# set((v['address']['road'], v['address']['highway'] if 'highway' in v['address'] else None) for v in locations_df['location'].values )

{('Arroyo Seco Parkway', None),
 ('East 4th Street', None),
 ('Foothill Freeway', None),
 ('Glendale Freeway', None),
 ('Golden State Freeway', '141'),
 ('Golden State Freeway', '141B'),
 ('Golden State Freeway', 'Glendale & Riverside'),
 ('Golden State Freeway', None),
 ('Hollywood Freeway', '101 & Western'),
 ('Hollywood Freeway', '11B'),
 ('Hollywood Freeway', '12A'),
 ('Hollywood Freeway', '6A'),
 ('Hollywood Freeway', '9C'),
 ('Hollywood Freeway', 'Western & La Mirada'),
 ('Hollywood Freeway', None),
 ('San Diego Freeway', None),
 ('Santa Ana Freeway', None),
 ('Sherman Way', None),
 ('US 101', None),
 ('Ventura Freeway', '5'),
 ('Ventura Freeway', None)}

In [None]:
# set(v ['address']['road'] for v in locations_df['location'].values)

{'Arroyo Seco Parkway',
 'East 4th Street',
 'Foothill Freeway',
 'Glendale Freeway',
 'Golden State Freeway',
 'Hollywood Freeway',
 'San Diego Freeway',
 'Santa Ana Freeway',
 'Sherman Way',
 'US 101',
 'Ventura Freeway'}

In [None]:
'''for t, n in zip(node_idxs[0], node_idxs[1]):
    latitude = locations_df[locations_df['sensor_id'] == node_pos_dict[n]]['latitude'].values[0]
    longitude = locations_df[locations_df['sensor_id'] == node_pos_dict[n]]['longitude'].values[0]
    location = geocoder.reverse((latitude, longitude))
    address = location.raw['address']
    city = address['city']
    street = address['road']
    try:
        area = address['suburb']
    except KeyError:
        area = address['neighbourhood']
    nodes_with_time_and_speed.append((y_test_time_sample[t, n], n, y_test_sample[t, n, 0], city, street, area))''';


In [None]:
'''nodes_with_time_and_speed = [] 

for t, n in zip(node_idxs[0], node_idxs[1]):
    latitude = locations_df[locations_df['sensor_id'] == node_pos_dict[n]]['latitude'].values[0]
    longitude = locations_df[locations_df['sensor_id'] == node_pos_dict[n]]['longitude'].values[0]
    location = geocoder.reverse((latitude, longitude))
    address = location.raw['address']
    city = address['city']
    street = address['road']
    try:
        area = address['suburb']
    except KeyError:
        area = address['neighbourhood']
    nodes_with_time_and_speed.append((y_test_time_sample[t, n], n, y_test_sample[t, n, 0], city, street, area))
''';

In [None]:
# nodes_with_time_and_speed

[(numpy.datetime64('2012-06-04T05:45:00.000000000'),
  16,
  35.352768,
  'Los Angeles',
  'Arroyo Seco Parkway',
  'Chinatown'),
 (numpy.datetime64('2012-06-04T05:45:00.000000000'),
  196,
  35.112137,
  'Los Angeles',
  'Arroyo Seco Parkway',
  'Dayton Avenue'),
 (numpy.datetime64('2012-06-04T05:50:00.000000000'),
  16,
  34.866005,
  'Los Angeles',
  'Arroyo Seco Parkway',
  'Chinatown'),
 (numpy.datetime64('2012-06-04T05:50:00.000000000'),
  196,
  34.5799,
  'Los Angeles',
  'Arroyo Seco Parkway',
  'Dayton Avenue'),
 (numpy.datetime64('2012-06-04T05:55:00.000000000'),
  16,
  34.926838,
  'Los Angeles',
  'Arroyo Seco Parkway',
  'Chinatown'),
 (numpy.datetime64('2012-06-04T05:55:00.000000000'),
  196,
  34.584217,
  'Los Angeles',
  'Arroyo Seco Parkway',
  'Dayton Avenue'),
 (numpy.datetime64('2012-06-04T06:00:00.000000000'),
  16,
  35.600292,
  'Los Angeles',
  'Arroyo Seco Parkway',
  'Chinatown'),
 (numpy.datetime64('2012-06-04T06:00:00.000000000'),
  196,
  35.142715,
  'L

In [None]:
#

Unnamed: 0_level_0,sensor_id,latitude,longitude
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,773869,34.15497,-118.31829
1,767541,34.11621,-118.23799
2,767542,34.11641,-118.23819
3,717447,34.07248,-118.26772
4,717446,34.07142,-118.26572


In [None]:
'''from geopy.geocoders import Nominatim
geocoder = Nominatim(user_agent='test')

from geopy.extra.rate_limiter import RateLimiter
geocode = RateLimiter(
    geocoder.geocode,
    min_delay_seconds=1,
    return_value_on_exception=None)

# Get location information from latitude and longitude.
location = geocoder.reverse((nodes_with_time_and_speed[0][-2], nodes_with_time_and_speed[0][-1]))''';

In [None]:
#location.raw

{'place_id': 142783228,
 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright',
 'osm_type': 'way',
 'osm_id': 148249794,
 'lat': '34.073151616149595',
 'lon': '-118.23389240513956',
 'display_name': 'Arroyo Seco Parkway, Chinatown, Los Angeles, Los Angeles County, California, 90086, United States',
 'address': {'road': 'Arroyo Seco Parkway',
  'suburb': 'Chinatown',
  'city': 'Los Angeles',
  'county': 'Los Angeles County',
  'state': 'California',
  'ISO3166-2-lvl4': 'US-CA',
  'postcode': '90086',
  'country': 'United States',
  'country_code': 'us'},
 'boundingbox': ['34.0730538', '34.0746874', '-118.233984', '-118.2324543']}

In [None]:
'''from geopy.geocoders import Nominatim
geocoder = Nominatim(user_agent='test')

from geopy.extra.rate_limiter import RateLimiter
geocode = RateLimiter(
    geocoder.geocode,
    min_delay_seconds=1,
    return_value_on_exception=None)

# Get location information from latitude and longitude.
location = geocoder.reverse((nodes_with_time_and_speed[1][-2], nodes_with_time_and_speed[1][-1]))''';

In [None]:
# location.raw['address']

{'road': 'Arroyo Seco Parkway',
 'neighbourhood': 'Dayton Avenue',
 'city': 'Los Angeles',
 'county': 'Los Angeles County',
 'state': 'California',
 'ISO3166-2-lvl4': 'US-CA',
 'postcode': '90031',
 'country': 'United States',
 'country_code': 'us'}