# TFPD-Inference Notebook
Note: distances between intersections are calculated as the crow flies, aka straight line distance, between their GPS coordinates. Thus, they do not account for curving roads and such.

What's been done? (we should update this as more gets done):
- We can get the straight line distance between any two given intersections
- We can get the estimated vehicle flow for a given intersection given input of (lag) previous traffic measurements.
- We can estimate vehicle speed from vflow, as per assumption (ii). Additional assumption: the road is always under capacity, never over.
- We can find the shortest path between two intersections using Dijkstra's algorithm. This is currently based only on distance; to account for travel time we only need to add travel time penalties to the 'dist' variable.

What's left:
- How do we obtain (lag) previous traffic measurements? Are we supposed to use test / dummy data for this?
- 'Dist' should be converted to a 'travel time'-esque variable. This can be calculated like so after we figure out the point above:
    - x = distance in kilometers (calculate_distance_for_coords returns meters, so divide by 1000 first)
    - y = kmh predicted at intersection. caps at 60kmh as per assumption (i)
    - travel time in seconds = (x / y) * 60 * 60 + 30
    - multiply by 60 twice to convert hours to seconds
    - add 30 secs for assumption (iii)

In [1]:
import math
import warnings
import numpy as np
import pandas as pd
from keras.models import load_model
from tensorflow.keras.utils import plot_model
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import os
from geopy.distance import geodesic
warnings.filterwarnings("ignore")

from keras.src.legacy.saving import legacy_h5_format

2024-10-17 02:07:57.382381: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-10-17 02:07:57.393988: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-10-17 02:07:57.397091: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-10-17 02:07:57.405386: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# needed to reconstruct scaler... we should save scalers :(
def process_data(train, test, lags):
    """Turn two flow CSV files into lagged, min-max-scaled samples.

    The 'VFlow' column of each file is read (missing values become 0) and
    scaled to [0, 1] with a MinMaxScaler fitted on the training column only.
    Each series is then cut into sliding windows of ``lags + 1`` consecutive
    values: the first ``lags`` values are the features, the last value is the
    target. Training windows are shuffled; test windows keep file order.

    # Arguments
        train: String, name of .csv train file.
        test: String, name of .csv test file.
        lags: integer, time lag.
    # Returns
        X_train: ndarray.
        y_train: ndarray.
        X_test: ndarray.
        y_test: ndarray.
        scaler: MinMaxScaler fitted on the training 'VFlow' column.
    """
    column = 'VFlow'
    train_frame = pd.read_csv(train, encoding='utf-8').fillna(0)
    test_frame = pd.read_csv(test, encoding='utf-8').fillna(0)

    # sklearn scalers expect a 2-D (n_samples, 1) column
    train_column = train_frame[column].values.reshape(-1, 1)
    test_column = test_frame[column].values.reshape(-1, 1)

    # fit on the training data only, then apply the same scaling to both
    scaler = MinMaxScaler(feature_range=(0, 1)).fit(train_column)
    train_flow = scaler.transform(train_column).reshape(1, -1)[0]
    test_flow = scaler.transform(test_column).reshape(1, -1)[0]

    def sliding_windows(series):
        # one window of lags + 1 values ending at every index >= lags
        return np.array(
            [series[end - lags: end + 1] for end in range(lags, len(series))]
        )

    train_windows = sliding_windows(train_flow)
    test_windows = sliding_windows(test_flow)
    np.random.shuffle(train_windows)

    return (
        train_windows[:, :-1],
        train_windows[:, -1],
        test_windows[:, :-1],
        test_windows[:, -1],
        scaler,
    )

In [3]:
def get_speed_from_flow_per_hr(x):
    """Estimate vehicle speed (km/h) from a vehicle flow value.

    The flow/speed relation is modelled as the parabola
    ``flow = a * speed**2 + b * speed``, which peaks at flow ``q`` when the
    speed equals ``v``. This function solves that quadratic for speed and
    caps the result at the assumed speed limit.

    Flows above ``q`` have no real solution on the parabola. Rather than
    letting ``math.sqrt`` raise ``ValueError`` on a negative discriminant,
    the discriminant is clamped at zero, so such flows yield the inflection
    speed ``v``.

    # Arguments
        x: number, vehicle flow (per hour, going by the function name).
    # Returns
        Estimated speed, never above the speed limit of 60.
    """
    # v = inflection speed, aka average kmh when road is at vflow q
    v = 32
    # q = inflection point, where speed converges for given vflow when road is over or under capacity
    q = 1500

    # note: for this assignment, i believe we are always assuming
    # roads are under capacity. thus this is just a high number
    road_capacity = 9999

    a = -(q / (v * v))
    b = -2 * v * a

    # b*b + 4*a*x goes negative once x exceeds q; clamp it so the square
    # root is always defined (a zero discriminant gives speed == v)
    discriminant = max(0.0, b * b + 4 * a * x)
    root = math.sqrt(discriminant)

    # over capacity (x >= road_capacity): take the lower-speed root,
    # where speed decreases given lower vflow
    if x >= road_capacity:
        speed_from_flow = (-b + root) / (2 * a)
    # under capacity: take the higher-speed root,
    # where speed increases given lower vflow
    else:
        speed_from_flow = (-b - root) / (2 * a)

    # assumed speed limit of 60kmh
    speed_limit = 60
    return min(speed_limit, speed_from_flow)

def get_est_vflow_for_intersection(scats, lag_flow, model_variant='gru', lag=12):
    """Predict the next vehicle flow for one intersection from lagged flows.

    Loads the saved model for the given SCATS site, rebuilds the scaler from
    that site's train/test CSV files via ``process_data`` (the scaler is not
    saved alongside the model), scales ``lag_flow``, runs the model on it
    and converts the prediction back to the original flow scale.

    # Arguments
        scats: SCATS site number, used to locate the model and CSV files.
        lag_flow: 2-D array-like of previous flow readings, one reading per
            row (the notebook example passes shape (12, 1)).
        model_variant: String, model sub-folder / file prefix. Defaults to
            'gru', the value that was previously hard-coded.
        lag: integer, time lag handed to ``process_data``. Defaults to 12.
            NOTE(review): presumably must match the number of rows in
            ``lag_flow`` and the input length the model expects - confirm.
    # Returns
        1-D ndarray of predicted flow value(s) on the original scale.
    """
    model = load_model(f"model/{model_variant}/{scats}/{model_variant}_{scats}.h5", custom_objects={'mse': 'mse'})

    train_folder = 'intersection/train/'
    test_folder = 'intersection/test/'

    train_file = os.path.join(train_folder, f'train_{scats}.csv')
    test_file = os.path.join(test_folder, f"test_{scats}.csv")

    # only the fitted scaler is needed here; the sample arrays are discarded
    _, _, _, _, scaler = process_data(train_file, test_file, lag)

    lag_flow = scaler.transform(lag_flow)
    # add a leading batch axis: a single sample for model.predict
    lag_flow_reshaped = np.array([lag_flow])

    predicted = model.predict(lag_flow_reshaped)
    # undo the min-max scaling and flatten to 1-D
    predicted = scaler.inverse_transform(predicted.reshape(-1, 1)).reshape(1, -1)[0]

    return predicted

In [4]:
# put lat,long of intersection 1 and 2 respectively here
def calculate_distance_for_coords(lat1, lon1, lat2, lon2):
    """Return the geodesic distance in meters between two lat/long points.

    (lat1, lon1) is the first intersection and (lat2, lon2) the second.
    """
    origin = (lat1, lon1)
    destination = (lat2, lon2)
    separation = geodesic(origin, destination)
    return separation.meters

In [5]:
# Example of input to the get_speed_from_flow_per_hr function:
# convert a flow value of 753 into an estimated speed (capped at 60).
get_speed_from_flow_per_hr(753)

54.58211681840301

In [6]:
# Example of input to the get_est_vflow_for_intersection function.
# lag_flow holds 12 flow readings as a (12, 1) column (one reading per row),
# which is the 2-D layout scaler.transform is given inside the function.
# NOTE(review): readings are presumably ordered oldest to newest - confirm.
lag_flow = np.array([[86], [83], [52], [58], [59], [44], [31], [37], [30], [24], [16], [24]])
# 970 is the SCATS site number whose model and CSV files are loaded
predicted = get_est_vflow_for_intersection(970, lag_flow)
print(predicted)

I0000 00:00:1729091278.647053   88258 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:08:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-10-17 02:07:58.664683: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2343] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step
[16.961971]


In [7]:
class Node:
    """A single intersection in the shortest-path graph.

    Stores the intersection's coordinates and neighbor list together with
    the bookkeeping the path search updates (best known distance from the
    start node and the node it was reached from).
    """

    def __init__(self, lat, long, neighbors):
        # best known distance from the start node; 99999 means "not reached yet"
        self.dist = 99999
        # latitude, used for distance calculations
        self.lat = lat
        # longitude, used for distance calculations
        self.long = long
        # scats number of the previous node on the best path; -1 means none
        self.prev = -1
        # list of scats numbers of the neighboring intersections
        self.neighbors = neighbors

In [8]:
scats_neighbors_file = 'data/scats_neighbors.csv'
scats_data_file = 'scats-10-2006.csv'

# using Dijkstra's
# load intersections
scats_sites = pd.read_csv(scats_neighbors_file)

neighbor_cols = ['North Neighbor', 'East Neighbor', 'South Neighbor', 'West Neighbor']

for neighbor_dir in neighbor_cols:
    # scats sites can't be negative, so set NaNs to -1, then convert to int
    # so we can look up sites easier. The result is assigned back to the
    # column explicitly: calling fillna(inplace=True) on a column selection
    # is a chained assignment that newer pandas warns about and that does
    # not modify the DataFrame under Copy-on-Write.
    scats_sites[neighbor_dir] = scats_sites[neighbor_dir].fillna(-1).astype(int)

# SCATS numbers of the route's origin and destination intersections
start_scats = 970
target_scats = 3180

start_intersection = scats_sites[scats_sites['SCAT number'] == start_scats]

def construct_node(pd_row):
    """Build a Node from one row of the SCATS neighbors table.

    Reads the four directional neighbor columns listed in ``neighbor_cols``
    and keeps only positive values (missing neighbors are stored as -1).
    The node's position comes from the 'NB_Latitude' / 'NB_Longitude' columns.
    """
    # .item() unwraps each value into a plain Python number
    candidates = (pd_row[direction].item() for direction in neighbor_cols)
    neighbors = [site for site in candidates if site > 0]

    return Node(pd_row['NB_Latitude'], pd_row['NB_Longitude'], neighbors)

graph = {}
queue = []

# construct graph of all nodes
for index, row in scats_sites.iterrows():
    scats_num = row['SCAT number']
    # NOTE(review): the row is re-fetched with iloc instead of reusing `row`;
    # presumably because construct_node calls .item() on the values -
    # confirm before simplifying. This relies on the default 0..n-1 index.
    graph[scats_num] = construct_node(scats_sites.iloc[index])
    queue.append(scats_num)

# set start node distance to 0 (every other node starts at 99999)
graph[start_scats].dist = 0

# Dijkstra's algorithm with a linear scan for the closest unvisited node
reached_target = False
while queue:
    # get vertex in queue with minimum dist value
    min_dist, min_scats = 99999, -1
    for i in queue:
        node = graph[i]
        if node.dist < min_dist:
            min_dist = node.dist
            min_scats = i

    # nothing left in the queue is reachable from the start node; stop here
    # instead of looking up the -1 placeholder in the graph (KeyError)
    if min_scats == -1:
        break

    u = min_scats
    u_node = graph[u]

    if u == target_scats:
        reached_target = True
        break

    queue.remove(min_scats)

    for v in u_node.neighbors:
        v_node = graph[v]

        # edge cost: currently the straight-line distance in meters between
        # the two intersections (to be replaced by a travel time estimate)
        distance = calculate_distance_for_coords(u_node.lat, u_node.long, v_node.lat, v_node.long)

        alt = u_node.dist + distance
        if alt < v_node.dist:
            v_node.dist = alt
            v_node.prev = u

# walk the prev links back from the target to the start; an empty path means
# the target could not be reached from the start node
path = []
if reached_target:
    path = [u]
    prevy = u_node.prev
    while prevy > 0:
        path.insert(0, prevy)
        prevy = graph[prevy].prev

print(path)

[970, 3685, 2000, 3682, 3126, 3127, 4063, 4057, 3180]
