# Using a smller chunk of the entire NGSIM dataset. Multivariate input and multivariate output time series problem has been defined and used to train a RNN model using LSTM and dense input layer and dense output layer.

In [1]:
# import os
# os.environ['CUDA_VISIBLE_DEVICES'] = '5,6'
import os
os.environ["CUDA_VISIBLE_DEVICES"]="5,6"
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

In [2]:
# Use seaborn for pairplot.
!pip install -q seaborn
!pip install -q tensorflow

[0m

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Make NumPy printouts easier to read.
np.set_printoptions(precision=3, suppress=True)

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

print(tf.__version__)

2024-05-23 08:46:53.676349: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9360] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-23 08:46:53.676403: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-23 08:46:53.676455: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1537] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-05-23 08:46:53.687695: I tensorflow/core/platform/cpu_feature_guard.cc:183] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE3 SSE4.1 SSE4.2 AVX, in other operations, rebuild TensorFlow with the appropriate compiler flags.


2.14.0


In [73]:
# print(tf.test.is_built_with_cuda())
# print(tf.test.is_built_with_cudnn())

In [5]:
# Define columns to use
columns_to_use = ["global_x", "global_y", "v_class", "global_time"] #using just 4 of them

# Define chunk size for reading data
chunk_size = 100

# Initialize an empty list to store data chunks
data_chunks = []

for chunk in pd.read_csv("https://data.transportation.gov/resource/8ect-6jqj.csv", 
                         chunksize=chunk_size, usecols=columns_to_use):
    data_chunks.append(chunk)

# Concatenate data chunks into a single DataFrame
trajectory_dataset = pd.concat(data_chunks, ignore_index=True)

# trajectory_dataset = pd.read_csv(
#     "https://data.transportation.gov/resource/8ect-6jqj.csv")

#   names=["Vehicle_ID", "Frame_Id", "Total_Frames", "Global_Time", "Local_X",
#            "Local_Y", "Global_X", "Global_Y", "v_length", "v_Width", "v_Vel", 
#            "v_Acc", "Lane_ID", "O_Zone", "D_Zone", "Int_ID", "Section_ID", "Direction",
#            "Movement", "Preceding", "Following", "Space_Headway", "Time_Headway", "Location"]

In [6]:
t_dataset = trajectory_dataset.copy()
t_dataset.tail()

Unnamed: 0,global_time,global_x,global_y,v_class
995,1163074800,2230522.629,1375574.155,2
996,1163074100,2230522.629,1375574.155,2
997,1163054800,2230522.629,1375574.155,2
998,1163054300,2230522.629,1375574.155,2
999,1163071000,2230522.629,1375574.155,2


In [13]:
# Define input features and labels
input_features = ["global_x", "global_y", "v_class", "global_time"]
output_labels = ["global_x", "global_y"]

# Split dataset into input features and labels
X = t_dataset[input_features]
y = t_dataset[output_labels]

# Split the dataset into training and test sets (80-20 split)
train_features, test_features, train_labels, test_labels = train_test_split(X, y, test_size=0.2, random_state=42)

# Print the shape of training and test sets
print("Train Features Shape:", train_features.shape)
print("Train Labels Shape:", train_labels.shape)
print("Test Features Shape:", test_features.shape)
print("Test Labels Shape:", test_labels.shape)

Train Features Shape: (800, 4)
Train Labels Shape: (800, 2)
Test Features Shape: (200, 4)
Test Labels Shape: (200, 2)


In [44]:
train_labels_x = train_labels['global_x']
train_labels_y = train_labels['global_y']

test_labels_x = test_labels['global_x']
test_labels_y = test_labels['global_y']

In [54]:
input_normalizer = tf.keras.layers.Normalization(axis=-1)
input_normalizer.adapt(np.array(train_features))
print(input_normalizer.mean.numpy())

[[[[[[2.231e+06 1.376e+06 2.067e+00 1.163e+09]]]]]]


In [55]:
# first = np.array(train_features[:1])

# with np.printoptions(precision=2, suppress=True):
#   print('First example:', first)
#   print()
#   print('Normalized:', input_normalizer(first).numpy())

In [61]:
train_features_normalized = input_normalizer(np.array(train_features)).numpy()

test_features_normalized = input_normalizer(np.array(test_features)).numpy()

In [62]:
#normailizing the output layer
output_normalizer=layers.Normalization(axis=-1)
output_normalizer.adapt(train_labels)
print(output_normalizer.mean.numpy())

[[2230521.2 1375565.4]]


In [63]:
# # Checking the nomalized output layer
# second = np.array(train_labels[:1])
# print(output_normalizer(second).numpy())

In [68]:
train_labels_normalized = output_normalizer(np.array(train_labels)).numpy()

test_labels_normalized = output_normalizer(np.array(test_labels)).numpy()

In [69]:
# Reshaping train_features and test_features to include the timestep dimension (to ensure compatibility with the RNN layer's expected input shape)
train_features = np.expand_dims(train_features, axis=1)
test_features = np.expand_dims(test_features, axis=1)

In [70]:
# Defining RNN model architecture
rnn_model = keras.Sequential([
    layers.LSTM(64, return_sequences=True, input_shape=(1, len(input_features))),
    layers.LSTM(64),
    layers.Dense(64, activation='relu'),
    layers.Dense(len(output_labels))  # Output layer predicts global_x and global_y
])

# Compiling the model
rnn_model.compile(loss='mean_absolute_error',
              optimizer=tf.keras.optimizers.Adam(0.001))

In [71]:
# # Train the model
# history = model.fit(train_dataset, validation_data=test_dataset, epochs=100, verbose=1)

In [72]:
%%time
history = rnn_model.fit(
    train_features_normalized,
    train_labels_normalized,
    validation_split=0.2,
    verbose=0, epochs=100)

ValueError: in user code:

    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1377, in train_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1360, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1349, in run_step  **
        outputs = model.train_step(data)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1126, in train_step
        y_pred = self(x, training=True)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/input_spec.py", line 298, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential_3" is incompatible with the layer: expected shape=(None, 1, 4), found shape=(32, 1, 1, 1, 1, 4)


In [24]:
# Evaluating the model
test_loss = rnn_model.evaluate(test_features)
print("Test Loss:", test_loss)

Test Loss: 0.0


In [None]:
test_results = rnn_model.evaluate(test_features, {'output_x': test_labels_x, 'output_y': test_labels_y}, verbose=0)

In [25]:
plot_loss(history)

NameError: name 'plot_loss' is not defined