In [None]:
import pandas as pd
import torch.nn as nn
import torch
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import DataLoader, TensorDataset, random_split
from tqdm import tqdm

# Trajectory recordings available for training
csv_highway = "trajectories-0400-0415.csv" #highway
csv_intersection="NGSIM__Lankershim_Vehicle_Trajectories.csv" #intersection

# Read the intersection recording piece by piece, then stitch the pieces together
chunk_size = 100000
dataframes = list(pd.read_csv(csv_intersection, chunksize=chunk_size))
df = pd.concat(dataframes, ignore_index=True)

# Vehicle_ID on its own is not unique; per the data, Vehicle_ID together with
# Total_Frames acts as the primary key, so both are glued into one string key.
df['Unique_ID'] = df['Vehicle_ID'].astype(str) + df['Total_Frames'].astype(str)

# One DataFrame per Unique_ID
grouped = df.groupby('Unique_ID')
dataframes_dict = {unique_id: vehicle_df for unique_id, vehicle_df in grouped}

_Transformer Architecture_

In [None]:
class TimeSeriesTransformer(nn.Module):
    """Transformer encoder that regresses one value from a window of timesteps.

    The encoder layers are built with ``batch_first=True``, so ``forward``
    expects ``src`` shaped ``(batch, steps, input_dim)`` and returns a
    ``(batch, 1)`` tensor computed from the encoding of the last timestep.

    Args:
        input_dim: number of features per timestep.
        d_model: width of the embedding / encoder layers.
        nhead: number of attention heads.
        num_encoder_layers: number of stacked encoder layers.
        dim_feedforward: hidden width of each encoder layer's feed-forward part.
        seq_len: window length the positional encoding is precomputed for.
        dropout: dropout probability inside the encoder layers.
    """

    def __init__(self, input_dim, d_model, nhead, num_encoder_layers, dim_feedforward, seq_len, dropout=0.1):
        super(TimeSeriesTransformer, self).__init__()
        self.d_model = d_model
        self.seq_len = seq_len
        self.embedding = nn.Linear(input_dim, d_model)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_encoder_layers)

        # Precomputed once with a leading axis of 1 so it broadcasts over the batch.
        # Registered as a non-persistent buffer: it follows .to(device)/.half() with
        # the module but stays out of state_dict, so existing checkpoints still load.
        self.register_buffer('positional_encoding', self._encoding_table(seq_len), persistent=False)
        self.output_layer = nn.Linear(d_model, 1)  # single regression output per window

    def forward(self, src):
        """Return a ``(batch, 1)`` prediction for ``src`` of shape ``(batch, steps, input_dim)``."""
        steps = src.size(1)
        src = self.embedding(src) * (self.d_model ** 0.5)
        if steps <= self.positional_encoding.size(1):
            pe = self.positional_encoding[:, :steps]
        else:
            # Window longer than the precomputed table: build a matching one on the fly
            pe = self._encoding_table(steps).to(device=src.device, dtype=src.dtype)
        src = src + pe
        output = self.transformer_encoder(src)
        output = self.output_layer(output[:, -1, :])  # keep only the last timestep
        return output

    def _encoding_table(self, length):
        """Build the sinusoidal positional encoding, shaped ``(1, length, d_model)``."""
        position = torch.arange(0, length, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, self.d_model, 2).float() * (-torch.log(torch.tensor(10000.0)) / self.d_model))
        pe = torch.zeros(length, self.d_model)
        pe[:, 0::2] = torch.sin(position * div_term)
        # For odd d_model there is one fewer cosine column than sine columns
        pe[:, 1::2] = torch.cos(position * div_term[: self.d_model // 2])
        return pe.unsqueeze(0)

    def _generate_positional_encoding(self, batch_size):
        """Return the encoding for ``seq_len`` steps repeated to ``(batch_size, seq_len, d_model)``."""
        return self._encoding_table(self.seq_len).repeat(batch_size, 1, 1)

# Transformer hyperparameters
sequence_length=10          # timesteps per input window
input_dim = 3               # features fed to the embedding layer per timestep
nhead = 8                   # attention heads
d_model = 8 * nhead         # model width: 8 dimensions per head
num_encoder_layers = 6
dim_feedforward = 2048
dropout = 0.1

model_transformer = TimeSeriesTransformer(
    input_dim,
    d_model,
    nhead,
    num_encoder_layers,
    dim_feedforward,
    seq_len=sequence_length,
    dropout=dropout,
)

*LSTM Architecture*

In [None]:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
class LSTM(nn.Module):
    """Stacked LSTM followed by a linear head applied to the last timestep.

    The recurrent layer is built with ``batch_first=True``, so ``forward``
    expects ``x`` shaped ``(batch, steps, input_size)`` and returns
    ``(batch, output_size)``.

    Args:
        input_size: number of features per timestep.
        hidden_size: width of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        output_size: number of values produced by the linear head.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the LSTM from a zero initial state and project the last step's output."""
        # new_zeros inherits x's device and dtype, so the initial state always matches
        # the input instead of depending on the module-level `device`.
        h0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        c0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)

        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])  # keep only the last timestep
        return out

# LSTM hyperparameters
input_size=input_dim                      # same per-timestep feature count as the transformer
hidden_size, num_layers, output_size = 64, 2, 1
# NOTE(review): the training cell sets its own epoch count and optimizer learning rate
num_epochs = 100
learning_rate = 0.001

model_lstm = LSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    output_size=output_size,
).to(device)

*Model Training Phase*

In [None]:
def create_sequences(input_data, target_data, sequence_length):
    """Cut a time series into sliding windows paired with the next step's target.

    Window ``i`` is ``input_data[i:i + sequence_length]`` and its target is
    ``target_data[i + sequence_length]``.

    Args:
        input_data: array-like indexed by timestep along its first axis.
        target_data: array-like indexed by timestep along its first axis.
        sequence_length: number of consecutive timesteps per window.

    Returns:
        ``(sequences, targets)`` as numpy arrays. ``sequences`` has shape
        ``(n, sequence_length, ...)`` and ``targets`` has shape ``(n, ...)``,
        where ``n = len(input_data) - sequence_length``. When the series is
        too short for a single window, both arrays are empty but keep their
        trailing dimensions.
    """
    input_data = np.asarray(input_data)
    target_data = np.asarray(target_data)

    n_windows = len(input_data) - sequence_length
    if n_windows <= 0:
        # Keep the rank of the result stable so callers can still index/reshape it
        empty_sequences = np.empty((0, sequence_length) + input_data.shape[1:], dtype=input_data.dtype)
        empty_targets = np.empty((0,) + target_data.shape[1:], dtype=target_data.dtype)
        return empty_sequences, empty_targets

    sequences = [input_data[start:start + sequence_length] for start in range(n_windows)]
    targets = [target_data[start + sequence_length] for start in range(n_windows)]
    return np.array(sequences), np.array(targets)

losses_val = []  # per-batch validation losses, one list per (vehicle, epoch)
model = model_transformer  # Choose which model to train

# Batches are moved to wherever the chosen model's parameters live
model_device = next(model.parameters()).device

# Training configuration
sequence_length = 10  # use the last 10 timesteps to predict the next one
batch_size = 64
num_epochs = 1
feature_columns = ['Local_X', 'Local_Y', 'v_Vel']
target_columns = ['v_Vel']

# Created once: rebuilding Adam for every vehicle would throw away its running
# moment estimates each time.
loss_function = torch.nn.MSELoss()  # Mean Squared Error Loss for regression tasks
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)  # Adam optimizer

for Car_ID, df2 in dataframes_dict.items():
    # A trajectory no longer than one window yields no (window, next-step) pair
    if len(df2) <= sequence_length:
        print(Car_ID, " Empty")
        continue

    df2 = df2[['Local_X','Local_Y','Lane_ID','Preceding','Following','v_Vel']]
    # A constant column has zero std; dividing by it would produce NaNs that reach
    # the loss and corrupt the weights. Such columns simply become all zeros.
    column_std = df2.std().replace(0, 1)
    normalized_data = (df2 - df2.mean()) / column_std

    feature_data = normalized_data[feature_columns].values
    target_data = normalized_data[target_columns].values
    if not (np.isfinite(feature_data).all() and np.isfinite(target_data).all()):
        print(Car_ID, " Skipped (non-finite values)")
        continue

    # Create sequences
    sequences, targets = create_sequences(feature_data, target_data, sequence_length)

    # Chronological 90% / 10% split into training and validation windows
    train_size = int(len(sequences) * 0.9)
    val_size = int(len(sequences) * 0.1)
    if train_size == 0:
        print(Car_ID, " Empty")
        continue

    train_sequences = torch.tensor(sequences[:train_size], dtype=torch.float32)
    train_targets = torch.tensor(targets[:train_size], dtype=torch.float32)
    val_sequences = torch.tensor(sequences[train_size:train_size + val_size], dtype=torch.float32)
    val_targets = torch.tensor(targets[train_size:train_size + val_size], dtype=torch.float32)

    train_data = TensorDataset(train_sequences, train_targets)
    val_data = TensorDataset(val_sequences, val_targets)

    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)

    for epoch in range(num_epochs):
        model.train()
        train_losses = []
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False)
        for seq, target in progress_bar:
            seq = seq.to(model_device)
            target = target.to(model_device)

            optimizer.zero_grad()
            output = model(seq)
            loss = loss_function(output, target)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())

            remaining_iterations = len(train_loader) - len(train_losses)
            progress_bar.set_postfix(Loss=loss.item(), Remaining=remaining_iterations)

        model.eval()
        val_losses = []
        with torch.no_grad():
            for seq, target in val_loader:
                seq = seq.to(model_device)
                target = target.to(model_device)
                output = model(seq)
                loss = loss_function(output, target)
                val_losses.append(loss.item())
        losses_val.append(val_losses)

        # Very short trajectories have no validation windows; avoid mean-of-empty
        val_summary = f"{np.mean(val_losses):.4f}" if val_losses else "n/a"
        print(f"Epoch {epoch+1}, Training Loss: {np.mean(train_losses):.4f}, Validation Loss: {val_summary}")


In [None]:
# Save weights for further use
trained_state = model.state_dict()
torch.save(trained_state, 'model_weights_single.pth')


In [None]:
model=model_transformer
# map_location places the stored tensors on the model's own device, so a checkpoint
# written on a device that is unavailable here (e.g. saved on GPU, loaded on CPU) still loads.
weights_device = next(model.parameters()).device
model.load_state_dict(torch.load('model_weights_single.pth', map_location=weights_device)) #Use pre-trained weights for test datasets

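Test on any dataset: load another trajectory CSV, pick one vehicle by its Unique ID, and evaluate the trained model on it.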

In [None]:
# Recording used for testing, and the batch size for its data loader
csv_file_path = "trajectories-0820am-0835am.csv"
batch_size = 64

# Read the file in pieces and stitch them back into a single frame
chunk_size = 100000
dataframes = [piece for piece in pd.read_csv(csv_file_path, chunksize=chunk_size)]
df = pd.concat(dataframes, ignore_index=True)

# Same composite key as used for training: Vehicle_ID followed by Total_Frames
df['Unique_ID'] = df['Vehicle_ID'].astype(str) + df['Total_Frames'].astype(str)

# One DataFrame per Unique_ID
grouped = df.groupby('Unique_ID')
dataframes_dict = dict(iter(grouped))


In [None]:
Car_ID='7031523' #Choose any Unique ID
df2=dataframes_dict[Car_ID]
df2=df2[['Local_X','Local_Y','Lane_ID','Preceding','Following','v_Vel']]

# Standardise each column with this vehicle's own statistics. A constant column has
# zero std; dividing by 1 instead turns it into zeros rather than NaNs.
column_std = df2.std().replace(0, 1)
normalized_data = (df2 - df2.mean()) / column_std
feature_columns = ['Local_X', 'Local_Y', 'v_Vel']
target_columns = ['v_Vel']

sequence_length = 10  # timesteps per input window

feature_data = normalized_data[feature_columns].values
target_data = normalized_data[target_columns].values

# Sliding windows over the whole trajectory, kept in time order for plotting later
sequences, targets = create_sequences(feature_data, target_data, sequence_length)
sequences = torch.tensor(sequences, dtype=torch.float32)
targets = torch.tensor(targets, dtype=torch.float32)
vehicle_data = TensorDataset(sequences, targets)
vehicle_loader = DataLoader(vehicle_data, batch_size=batch_size, shuffle=False)

loss_function = torch.nn.MSELoss()  # Mean Squared Error Loss for regression tasks

# Evaluate on test data
def evaluate(model, test_loader, loss_function):
    """Return the mean of the per-batch losses of ``model`` over ``test_loader``.

    Args:
        model: module to evaluate; it is switched to evaluation mode.
        test_loader: iterable yielding ``(sequence, target)`` tensor pairs.
        loss_function: callable applied as ``loss_function(output, target)``.

    Returns:
        The unweighted mean of the batch losses, or ``nan`` when the loader
        yields no batches.
    """
    model.eval()  # Set the model to evaluation mode

    # Feed batches on the device the model's parameters live on (if it has any)
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    test_losses = []
    with torch.no_grad():  # No gradients required for testing
        for seq, target in test_loader:
            seq, target = seq.float(), target.float()
            if model_device is not None:
                seq, target = seq.to(model_device), target.to(model_device)
            output = model(seq)
            loss = loss_function(output, target)
            test_losses.append(loss.item())

    if not test_losses:
        # Same value np.mean([]) would give, without the runtime warning
        return float('nan')
    return np.mean(test_losses)

# Score the chosen vehicle's windows with the loaded model
test_loss = evaluate(
    model=model,
    test_loader=vehicle_loader,
    loss_function=loss_function,
)
print(f'Test Loss: {test_loss:.4f}')

Plot predictions

In [None]:
# Function to predict and visualize the results
def plot_predictions(model, loader):
    """Run ``model`` over ``loader``, plot targets against outputs, and return both.

    Args:
        model: module to run; it is switched to evaluation mode.
        loader: iterable yielding ``(sequence, target)`` tensor pairs.

    Returns:
        ``(actuals, predictions)``: two lists with one numpy array per sample,
        in the order the loader produced them.
    """
    model.eval()

    # Feed batches on the device the model's parameters live on (if it has any)
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    # Collected locally so calling the function again never duplicates earlier results
    collected_actuals, collected_predictions = [], []
    with torch.no_grad():
        for seq, target in loader:
            seq, target = seq.float(), target.float()
            if model_device is not None:
                seq = seq.to(model_device)
            output = model(seq)
            # .cpu() is required before .numpy() when the model runs on an accelerator
            collected_actuals.extend(target.cpu().numpy())
            collected_predictions.extend(output.cpu().numpy())

    # Plot every target/prediction pair in loader order
    plt.figure(figsize=(10, 5))
    plt.plot(collected_actuals, label='Actual')
    plt.plot(collected_predictions, label='Predicted')
    plt.title('Comparison of Actual and Predicted Values')
    plt.xlabel('Time Step')
    plt.ylabel('Values')
    plt.legend()
    plt.show()

    return collected_actuals, collected_predictions

actuals, predictions = plot_predictions(model, vehicle_loader)

In [None]:
actuals = np.array(actuals)
predictions = np.array(predictions)
# Column 0 holds the only target/output value per sample
actual_velocities = actuals[:, 0]
predicted_velocities = predictions[:, 0]

#De-normalising Predictions
# Undo the standardisation using the selected vehicle's own v_Vel statistics
velocity_mean = df2['v_Vel'].mean()
velocity_std = df2['v_Vel'].std()
actual_velocities = actual_velocities * velocity_std + velocity_mean
predicted_velocities = predicted_velocities * velocity_std + velocity_mean

# plt.subplots with a single panel returns one Axes object (not an array),
# so it must be used directly rather than indexed.
fig, ax = plt.subplots(figsize=(10, 10))

# Plot actual and predicted values on the same axes
ax.plot(actual_velocities, label='Actual Velocity', color='blue')
ax.plot(predicted_velocities, label='Predicted Velocity', color='orange')
ax.set_title('Comparison of Actual and Predicted Velocities')
ax.set_xlabel('Time Step')
ax.set_ylabel('Velocity')
ax.legend()

*Anomaly Detection*

In [None]:
# Absolute prediction error per time step
velocity_diff = np.abs(actual_velocities - predicted_velocities)

# Upper outlier fence from the interquartile range: Q3 + 1.5 * IQR
q1 = np.percentile(velocity_diff, 25)
q3 = np.percentile(velocity_diff, 75)
iqr = q3 - q1
outlier_threshold = q3 + 1.5 * iqr

# Horizontal box plot of the errors with the fence drawn as a dashed line
fig, ax = plt.subplots(figsize=(10, 6))
ax.boxplot(velocity_diff, vert=False, patch_artist=True)  # patch_artist fills the box with color
ax.set_title('Box Plot of Velocity Differences Between Actual and Predicted Values')
ax.set_xlabel('Difference')
ax.axvline(x=outlier_threshold, color='r', linestyle='--', label=f'Outlier Threshold at {outlier_threshold:.2f}')
ax.legend()
plt.show()

# Report the fence so it can be reused as the anomaly detection threshold
print("Suggested anomaly detection threshold:", outlier_threshold)

In [None]:
#mean_velocity = torch.mean(train_targets[:, 0])  # Assuming first target is velocity
#std_velocity = torch.std(train_targets[:, 0])

# Define a function to detect anomalies
def detect_velocity_anomalies(actual_velocity, predicted_velocity, threshold=0.1):
    """List the time steps whose absolute prediction error exceeds ``threshold``.

    Args:
        actual_velocity: array of observed values.
        predicted_velocity: array of predicted values, aligned with the observed ones.
        threshold: an error strictly greater than this counts as an anomaly.

    Returns:
        A list of ``(index, actual, predicted)`` tuples in index order.
    """
    errors = np.abs(actual_velocity - predicted_velocity)
    return [
        (step, actual_velocity[step], predicted_velocity[step])
        for step, error in enumerate(errors)
        if error > threshold
    ]

# Get anomalies in the predictions
# Flag every time step whose error exceeds the IQR-based threshold
velocity_anomalies = detect_velocity_anomalies(
    actual_velocities,
    predicted_velocities,
    threshold=outlier_threshold,
)
print("Velocity Anomalies (Index, Actual, Predicted):", velocity_anomalies)

# Share of time steps flagged as anomalous, expressed as a percentage
anomalous_fraction = len(velocity_anomalies) / len(actual_velocities)
velocity_anomalies_percent = anomalous_fraction * 100

# Print results
print(f"Percentage of velocity points that are anomalous: {velocity_anomalies_percent:.2f}%")



In [None]:
# Split the anomaly tuples into x positions and the predicted values to mark
if not velocity_anomalies:
    velocity_anomaly_indices, velocity_anomaly_values = [], []
else:
    velocity_anomaly_indices, _, velocity_anomaly_values = zip(*velocity_anomalies)

# Actual and predicted curves with the flagged points drawn on top
fig, ax = plt.subplots(figsize=(15, 10))
ax.plot(actual_velocities, label='Actual Velocity', color='blue')
ax.plot(predicted_velocities, label='Predicted Velocity', color='orange')
ax.scatter(velocity_anomaly_indices, velocity_anomaly_values, color='red', s=50, label='Anomalies', zorder=5)
ax.set_title(f'Comparison of Actual and Predicted Velocities with Anomalies - Unique ID {Car_ID}')
ax.set_xlabel('Time Step')
ax.set_ylabel('Velocity')
ax.legend()