In [1]:
import numpy as np
import os
from scipy.io import loadmat
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import pickle

In [2]:
# Locations of the raw simulation results and of the processed dataset
root_path = '/home/kvadner/Fault Detection/nav-dyna/Simulation Results/'
data_folder = '/home/kvadner/Fault Detection/LSTM-Fault-Detection/data2'

# Number of time steps in each input window and in each target window
sequence_length = 35

# Every sub-directory of root_path is treated as one maneuver
maneuvers = list(
    filter(
        lambda entry: os.path.isdir(os.path.join(root_path, entry)),
        os.listdir(root_path),
    )
)

# Map each maneuver name to an integer code with a LabelEncoder
label_encoder = LabelEncoder()
maneuver_encoding = label_encoder.fit_transform(maneuvers)
maneuver_dict = {name: code for name, code in zip(maneuvers, maneuver_encoding)}
print(maneuver_dict)

{'SLS': 5, 'SRS': 6, 'RSL': 3, 'RSR': 4, 'ISO Double Lane Change': 0, 'LSR': 2, 'LSL': 1}


In [3]:
def create_sequences(group, sequence_length, one_hot_label):
    """Slice one simulation run into paired input/target windows.

    For every start index ``i`` the input window is ``data[i:i + sequence_length]``
    and the target window is the ``sequence_length`` samples that immediately
    follow it, taken from every variable in ``group``.

    Parameters
    ----------
    group : dict
        Maps variable name -> 1-D sequence of samples. Must contain the key
        ``'u'``, whose length defines the length of the run; samples of other
        variables beyond that length are ignored.
    sequence_length : int
        Number of samples in each input window and in each target window.
    one_hot_label : array-like
        Label attached unchanged to every window produced from this run.

    Returns
    -------
    X : np.ndarray, float32, shape (n_windows, n_variables, sequence_length)
    maneuver_labels : np.ndarray, float32, shape (n_windows, *label_shape)
    y : np.ndarray, float32, shape (n_windows, n_variables, sequence_length)

    ``n_windows`` is ``len(group['u']) - 2 * sequence_length + 1``, clamped at 0.
    A run that is too short yields empty arrays that still carry the trailing
    dimensions, so they can be concatenated with the results of other runs.

    Raises
    ------
    KeyError
        If ``group`` has no ``'u'`` entry.
    ValueError
        If some variable holds fewer samples than ``'u'``.
    """
    total_steps = len(group['u'])
    n_windows = max(total_steps - 2 * sequence_length + 1, 0)

    # (n_variables, total_steps); np.stack rejects channels shorter than 'u'.
    stacked = np.stack(
        [np.asarray(data[:total_steps], dtype=np.float32) for data in group.values()]
    )

    # Row i of input_idx lists the sample indices of window i; the matching
    # target window is the same row shifted forward by sequence_length.
    input_idx = np.arange(n_windows)[:, None] + np.arange(sequence_length)[None, :]
    target_idx = input_idx + sequence_length

    # The gather gives (n_variables, n_windows, sequence_length); move the
    # window axis to the front to get one (n_variables, sequence_length) block
    # per window.
    X = np.ascontiguousarray(stacked[:, input_idx].transpose(1, 0, 2))
    y = np.ascontiguousarray(stacked[:, target_idx].transpose(1, 0, 2))

    label = np.asarray(one_hot_label, dtype=np.float32)
    maneuver_labels = np.repeat(label[None, ...], n_windows, axis=0)

    return X, maneuver_labels, y

In [4]:
# Names of the variables read from every simulation result file
features = ['theta', 'v', 'r', 'ay', 'force', 'u', 'xu', 'xy']


def collect_all_data(root_path, maneuver_dict, variables=None):
    """Load every ``.mat`` simulation result found below ``root_path``.

    The expected layout is ``root_path/<maneuver>/<sub-folder>/<file>.mat``.
    Entries are visited in sorted order at every level so the returned lists
    do not depend on the arbitrary order of ``os.listdir``. Non-directory
    entries inside a maneuver folder and files without a ``.mat`` suffix are
    skipped.

    Parameters
    ----------
    root_path : str
        Directory that holds one folder per maneuver.
    maneuver_dict : dict
        Maps maneuver folder name -> maneuver code.
    variables : sequence of str, optional
        Variable names to read from each file. Defaults to the module-level
        ``features`` list.

    Returns
    -------
    all_sequences : list of dict
        One dict per file, mapping variable name -> flattened 1-D array.
    labels : list
        The maneuver code of each file, aligned with ``all_sequences``.

    Raises
    ------
    KeyError
        If a file lacks one of the requested variables; the message names the
        file and the missing variables.
    """
    if variables is None:
        variables = features

    all_sequences = []
    labels = []
    for maneuver_name in sorted(maneuver_dict):
        maneuver_path = os.path.join(root_path, maneuver_name)
        maneuver_code = maneuver_dict[maneuver_name]
        for velocity_folder in sorted(os.listdir(maneuver_path)):
            velocity_path = os.path.join(maneuver_path, velocity_folder)
            if not os.path.isdir(velocity_path):
                # Stray files next to the sub-folders would make listdir fail.
                continue
            for file_name in sorted(os.listdir(velocity_path)):
                if not file_name.endswith('.mat'):
                    continue
                file_path = os.path.join(velocity_path, file_name)
                sim_result = loadmat(file_path)

                missing = [variable for variable in variables if variable not in sim_result]
                if missing:
                    raise KeyError(f"{file_path} is missing variables: {missing}")

                # Collect each variable as a separate sequence and group them
                group = {variable: sim_result[variable].flatten() for variable in variables}
                all_sequences.append(group)
                labels.append(maneuver_code)  # The same maneuver code for the entire group
    return all_sequences, labels


In [5]:
# Load every simulation run together with its maneuver code
all_data, all_labels = collect_all_data(root_path, maneuver_dict)

# Fail here with a clear message: the scaler setup indexes all_data[0] and
# would otherwise stop with an uninformative IndexError.
if not all_data:
    raise RuntimeError(f"No .mat simulation results found under {root_path!r}")

In [6]:
# Fit one StandardScaler per variable on that variable's samples pooled
# across every loaded run.
scalers = {}
for channel in all_data[0].keys():
    pooled_samples = np.concatenate([run[channel] for run in all_data])
    scalers[channel] = StandardScaler().fit(pooled_samples.reshape(-1, 1))

# Persist each fitted scaler to its own pickle file in the working directory
for variable, fitted_scaler in scalers.items():
    with open(f'scaler_{variable}.pkl', 'wb') as handle:
        pickle.dump(fitted_scaler, handle)

# Standardise every variable of every run with its matching scaler
scaled_data = [
    {
        variable: scalers[variable].transform(samples.reshape(-1, 1)).flatten()
        for variable, samples in run.items()
    }
    for run in all_data
]

In [7]:
# One-hot encode the per-run maneuver codes into a dense array with one column
# per distinct code present in all_labels.
# scikit-learn 1.2 renamed the `sparse` flag to `sparse_output` and 1.4 removed
# the old name, so try the new keyword first and fall back for older releases.
try:
    one_hot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    one_hot_encoder = OneHotEncoder(sparse=False)
one_hot_labels = one_hot_encoder.fit_transform(np.array(all_labels).reshape(-1, 1))



In [8]:
# Window every scaled run; each entry is an (inputs, labels, targets) triple
# produced with the one-hot label of that run.
windowed_runs = [
    create_sequences(scaled_group, sequence_length, one_hot_labels[run_idx])
    for run_idx, scaled_group in enumerate(scaled_data)
]

# Split the triples into three parallel per-run lists
sequence_data = [inputs for inputs, _, _ in windowed_runs]
sequence_labels = [labels for _, labels, _ in windowed_runs]
sequence_output = [targets for _, _, targets in windowed_runs]

# Stack the windows of all runs along the first axis
final_X = np.concatenate(sequence_data, axis=0)
final_maneuver_labels = np.concatenate(sequence_labels, axis=0)
final_y = np.concatenate(sequence_output, axis=0)

In [9]:
# Report the dimensions of the assembled dataset
print(final_X.shape)
print(final_maneuver_labels.shape)
print(final_y.shape)

# Save the complete dataset.
# np.save does not create missing directories, so make sure the target exists
# instead of failing after all the windows have been built.
os.makedirs(data_folder, exist_ok=True)
np.save(os.path.join(data_folder, 'X_data.npy'), final_X)
np.save(os.path.join(data_folder, 'maneuver_labels.npy'), final_maneuver_labels)
np.save(os.path.join(data_folder, 'Y_data.npy'), final_y)

print("Completed processing all maneuvers")


(3219931, 8, 35)
(3219931, 7)
(3219931, 8, 35)
Completed processing all maneuvers
