In [None]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.metrics import precision_score, recall_score

from pathlib import Path
import re
import json

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    LSTM,
    Bidirectional,
    Conv1D,
    Dense,
    Flatten,
    Input,
    MaxPooling1D,
    TimeDistributed,
    Dropout
)
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint

In [None]:
def extract_class(filename):
    """Return the class label embedded in a benchy recording filename.

    The name is searched for ``benchy_<digits>_<label>.parquet.gzip`` and the
    ``<label>`` part is returned. ``None`` is returned when the pattern does
    not occur in ``filename``.
    """
    found = re.search(r'benchy_\d+_(.*?)\.parquet\.gzip', filename)
    return found.group(1) if found else None

In [None]:
def process_data(dataset,label):
    """Turn a collection of windows into labelled feature rows.

    For each ``sample`` in ``dataset`` (a 2-D window; the 9-column layout
    below implies 3 data columns per window) one row is produced by
    concatenating:

    * the per-column mean,
    * the per-column standard deviation (NumPy default, ``ddof=0``),
    * the first principal component fitted on that window alone.

    Parameters
    ----------
    dataset : iterable of 2-D array-likes
        Windows to summarise. May be empty.
    label : number
        Class index written into the last column of every row.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(dataset), 10)``: 9 features followed by the label.
    """
    rows = []
    for sample in dataset:
        pca = PCA(n_components=1)
        pca.fit(sample)
        rows.append(np.hstack([np.mean(sample,axis=0), np.std(sample,axis=0), pca.components_[0]]))

    # Stack once instead of re-copying the accumulator on every iteration.
    # The empty (0, 9) seed keeps the result 2-D for an empty dataset and
    # still rejects rows that do not have exactly 9 features.
    p_data = np.vstack([np.empty((0,9)), *rows])

    label_column = np.ones((p_data.shape[0], 1))*label
    return np.hstack((p_data,label_column))

In [None]:
# NOTE(review): `process_data` is also defined in the preceding notebook cell;
# this later definition is the one in effect once both cells have run.
# Consider keeping only one of them.
def process_data(dataset,label):
    """Turn a collection of windows into labelled feature rows.

    For each ``sample`` in ``dataset`` (a 2-D window; the 9-column layout
    below implies 3 data columns per window) one row is produced by
    concatenating the per-column mean, the per-column standard deviation
    (NumPy default, ``ddof=0``) and the first principal component fitted on
    that window alone.

    Parameters
    ----------
    dataset : iterable of 2-D array-likes
        Windows to summarise. May be empty.
    label : number
        Class index written into the last column of every row.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(dataset), 10)``: 9 features followed by the label.
    """
    rows = []
    for sample in dataset:
        pca = PCA(n_components=1)
        pca.fit(sample)
        rows.append(np.hstack([np.mean(sample,axis=0), np.std(sample,axis=0), pca.components_[0]]))

    # Stack once instead of re-copying the accumulator on every iteration.
    # The empty (0, 9) seed keeps the result 2-D for an empty dataset and
    # still rejects rows that do not have exactly 9 features.
    p_data = np.vstack([np.empty((0,9)), *rows])

    label_column = np.ones((p_data.shape[0], 1))*label
    return np.hstack((p_data,label_column))

In [None]:
def window_using_rolling(df,window=44,step=22,columns=["accel_x","accel_y","accel_z"]):
    """Cut ``df[columns]`` into fixed-length, possibly overlapping windows.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; must contain every name in ``columns``.
    window : int
        Number of rows per window.
    step : int
        Row offset between the ends of consecutive windows.
    columns : list of str
        Columns to keep in each window (the default list is never mutated).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_windows, window, len(columns))``. When ``df`` is
        too short to hold a single full window the result is an empty 1-D
        array.
    """
    rolling = df[columns].rolling(window=window,step=step)
    # Iterating a Rolling object also yields the shorter warm-up windows at
    # the start of the frame. Keep only the full-length ones: for the default
    # window == 2 * step this drops exactly the first two windows, and it
    # stays correct for any other window/step combination.
    full_windows = [chunk.to_numpy() for chunk in rolling if len(chunk) == window]
    return np.array(full_windows)

In [None]:
# Settings used to size the smallest window worth analysing.
# NOTE(review): units are not stated in this notebook - presumably samples/s,
# mm and mm/s respectively; confirm.
minimum_print_steps = 2
print_resolution = 0.1
print_speed = 60
sample_rate = 3200

# steps * resolution / speed, then scaled by the sample rate.
min_print_window = minimum_print_steps * print_resolution / print_speed
samples_per_window = min_print_window * sample_rate

print(f"Minimum print window: {min_print_window}")
print(f"Samples per window: {samples_per_window}")

### Processing Data

In [None]:
df_all = pd.DataFrame()

# Collect the distinct class labels encoded in the recording filenames.
# NOTE(review): this scans "downsampled_200_filter" while the feature
# extraction cell reads "downsampled_200" - confirm both folders contain the
# same set of classes, otherwise the class lookup there can fail.
list_classes = set()
for file in Path("downsampled_200_filter").glob("benchy_*"):
    list_classes.add(extract_class(file.name))
print(list_classes)

# Map each class name to an integer label. A set of strings has no stable
# iteration order across interpreter sessions (hash randomisation), so the
# labels are assigned over a sorted view to make them reproducible between
# kernel restarts. key=str keeps the sort well-defined even if a filename did
# not match and contributed None.
class_index = {
    class_name: index
    for index, class_name in enumerate(sorted(list_classes, key=str))
}
print(class_index)

Raw data (unnormalized variant; the cell below is commented out, so it does not run)

In [None]:
# full_data = np.empty((0,10))

# for file in list(Path("downsampled_200").glob("benchy_*")):
#     file_class = extract_class(file.name)
#     df = pd.read_parquet(file)
#     data = window_using_rolling(df)
#     processed_data = process_data(data,class_index[file_class])
#     full_data = np.vstack((full_data,processed_data))

Normalized data

In [None]:
# Build the labelled feature matrix: one block of rows per recording file.
per_file_features = []

# Sorted so that the row order (and therefore anything that depends on it,
# such as a tail-based validation split) is reproducible across runs.
for file in sorted(Path("downsampled_200").glob("benchy_*")):
    file_class = extract_class(file.name)
    df = pd.read_parquet(file)
    # Z-score every acceleration axis using this file's own mean and std.
    for axis_name in ("accel_x", "accel_y", "accel_z"):
        df[axis_name] = (df[axis_name]-df[axis_name].mean())/df[axis_name].std()
    data = window_using_rolling(df)
    per_file_features.append(process_data(data,class_index[file_class]))

# Stack once at the end instead of re-copying the accumulator per file. The
# empty (0, 10) seed keeps the result 2-D even when no file was found.
full_data = np.vstack([np.empty((0,10)), *per_file_features])

In [None]:
# np.save appends ".npy" to a filename that lacks it, so this writes
# "processed_labeled_normalized_data.npy".
# `fix_imports` is no longer passed: NumPy has ignored it since 1.17 and
# deprecated it in 2.1.
np.save("processed_labeled_normalized_data",full_data)

### Training

In [None]:
# Load the saved feature matrix: columns 0-8 are features, column 9 is the
# integer class index.
loaded_data = np.load('processed_labeled_normalized_data.npy')

x_train = loaded_data[:,:9]
y_train = loaded_data[:,9]
# Recurrent layers take (samples, timesteps, features); every row is treated
# as a sequence of length 1.
x_train = x_train.reshape((x_train.shape[0],1,x_train.shape[1]))
y_train = y_train.reshape((y_train.shape[0],1))
# One-hot width is "highest class index + 1" (the rule to_categorical applies
# by default). Counting distinct labels instead would under-size the matrix
# whenever a class index is absent from the data.
y_train_categorical = to_categorical(y_train,num_classes=int(y_train.max())+1)

#### Sequential Model

In [None]:
# Single-layer ("vanilla") LSTM classifier.
# The input shape is declared with an explicit Input layer; passing
# `input_shape=` to the first layer is deprecated in Keras 3.
model = Sequential([
    Input(shape=(1, 9)),                # 1 timestep x 9 features per sample
    LSTM(100, return_sequences=False),  # 100 units, only the last output
    Dropout(0.2),                       # adjust dropout rate as needed
    Dense(6, activation='softmax'),     # 6 output probabilities
])

# "accuracy" is kept in the metric list because the checkpoint callback
# monitors 'val_accuracy'.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=["categorical_accuracy","accuracy"])

# Summary of the model
model.summary()

In [None]:
# Keep on disk only the full model (architecture + weights) from the epoch
# with the highest validation accuracy seen so far.
checkpoint_callback = ModelCheckpoint(
    filepath='best_vanilla_model.keras',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

In [None]:
# Fit the vanilla LSTM.
# Keras takes the validation_split rows from the end of the arrays, before any
# shuffling.
# NOTE(review): the feature rows are stacked file by file, so the held-out
# tail may not contain every class - confirm this split is intended.
history = model.fit(
    x=x_train,
    y=y_train_categorical,
    validation_split=0.2,
    batch_size=32,
    epochs=100,
    callbacks=[checkpoint_callback],
)

In [None]:
# Persist the per-epoch metrics recorded by fit() for the vanilla model.
with open('training_history_vanilla.json', 'w') as history_file:
    json.dump(history.history, fp=history_file)

In [None]:
# Class probabilities for every row that was passed to fit() (this includes
# the rows Keras held out through validation_split).
y_pred = model.predict(x_train)

# Predicted class = index of the highest softmax probability.
y_pred_classes = np.argmax(y_pred, axis=1)
# True class = position of the 1 in the one-hot labels. `y_train` itself has
# shape (n, 1), so an argmax over its single column would always return 0.
y_train_classes = np.argmax(y_train_categorical, axis=1)

# Calculate precision and recall for multi-class classification
precision = precision_score(y_train_classes, y_pred_classes, average='macro')
recall = recall_score(y_train_classes, y_pred_classes, average='macro')

print(f"Precision: {precision}")
print(f"Recall: {recall}")

#### Stacked Model

In [None]:
# Two stacked LSTM layers of 50 units each.
# The input shape is declared with an explicit Input layer; passing
# `input_shape=` to the first layer is deprecated in Keras 3.
model_stacked = Sequential([
    Input(shape=(1, 9)),              # 1 timestep x 9 features per sample
    LSTM(50, return_sequences=True),  # full sequence out, feeds the next LSTM
    LSTM(50),                         # only the last output
    Dropout(0.2),                     # adjust dropout rate as needed
    Dense(6, activation='softmax'),   # 6 output probabilities
])

# "accuracy" is kept in the metric list because the checkpoint callback
# monitors 'val_accuracy'.
model_stacked.compile(optimizer='adam', loss='categorical_crossentropy', metrics=["categorical_accuracy","accuracy"])

# Summary of the model_stacked
model_stacked.summary()

In [None]:
# Keep on disk only the full model (architecture + weights) from the epoch
# with the highest validation accuracy seen so far.
checkpoint_callback = ModelCheckpoint(
    filepath='best_2_stack_model.keras',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

In [None]:
# Fit the stacked LSTM.
# Keras takes the validation_split rows from the end of the arrays, before any
# shuffling.
# NOTE(review): the feature rows are stacked file by file, so the held-out
# tail may not contain every class - confirm this split is intended.
history2 = model_stacked.fit(
    x=x_train,
    y=y_train_categorical,
    validation_split=0.2,
    batch_size=32,
    epochs=100,
    callbacks=[checkpoint_callback],
)

In [None]:
# Persist the per-epoch metrics recorded by fit() for the stacked model.
with open('training_history_stacked.json', 'w') as history_file:
    json.dump(history2.history, fp=history_file)

In [None]:
# Class probabilities for every row that was passed to fit() (this includes
# the rows Keras held out through validation_split).
y_pred = model_stacked.predict(x_train)

# Predicted class = index of the highest softmax probability.
y_pred_classes = np.argmax(y_pred, axis=1)
# True class = position of the 1 in the one-hot labels. `y_train` itself has
# shape (n, 1), so an argmax over its single column would always return 0.
y_train_classes = np.argmax(y_train_categorical, axis=1)

# Calculate precision and recall for multi-class classification
precision = precision_score(y_train_classes, y_pred_classes, average='macro')
recall = recall_score(y_train_classes, y_pred_classes, average='macro')

print(f"Precision: {precision}")
print(f"Recall: {recall}")

#### Bidirectional Model

In [None]:
# Bidirectional LSTM classifier (100 units per direction).
# The input shape is declared with an explicit Input layer; passing
# `input_shape=` to the first layer is deprecated in Keras 3.
model_bi = Sequential([
    Input(shape=(1, 9)),             # 1 timestep x 9 features per sample
    Bidirectional(LSTM(units=100)),  # forward + backward pass over the input
    Dropout(0.2),                    # adjust dropout rate as needed
    Dense(6, activation='softmax'),  # 6 output probabilities
])

# "accuracy" is kept in the metric list because the checkpoint callback
# monitors 'val_accuracy'.
model_bi.compile(optimizer='adam', loss='categorical_crossentropy', metrics=["categorical_accuracy","accuracy"])

# Summary of the model_bi
model_bi.summary()

In [None]:
# Keep on disk only the full model (architecture + weights) from the epoch
# with the highest validation accuracy seen so far.
checkpoint_callback = ModelCheckpoint(
    filepath='best_bidirectional_model.keras',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

In [None]:
# Fit the bidirectional LSTM.
# Keras takes the validation_split rows from the end of the arrays, before any
# shuffling.
# NOTE(review): the feature rows are stacked file by file, so the held-out
# tail may not contain every class - confirm this split is intended.
history3 = model_bi.fit(
    x=x_train,
    y=y_train_categorical,
    validation_split=0.2,
    batch_size=32,
    epochs=100,
    callbacks=[checkpoint_callback],
)

In [None]:
# Persist the per-epoch metrics recorded by fit() for the bidirectional model.
with open('training_history_bi.json', 'w') as history_file:
    json.dump(history3.history, fp=history_file)

In [None]:
# Class probabilities for every row that was passed to fit() (this includes
# the rows Keras held out through validation_split).
y_pred = model_bi.predict(x_train)

# Predicted class = index of the highest softmax probability.
y_pred_classes = np.argmax(y_pred, axis=1)
# True class = position of the 1 in the one-hot labels. `y_train` itself has
# shape (n, 1), so an argmax over its single column would always return 0.
y_train_classes = np.argmax(y_train_categorical, axis=1)

# Calculate precision and recall for multi-class classification
precision = precision_score(y_train_classes, y_pred_classes, average='macro')
recall = recall_score(y_train_classes, y_pred_classes, average='macro')

print(f"Precision: {precision}")
print(f"Recall: {recall}")