# Sample notebook for codeblocks

Run `experimental.ipynb` to get the files out

## Loading Data

In [31]:
import os
import sys

# Add parent folder to sys.path
sys.path.append(os.path.abspath(".."))
from functools import reduce

import math
import kagglehub
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pywt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from helpers.load_data import load_and_merge_sensors
from helpers.filtering import preprocess_sensor_data
from helpers.segment import create_windows
import helpers.dtwlabeling

import seglearn as seg
import tensorflow as tf
import sklearn

In [32]:
training = pd.read_pickle('../data/training.pkl')
test = pd.read_pickle('../data/test.pkl')
times = pd.read_pickle('../data/time.pkl')

In [33]:
X = training['data']
test = test['test_data']
times = times['times_df']

In [34]:
X, test, times

(            acc_x_u1  acc_y_u1  acc_z_u1  gyr_x_u1  gyr_y_u1  gyr_z_u1  \
 time index                                                               
 1          -0.000301 -0.001568  0.001243  0.000050 -0.000418 -0.000081   
 2           0.003782 -0.012973 -0.000410  0.015997 -0.006485 -0.016916   
 3           0.031281 -0.024813 -0.012871  0.018531 -0.004887 -0.012574   
 4           0.038315 -0.011346 -0.018031  0.012086 -0.005786 -0.010628   
 5           0.016355  0.025431 -0.023982  0.008687 -0.009128 -0.018874   
 ...              ...       ...       ...       ...       ...       ...   
 941         0.009207 -0.020540 -0.035409 -0.006236 -0.004397 -0.015395   
 942         0.006362 -0.020626 -0.052109 -0.005075 -0.001300 -0.010506   
 943         0.003737 -0.019320 -0.041284 -0.007229 -0.004216 -0.009475   
 944         0.008194 -0.019128 -0.022053 -0.005957 -0.007827 -0.010482   
 945         0.003609 -0.006854 -0.009351  0.000703 -0.000224  0.000250   
 
             mag_x_u1  

In [35]:
X_seg, y_seg, sub_window = create_windows(X, window_size=50, stride=25)

In [36]:
X_seg.shape, y_seg.shape, sub_window

((2206, 50, 45),
 (2206,),
 array(['s1', 's1', 's1', ..., 's5', 's5', 's5'],
       shape=(2206,), dtype='<U2'))

## Sample Training

In [37]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Flatten, Dense, Dropout, BatchNormalization

def build_model(window_size, num_sensors, num_classes, use_lstm=False):
    """
    Builds a Keras model for multivariate time-series sensor data.

    Parameters:
    - window_size: int, number of timesteps per window
    - num_sensors: int, number of sensor channels
    - num_classes: int, number of output classes
    - use_lstm: bool, if True adds an LSTM layer after CNN

    Returns:
    - Keras compiled model
    """
    model = Sequential()
    
    # First CNN layer
    model.add(Conv1D(64, kernel_size=3, activation='relu', input_shape=(window_size, num_sensors)))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size=2))
    
    # Second CNN layer
    model.add(Conv1D(128, kernel_size=3, activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size=2))
    
    if use_lstm:
        model.add(LSTM(64, return_sequences=False))
    else:
        model.add(Flatten())
    
    # Dense layers
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    
    return model

In [38]:
pd.unique(sub_window)

array(['s1', 's2', 's3', 's4', 's5'], dtype='<U2')

In [39]:
from tensorflow.keras.utils import to_categorical
import numpy as np

subjects = np.unique(pd.unique(sub_window))
num_classes = 8  # 8

loso_accuracies = []

for test_subject in np.unique(sub_window):
    train_idx = sub_window != test_subject
    test_idx = sub_window == test_subject

    X_train = X_seg[train_idx]
    y_train = y_seg[train_idx]
    X_test = X_seg[test_idx]
    y_test = y_seg[test_idx]

    # One-hot encode labels
    y_train_onehot = to_categorical(y_train - 1, num_classes=num_classes)
    y_test_onehot = to_categorical(y_test - 1, num_classes=num_classes)

    # Build your model
    model = build_model(window_size=X_train.shape[1], num_sensors=X_train.shape[2], num_classes=num_classes)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Train
    model.fit(X_train, y_train_onehot, epochs=20, batch_size=32, verbose=0)

    # Evaluate
    loss, acc = model.evaluate(X_test, y_test_onehot, verbose=0)
    print(f"Test subject {test_subject}: accuracy = {acc:.3f}")
    loso_accuracies.append(acc)

print(f"Average LOSO accuracy: {np.mean(loso_accuracies):.3f}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Test subject s1: accuracy = 0.904
Test subject s2: accuracy = 0.855
Test subject s3: accuracy = 0.995
Test subject s4: accuracy = 0.911
Test subject s5: accuracy = 0.982
Average LOSO accuracy: 0.930
