# Assignment 3: Fall Detection

Author:
Matthieu Beylard

## Imports

In [39]:
import numpy as np
import pandas as pd
import glob
import re
import warnings
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers.legacy import Adam
from sklearn.model_selection import train_test_split

## Preprocess label and sensor datasets

In [72]:
# Silence every FutureWarning for the rest of the session.
# This also hides library deprecation notices, so re-enable it when upgrading dependencies.
warnings.simplefilter("ignore", FutureWarning)

# Function to find a task from text
def find_task(text):
    """Return the first parenthesised integer found in *text*.

    The value is converted with ``str`` before searching, so non-string
    inputs are accepted. ``None`` is returned when *text* is null
    (``None``/NaN) or when it contains no ``(<digits>)`` group.
    """
    if pd.isnull(text):
        return None

    found = re.search(r'\((\d+)\)', str(text))
    return int(found.group(1)) if found else None

subjects = [f"{i:02d}" for i in range(6, 39) if i != 34] # No data for subject 34

# Tasks 20 to 34 are the fall trials; tasks below 20 and from 35 upwards contain no fall
FALL_TASKS = range(20, 35)
# Two-digit task number embedded in each sensor file path (e.g. "...T20R...")
TASK_NUMBER_PATTERN = re.compile(r'T(\d{2})R')

# Per-frame labels (0 = no fall, 1 = fall) and sensor rows accumulated over all subjects
total_labels_data = []
total_sensor_data = []

for subject in subjects:
    # Preprocess label files (generate DataFrame, extract data, fill NaN values, apply find_task on Task ID)
    label_path = "label_data/SA" + subject + "_label.xlsx"
    label_df = pd.read_excel(label_path, sheet_name = 'Sheet1')

    # .ffill() replaces the deprecated fillna(method='ffill')
    # NOTE(review): data_label is not consumed by anything further down in this cell
    data_label = label_df.iloc[:, [0, 2, 3, 4]].ffill()
    data_label.iloc[:, 0] = data_label.iloc[:, 0].apply(find_task)

    # Fall onset/impact frames (1-based); row j is paired with the j-th fall file in sorted order
    start = label_df['Fall_onset_frame']
    stop = label_df['Fall_impact_frame']


    # Read every sensor file exactly once, building its per-frame labels in the same pass
    # so that labels and sensor rows are aligned by construction
    sensor_files = sorted(glob.glob('sensor_data/SA' + subject + '/**/*.csv', recursive=True))

    sensor_frames = []
    label_chunks = []
    fall_number = 0  # Index of the next fall trial in the label file

    for file in sensor_files:
        match = TASK_NUMBER_PATTERN.search(file)
        if match is None:
            raise ValueError(f"Cannot find a task number (TxxR) in sensor file path: {file}")
        task = int(match.group(1))

        trial_df = pd.read_csv(file)
        sensor_frames.append(trial_df)

        # One label per recorded frame, defaulting to "no fall"
        frame_labels = np.zeros(len(trial_df))

        if task in FALL_TASKS:
            onset, impact = start[fall_number], stop[fall_number]
            if impact > frame_labels.size:
                raise IndexError(
                    f"Fall impact frame {impact} exceeds the {frame_labels.size} frames recorded in {file}"
                )
            # Frames onset..impact (inclusive, 1-based) are the fall; clamp the start at 0
            # so that an onset below 1 cannot wrap around to the end of the array
            frame_labels[max(onset - 1, 0):impact] = 1
            fall_number += 1

        label_chunks.append(frame_labels)

    labels_data = np.concatenate(label_chunks) if label_chunks else np.zeros(0)


    # Concatenate all CSV files into one DataFrame (single concat instead of one per file) and save to CSV
    merged_sensor_df = pd.concat(sensor_frames, ignore_index=True) if sensor_frames else pd.DataFrame()

    # NOTE(review): the same path is rewritten for every subject, so only the last
    # subject's data remains on disk after the loop - confirm this is intended
    merged_sensor_df.to_csv('sensor_dataset.csv', index=False)

    sensor_data = merged_sensor_df.to_numpy()


    # Extend total labels data and sensor data lists
    total_labels_data.extend(labels_data)
    total_sensor_data.extend(sensor_data)

## LSTM Neural Network

In [74]:
# Convert the accumulated per-frame labels and sensor rows into numpy arrays
total_labels_data = np.array(total_labels_data)
total_sensor_data = np.array(total_sensor_data)

# Hold out 33% of the frames as a test set (fixed seed for a reproducible split)
# NOTE(review): X_test / y_test are not used anywhere in this cell
X_train, X_test, y_train, y_test = train_test_split(
    total_sensor_data,
    total_labels_data,
    test_size=0.33,
    random_state=1,
)

# Keras LSTM layers take (samples, timesteps, features) input; each frame becomes a one-step sequence
# NOTE(review): with timesteps == 1 the recurrent layers see no multi-frame history - confirm this is intended
X_train_reshaped = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))

# Three stacked LSTM layers, each followed by 20% dropout, then a sigmoid unit for the fall / no-fall decision
model = Sequential([
    # The first two LSTM layers return the full sequence so the next LSTM layer can consume it
    LSTM(units=64, input_shape=X_train_reshaped.shape[1:], return_sequences=True),
    Dropout(0.2),
    LSTM(units=64, return_sequences=True),
    Dropout(0.2),
    # The last LSTM layer returns only its final output, feeding the dense classifier
    LSTM(units=64),
    Dropout(0.2),
    Dense(units=1, activation='sigmoid'),
])

# Binary classification set-up: Adam optimizer, binary cross-entropy loss, accuracy metric
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Train for 20 epochs, keeping 20% of the training data aside for validation
model.fit(
    X_train_reshaped,
    y_train,
    epochs=20,
    batch_size=32,
    validation_split=0.2,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x2ddde2750>

## Discussion

The neural network consists of three stacked LSTM layers followed by a dense output layer, and it was trained for 20 epochs with a batch size of 32.

LSTM layers are specialized RNN layers that capture temporal dependencies and learn sequential patterns, while a dense layer is a feedforward layer in which each neuron is connected to every neuron of the previous layer; here the dense layer makes the final classification decision based on the features learned by the LSTM layers.

The number of epochs determines how many times the model iterates over the entire training dataset. I chose 20 epochs to give the model enough passes to fit the data. One could train for longer as long as the validation loss and accuracy keep improving; once these validation metrics start to worsen while the training metrics still improve, the model is overfitting.

The batch size determines how many samples are propagated through the network before the weights are updated. We want a batch size small enough to update the weights frequently and converge quickly, but not so small that the gradient estimates, and therefore the updates, become noisy.

Dropout is a technique used to prevent overfitting and improve generalization by randomly dropping a proportion of the neurons (here 20%) at each training step. Early stopping also helps prevent overfitting and improve generalization, for example through the Keras `EarlyStopping` callback. Optimizers update the weights of the neural network based on the gradients of the loss function with respect to the parameters. We used Adam because it combines momentum with RMSProp-style scaling of the gradients, applies bias correction to its moment estimates, and adapts the learning rate for each parameter individually, which makes it quite robust.