In [2]:
import os
import pandas as pd

def load_data_from_files(data_dir):
    all_data = []
    for filename in os.listdir(data_dir):
        if filename.endswith('.csv'):
            # Extract metadata from filename
            collection_id, step_info, _ = filename.split('_', 2)
            step_number = ''.join(filter(str.isdigit, step_info))
            foot = 'R' if 'R' in step_info else 'L'
            filepath = os.path.join(data_dir, filename)
            df = pd.read_csv(filepath)
            df['collection_id'] = collection_id
            df['step_number'] = int(step_number)
            df['foot'] = foot
            df['filename'] = filename  # Keep track of the file
            all_data.append(df)
    return pd.concat(all_data, ignore_index=True)

data_dir = 'csv_output'  # Replace with your directory
data = load_data_from_files(data_dir)

In [3]:
data

Unnamed: 0,time,gyroscope_x,gyroscope_y,gyroscope_z,accelerometer_x,accelerometer_y,accelerometer_z,event,collection_id,step_number,foot,filename
0,2024-06-14 10:55:50.108,-176.225,-420.175,-26.740,5.992396,-0.633180,-5.532456,1,BsxSnoTsRktvZsac1xw4,10,L,BsxSnoTsRktvZsac1xw4_10 L_sensor_data_with_eve...
1,2024-06-14 10:55:50.113,-191.625,-247.345,-23.870,-1.918572,0.501420,1.586976,1,BsxSnoTsRktvZsac1xw4,10,L,BsxSnoTsRktvZsac1xw4_10 L_sensor_data_with_eve...
2,2024-06-14 10:55:50.118,-183.610,-305.025,-4.900,0.643428,-0.413824,-1.687748,1,BsxSnoTsRktvZsac1xw4,10,L,BsxSnoTsRktvZsac1xw4_10 L_sensor_data_with_eve...
3,2024-06-14 10:55:50.122,-169.610,-285.460,-3.150,-0.540460,0.331108,-2.173064,0,BsxSnoTsRktvZsac1xw4,10,L,BsxSnoTsRktvZsac1xw4_10 L_sensor_data_with_eve...
4,2024-06-14 10:55:50.127,-176.610,-286.510,-7.035,-0.383568,0.077592,-1.338828,0,BsxSnoTsRktvZsac1xw4,10,L,BsxSnoTsRktvZsac1xw4_10 L_sensor_data_with_eve...
...,...,...,...,...,...,...,...,...,...,...,...,...
508441,2024-06-14 09:32:38.284,46.445,-48.265,-48.825,4.747508,1.361032,-1.638948,0,hiG83ssX2DUOVgSmNQWr,12,L,hiG83ssX2DUOVgSmNQWr_12 L_sensor_data_with_eve...
508442,2024-06-14 09:32:38.289,13.265,-87.465,-67.375,4.352228,1.148752,-1.632848,0,hiG83ssX2DUOVgSmNQWr,12,L,hiG83ssX2DUOVgSmNQWr_12 L_sensor_data_with_eve...
508443,2024-06-14 09:32:38.294,-12.320,-121.380,-84.035,3.929864,0.920856,-1.625772,0,hiG83ssX2DUOVgSmNQWr,12,L,hiG83ssX2DUOVgSmNQWr_12 L_sensor_data_with_eve...
508444,2024-06-14 09:32:38.299,-26.005,-146.440,-98.210,3.452356,0.619272,-1.646512,1,hiG83ssX2DUOVgSmNQWr,12,L,hiG83ssX2DUOVgSmNQWr_12 L_sensor_data_with_eve...


In [2]:
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

def preprocess_data(data):
    # Define features and target
    features = ['gyroscope_x', 'gyroscope_y', 'gyroscope_z',
                'accelerometer_x', 'accelerometer_y', 'accelerometer_z']
    target = 'event'

    # Drop rows with missing values
    data = data.dropna(subset=features + [target])

    # Normalize features
    scaler = MinMaxScaler()
    data[features] = scaler.fit_transform(data[features])

    # Encode target labels
    label_encoder = LabelEncoder()
    data[target] = label_encoder.fit_transform(data[target])

    return data, label_encoder

data, label_encoder = preprocess_data(data)

In [3]:
import torch
from torch.utils.data import Dataset

class SensorDataset(Dataset):
    def __init__(self, data, features, target, sequence_length):
        self.features = features
        self.target = target
        self.sequence_length = sequence_length
        self.sequences = []
        self.labels = []

        grouped = data.groupby('filename')

        for _, group in grouped:
            group = group.reset_index(drop=True)
            group_length = len(group)
            if group_length >= sequence_length:
                # Generate sequences using a sliding window
                for i in range(group_length - sequence_length + 1):
                    seq = group.iloc[i:i+sequence_length]
                    self.sequences.append(seq[self.features].values)
                    # Use the event at the last time point as the label
                    self.labels.append(seq[self.target].values[-1])

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        X = self.sequences[idx]
        y = self.labels[idx]
        return torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.long)

# Parameters
features = ['gyroscope_x', 'gyroscope_y', 'gyroscope_z',
            'accelerometer_x', 'accelerometer_y', 'accelerometer_z']
target = 'event'
sequence_length = 50  # hyperparameter to be tuned

dataset = SensorDataset(data, features, target, sequence_length)

In [4]:
from torch.utils.data import random_split

# Set random seed for reproducibility
torch.manual_seed(42)

# 70 - 15 - 15 split
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size

# Split dataset
train_dataset, val_dataset, test_dataset = random_split(
    dataset, [train_size, val_size, test_size])

# Create DataLoaders
from torch.utils.data import DataLoader

batch_size = 64

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, drop_last=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

In [5]:
import torch.nn as nn

class LSTMClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, num_classes, dropout=0.5):
        super(LSTMClassifier, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers,
                            batch_first=True, dropout=dropout)

        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        # x shape: (batch_size, seq_length, input_dim)
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(x.device)

        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])  # Take output from the last time step
        return out

In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

input_dim = len(features)
hidden_dim = 64
num_layers = 2
num_classes = len(label_encoder.classes_)
learning_rate = 0.001
num_epochs = 20

model = LSTMClassifier(input_dim, hidden_dim, num_layers, num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [7]:
def train(model, loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for X_batch, y_batch in loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)

        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * X_batch.size(0)
        _, predicted = torch.max(outputs.data, 1)
        total += y_batch.size(0)
        correct += (predicted == y_batch).sum().item()

    epoch_loss = running_loss / len(loader.dataset)
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

def validate(model, loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for X_batch, y_batch in loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)

            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)

            running_loss += loss.item() * X_batch.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()

    epoch_loss = running_loss / len(loader.dataset)
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

In [28]:
for epoch in range(num_epochs):
    train_loss, train_acc = train(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = validate(model, val_loader, criterion, device)
    print(f'Epoch {epoch+1}/{num_epochs}, '
          f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, '
          f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

Epoch 1/20, Train Loss: 0.2368, Train Acc: 0.9372, Val Loss: 0.1887, Val Acc: 0.9382
Epoch 2/20, Train Loss: 0.1890, Train Acc: 0.9392, Val Loss: 0.1851, Val Acc: 0.9404
Epoch 3/20, Train Loss: 0.1861, Train Acc: 0.9400, Val Loss: 0.1914, Val Acc: 0.9366
Epoch 4/20, Train Loss: 0.1848, Train Acc: 0.9403, Val Loss: 0.1855, Val Acc: 0.9409
Epoch 5/20, Train Loss: 0.1838, Train Acc: 0.9405, Val Loss: 0.1825, Val Acc: 0.9409
Epoch 6/20, Train Loss: 0.1826, Train Acc: 0.9408, Val Loss: 0.1815, Val Acc: 0.9410
Epoch 7/20, Train Loss: 0.1825, Train Acc: 0.9408, Val Loss: 0.1844, Val Acc: 0.9404
Epoch 8/20, Train Loss: 0.1819, Train Acc: 0.9408, Val Loss: 0.1824, Val Acc: 0.9407
Epoch 9/20, Train Loss: 0.1815, Train Acc: 0.9410, Val Loss: 0.1832, Val Acc: 0.9397
Epoch 10/20, Train Loss: 0.1810, Train Acc: 0.9410, Val Loss: 0.1816, Val Acc: 0.9407
Epoch 11/20, Train Loss: 0.1809, Train Acc: 0.9411, Val Loss: 0.1812, Val Acc: 0.9412
Epoch 12/20, Train Loss: 0.1814, Train Acc: 0.9408, Val Loss: 0

In [None]:
test_loss, test_acc = validate(model, test_loader, criterion, device)
print(f'Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}')

Test Loss: 0.1732, Test Acc: 0.9431


In [38]:
from sklearn.metrics import classification_report
import numpy as np

model.eval()

# Collect predictions and true labels
all_preds = []
all_labels = []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        outputs = model(X_batch)
        _, predicted = torch.max(outputs.data, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(y_batch.cpu().numpy())

# Define all possible labels
labels = range(num_classes)  # Ensure num_classes is set correctly (should be 5)

# Manually define class names
class_names = ['no event', 'heel strike', 'foot flat', 'heel off', 'toe off']

# Generate the classification report
print(classification_report(all_labels, all_preds, labels=labels, target_names=class_names))


              precision    recall  f1-score   support

    no event       0.94      1.00      0.97     55490
 heel strike       0.76      0.52      0.61       798
   foot flat       0.00      0.00      0.00         0
    heel off       0.00      0.00      0.00      1386
     toe off       0.36      0.00      0.01      1462

    accuracy                           0.94     59136
   macro avg       0.41      0.30      0.32     59136
weighted avg       0.91      0.94      0.92     59136



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [31]:
# After training is complete
model_save_path = 'lstm_model.pth'
torch.save(model.state_dict(), model_save_path)
print(f"Model weights saved to {model_save_path}")

Model weights saved to lstm_model.pth


In [8]:
# Initialize the model architecture
loaded_model = LSTMClassifier(input_dim, hidden_dim, num_layers, num_classes)
loaded_model.to(device)

# Load the saved weights
model_load_path = 'lstm_model.pth'
loaded_model.load_state_dict(torch.load(model_load_path))
print(f"Model weights loaded from {model_load_path}")

# Set the model to evaluation mode
loaded_model.eval()

Model weights loaded from lstm_model.pth


  loaded_model.load_state_dict(torch.load(model_load_path))


LSTMClassifier(
  (lstm): LSTM(6, 64, num_layers=2, batch_first=True, dropout=0.5)
  (fc): Linear(in_features=64, out_features=5, bias=True)
)

In [9]:
# Evaluate on the test set
test_loss, test_acc = validate(loaded_model, test_loader, criterion, device)
print(f'Loaded Model Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}')

Loaded Model Test Loss: 0.1732, Test Acc: 0.9431


In [10]:
from sklearn.metrics import classification_report
import numpy as np

# Collect predictions and true labels
all_preds = []
all_labels = []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        outputs = loaded_model(X_batch)
        _, predicted = torch.max(outputs.data, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(y_batch.cpu().numpy())

# Define all possible labels
labels = range(num_classes)  # Ensure num_classes is set correctly (should be 5)

# Manually define class names
class_names = ['no event', 'heel strike', 'foot flat', 'heel off', 'toe off']

# Generate the classification report
print(classification_report(all_labels, all_preds, labels=labels, target_names=class_names))


              precision    recall  f1-score   support

    no event       0.94      1.00      0.97     55490
 heel strike       0.76      0.52      0.61       798
   foot flat       0.00      0.00      0.00         0
    heel off       0.00      0.00      0.00      1386
     toe off       0.36      0.00      0.01      1462

    accuracy                           0.94     59136
   macro avg       0.41      0.30      0.32     59136
weighted avg       0.91      0.94      0.92     59136



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
