In [1]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import classification_report

device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')

# Data Loading and Preprocessing Functions

In this section, we define two essential functions for handling our dataset: `load_data_from_files` and `preprocess_data`. These functions streamline the process of loading raw data from CSV files and preparing it for analysis or modeling.

---

## `load_data_from_files(data_dir)`

### Purpose
This function is responsible for loading all CSV files from a specified directory, extracting relevant metadata from each filename, and consolidating the data into a single pandas DataFrame.

### Parameters
- **`data_dir`** (`str`): The directory path where the CSV data files are stored.

### Process
1. **Initialize Storage:**
   - An empty list `all_data` is created to hold individual DataFrames from each CSV file.

2. **Iterate Through Files:**
   - The function loops through each file in the `data_dir`.
   - It filters out files that do not end with the `.csv` extension to ensure only relevant data files are processed.

3. **Extract Metadata from Filename:**
   - Filenames are expected to follow a specific format, allowing extraction of metadata:
     - **`collection_id`**: Identifies the collection batch.
     - **`step_info`**: Contains information about the step number and foot side.
   - **`step_number`** is extracted by filtering digits from `step_info`.
   - **`foot`** is determined based on the presence of 'R' (Right) or 'L' (Left) in `step_info`.

4. **Load and Augment Data:**
   - Each CSV file is read into a pandas DataFrame `df`.
   - New columns are added to `df` to include the extracted metadata:
     - `collection_id`
     - `step_number` (converted to integer)
     - `foot`
     - `filename` (to keep track of the source file)

5. **Aggregate Data:**
   - The augmented DataFrame `df` is appended to the `all_data` list.

6. **Concatenate All Data:**
   - After processing all files, `all_data` is concatenated into a single DataFrame using `pd.concat`, with the index reset for consistency.

### Returns
- **`pd.DataFrame`**: A consolidated DataFrame containing data from all CSV files, enriched with metadata.

---

## `preprocess_data(data)`

### Purpose
This function preprocesses the loaded data to make it suitable for machine learning models. It handles missing values, normalizes feature scales, and encodes categorical target labels.

### Parameters
- **`data`** (`pd.DataFrame`): The raw DataFrame containing sensor data and metadata.

### Process
1. **Define Features and Target:**
   - **`features`**: A list of sensor measurement columns:
     - `'gyroscope_x'`, `'gyroscope_y'`, `'gyroscope_z'`
     - `'accelerometer_x'`, `'accelerometer_y'`, `'accelerometer_z'`
   - **`target`**: The label column `'phase'` that the model will predict.

2. **Handle Missing Values:**
   - Rows with missing (`NaN`) values in any of the `features` or the `target` are removed using `dropna`. This ensures data integrity for subsequent processing steps.

3. **Normalize Features:**
   - A `MinMaxScaler` is instantiated to scale feature values to a range between 0 and 1.
   - The scaler is fitted to the `features` and used to transform the data, ensuring that all features contribute equally to the model training.

4. **Encode Target Labels:**
   - A `LabelEncoder` is used to convert categorical target labels in the `'phase'` column into numerical values.
   - This encoding is essential for algorithms that require numerical input for the target variable.

### Returns
- **`data`** (`pd.DataFrame`): The preprocessed DataFrame with normalized features and encoded target labels.
- **`label_encoder`** (`LabelEncoder`): The fitted label encoder instance, useful for inverse transforming predictions back to original labels.

---

By utilizing these functions, we ensure that our data is systematically loaded and preprocessed, paving the way for effective analysis and modeling.

In [2]:
############################################
# Data Loading and Preprocessing Functions #
############################################

def load_data_from_files(data_dir):
    all_data = []
    for filename in os.listdir(data_dir):
        if filename.endswith('.csv'):
            # Extract metadata from filename
            collection_id, step_info, _ = filename.split('_', 2)
            step_number = ''.join(filter(str.isdigit, step_info))
            foot = 'R' if 'R' in step_info else 'L'
            filepath = os.path.join(data_dir, filename)
            df = pd.read_csv(filepath)
            df['collection_id'] = collection_id
            df['step_number'] = int(step_number)
            df['foot'] = foot
            df['filename'] = filename  # Keep track of the file
            all_data.append(df)
    return pd.concat(all_data, ignore_index=True)

def preprocess_data(data):
    # Define features and target
    features = ['gyroscope_x', 'gyroscope_y', 'gyroscope_z',
                'accelerometer_x', 'accelerometer_y', 'accelerometer_z']
    target = 'phase'

    # Drop rows with missing values
    data = data.dropna(subset=features + [target])

    # Normalize features
    scaler = MinMaxScaler()
    data[features] = scaler.fit_transform(data[features])

    # Encode target labels
    label_encoder = LabelEncoder()
    data[target] = label_encoder.fit_transform(data[target])

    return data, label_encoder

# Dataset and DataLoader Preparation

In this section, we define the `SensorDataset` class, a custom dataset tailored for handling time-series sensor data. This class is essential for preparing our data in a format suitable for training machine learning models, particularly those that operate on sequential data, such as Recurrent Neural Networks (RNNs) or Temporal Convolutional Networks (TCNs).

---

## `SensorDataset` Class

### Purpose
The `SensorDataset` class inherits from PyTorch's `Dataset` and is designed to:
- Organize sensor data into sequences of a specified length.
- Associate each sequence with the corresponding target label.
- Facilitate efficient data loading during model training and evaluation.

### Initialization (`__init__`)

#### Parameters
- **`data`** (`pd.DataFrame`): The preprocessed DataFrame containing sensor measurements and metadata.
- **`features`** (`list` of `str`): List of feature column names to be used as input for the model (e.g., sensor readings).
- **`target`** (`str`): The name of the target column that the model aims to predict (e.g., `'phase'`).
- **`sequence_length`** (`int`): The number of consecutive time steps to include in each input sequence.

#### Process
1. **Attribute Assignment:**
   - Stores the provided `features`, `target`, and `sequence_length` as instance attributes for later use.

2. **Initialization of Storage Lists:**
   - `self.sequences`: A list to store the input sequences.
   - `self.labels`: A list to store the corresponding target labels for each sequence.

3. **Grouping Data by Filename:**
   - The data is grouped by the `'filename'` column to ensure that sequences are generated from contiguous data points within the same file, preventing mixing data from different sources.

4. **Sequence Generation:**
   - For each group (i.e., each file), the following steps are performed:
     - **Reset Index:**
       - The group's index is reset to ensure sequential access.
     - **Check Group Length:**
       - Only groups with a length equal to or exceeding the `sequence_length` are considered, ensuring that each sequence has the required number of time steps.
     - **Sliding Window Approach:**
       - A sliding window iterates over the group to extract sequences:
         - **Sequence (`seq`):**
           - A subset of the group's data spanning `sequence_length` consecutive rows.
           - Extracts the specified `features` and converts them to a NumPy array.
         - **Label (`y`):**
           - The target value at the last time step of the sequence, representing the event or phase to be predicted.
       - The extracted `seq` and `y` are appended to `self.sequences` and `self.labels`, respectively.

### Length (`__len__`)

#### Purpose
Returns the total number of sequences available in the dataset.

#### Implementation
```python
def __len__(self):
    return len(self.sequences)

In [3]:
#########################################
# Dataset and DataLoader Preparation    #
#########################################

class SensorDataset(Dataset):
    def __init__(self, data, features, target, sequence_length):
        self.features = features
        self.target = target
        self.sequence_length = sequence_length
        self.sequences = []
        self.labels = []

        grouped = data.groupby('filename')
        for _, group in grouped:
            group = group.reset_index(drop=True)
            group_length = len(group)
            if group_length >= sequence_length:
                # Generate sequences using a sliding window
                for i in range(group_length - sequence_length + 1):
                    seq = group.iloc[i:i+sequence_length]
                    self.sequences.append(seq[self.features].values)
                    # Use the event at the last time point as the label
                    self.labels.append(seq[self.target].values[-1])

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        X = self.sequences[idx]
        y = self.labels[idx]
        return torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.long)

# Model Definition

In this section, we define the `LSTMClassifier` class, a custom neural network model built using PyTorch. This model leverages Long Short-Term Memory (LSTM) layers to effectively capture temporal dependencies in sequential sensor data, making it well-suited for classification tasks based on time-series inputs.

---

## `LSTMClassifier` Class

### Purpose
The `LSTMClassifier` is designed to perform classification tasks on sequential data by utilizing LSTM layers to model temporal relationships. It processes input sequences of sensor data and outputs class probabilities corresponding to different phases or events.

### Inheritance
- **`nn.Module`**: The class inherits from PyTorch's `nn.Module`, enabling integration with PyTorch's model training and evaluation frameworks.

### Initialization (`__init__`)

#### Parameters
- **`input_dim`** (`int`): The number of input features per time step (e.g., number of sensor readings).
- **`hidden_dim`** (`int`): The number of features in the hidden state of the LSTM. Determines the capacity of the LSTM to capture patterns.
- **`num_layers`** (`int`): The number of stacked LSTM layers. More layers can capture more complex temporal patterns.
- **`num_classes`** (`int`): The number of output classes for classification.
- **`dropout`** (`float`, optional): The dropout probability for regularization between LSTM layers. Default is `0.5`.

#### Components
1. **LSTM Layer (`self.lstm`):**
   - **Architecture:**
     - **Input Size (`input_dim`)**: The dimensionality of the input features.
     - **Hidden Size (`hidden_dim`)**: The dimensionality of the hidden state.
     - **Number of Layers (`num_layers`)**: Stacked LSTM layers for deeper temporal modeling.
     - **Batch First (`batch_first=True`)**: Ensures that the input and output tensors are provided as `(batch, seq, feature)`.
     - **Dropout (`dropout=dropout`)**: Applies dropout between LSTM layers to prevent overfitting.
   - **Purpose:** Processes the input sequences and captures temporal dependencies across time steps.

2. **Fully Connected Layer (`self.fc`):**
   - **Architecture:**
     - **Input Features (`hidden_dim`)**: Matches the hidden state size of the LSTM.
     - **Output Features (`num_classes`)**: Corresponds to the number of target classes.
   - **Purpose:** Maps the final hidden state of the LSTM to class scores for classification.

#### Implementation
```python
def __init__(self, input_dim, hidden_dim, num_layers, num_classes, dropout=0.5):
    super(LSTMClassifier, self).__init__()
    self.hidden_dim = hidden_dim
    self.num_layers = num_layers

    self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers,
                        batch_first=True, dropout=dropout)
    self.fc = nn.Linear(hidden_dim, num_classes)

In [4]:
#########################################
# Model Definition                      #
#########################################

class LSTMClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, num_classes, dropout=0.5):
        super(LSTMClassifier, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers,
                            batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(x.device)

        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])  # Take output from the last time step
        return out

# Training and Validation Functions

This section defines `train_epoch` and `validate_epoch` functions for training and validating the `LSTMClassifier` model.

---

## `train_epoch`

### Purpose
Trains the model for one epoch by processing batches, computing loss, performing backpropagation, and updating model weights.

### Parameters
- **`model`** (`nn.Module`): The neural network model.
- **`loader`** (`DataLoader`): DataLoader for training data.
- **`criterion`** (`nn.Module`): Loss function.
- **`optimizer`** (`torch.optim.Optimizer`): Optimization algorithm.
- **`device`** (`torch.device`): Computation device.

### Returns
- **`epoch_loss`** (`float`): Average loss for the epoch.
- **`epoch_acc`** (`float`): Accuracy for the epoch.

---

## `validate_epoch`

### Purpose
Evaluates the model on the validation dataset without updating weights.

### Parameters
- **`model`** (`nn.Module`): The neural network model.
- **`loader`** (`DataLoader`): DataLoader for validation data.
- **`criterion`** (`nn.Module`): Loss function.
- **`device`** (`torch.device`): Computation device.

### Returns
- **`epoch_loss`** (`float`): Average loss for the epoch.
- **`epoch_acc`** (`float`): Accuracy for the epoch.


In [5]:
#########################################
# Training and Validation Functions     #
#########################################

def train_epoch(model, loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for X_batch, y_batch in loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)

        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * X_batch.size(0)
        _, predicted = torch.max(outputs.data, 1)
        total += y_batch.size(0)
        correct += (predicted == y_batch).sum().item()

    epoch_loss = running_loss / len(loader.dataset)
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

def validate_epoch(model, loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for X_batch, y_batch in loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)

            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)

            running_loss += loss.item() * X_batch.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()

    epoch_loss = running_loss / len(loader.dataset)
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

In [6]:
#########################################
# Hyperparameter Tuning Configuration   #
#########################################

data_dir = 'csv_output_phases'  # Replace with your directory
raw_data = load_data_from_files(data_dir)
data, label_encoder = preprocess_data(raw_data)

# Hyperparameters to tune
lstm_layers_options = [3]
batch_size_options = [128]
sequence_length_options = [30]
hidden_dim_options = [128]
learning_rate_options = [0.001]
optimizer_options = ['adam']
# Different splits could be tested, but keep at least one split consistent first:
data_splits = [
    (0.7, 0.15, 0.15),
]

num_epochs = 10
input_dim = len(['gyroscope_x', 'gyroscope_y', 'gyroscope_z',
                 'accelerometer_x', 'accelerometer_y', 'accelerometer_z'])
num_classes = len(label_encoder.classes_)

best_val_acc = -1.0
best_config = None
best_model_state = None

for split in data_splits:
    train_ratio, val_ratio, test_ratio = split

    for seq_length in sequence_length_options:
        # Rebuild dataset and splits for each sequence length
        dataset = SensorDataset(data, 
                                features=['gyroscope_x', 'gyroscope_y', 'gyroscope_z',
                                          'accelerometer_x', 'accelerometer_y', 'accelerometer_z'],
                                target='phase', 
                                sequence_length=seq_length)

        dataset_size = len(dataset)
        train_size = int(train_ratio * dataset_size)
        val_size = int(val_ratio * dataset_size)
        test_size = dataset_size - train_size - val_size

        # Set seed for reproducibility in splits
        torch.manual_seed(42)
        train_dataset, val_dataset, test_dataset = random_split(
            dataset, [train_size, val_size, test_size])

        for batch_size in batch_size_options:
            train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
            val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, drop_last=True)
            test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

            for lstm_layers in lstm_layers_options:
                for hidden_dim in hidden_dim_options:
                    for lr in learning_rate_options:
                        for opt_name in optimizer_options:
                            
                            # Initialize model
                            model = LSTMClassifier(input_dim, hidden_dim, lstm_layers, num_classes, dropout=0.5).to(device)
                            criterion = nn.CrossEntropyLoss()

                            if opt_name == 'adam':
                                optimizer = torch.optim.Adam(model.parameters(), lr=lr)
                            else:
                                optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)

                            # Training loop
                            for epoch in range(num_epochs):
                                train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
                                val_loss, val_acc = validate_epoch(model, val_loader, criterion, device)
                                print(f'Epoch {epoch+1}/{num_epochs}, '
                                f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, '
                                f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

                            # Check if this is the best so far
                            if val_acc > best_val_acc:
                                best_val_acc = val_acc
                                best_config = {
                                    'train_ratio': train_ratio,
                                    'val_ratio': val_ratio,
                                    'test_ratio': test_ratio,
                                    'sequence_length': seq_length,
                                    'batch_size': batch_size,
                                    'lstm_layers': lstm_layers,
                                    'hidden_dim': hidden_dim,
                                    'learning_rate': lr,
                                    'optimizer': opt_name
                                }
                                best_model_state = model.state_dict()

Epoch 1/10, Train Loss: 0.6469, Train Acc: 0.6865, Val Loss: 0.2661, Val Acc: 0.8970
Epoch 2/10, Train Loss: 0.2646, Train Acc: 0.8986, Val Loss: 0.2514, Val Acc: 0.9037
Epoch 3/10, Train Loss: 0.2550, Train Acc: 0.9013, Val Loss: 0.2467, Val Acc: 0.9028
Epoch 4/10, Train Loss: 0.2480, Train Acc: 0.9028, Val Loss: 0.2374, Val Acc: 0.9062
Epoch 5/10, Train Loss: 0.2426, Train Acc: 0.9043, Val Loss: 0.2338, Val Acc: 0.9077
Epoch 6/10, Train Loss: 0.2392, Train Acc: 0.9052, Val Loss: 0.2282, Val Acc: 0.9072
Epoch 7/10, Train Loss: 0.2357, Train Acc: 0.9056, Val Loss: 0.2277, Val Acc: 0.9084
Epoch 8/10, Train Loss: 0.2323, Train Acc: 0.9069, Val Loss: 0.2231, Val Acc: 0.9091
Epoch 9/10, Train Loss: 0.2303, Train Acc: 0.9073, Val Loss: 0.2241, Val Acc: 0.9095
Epoch 10/10, Train Loss: 0.2293, Train Acc: 0.9078, Val Loss: 0.2212, Val Acc: 0.9112


# Evaluate Best Model on Test Set

This section assesses the performance of the best model configuration on the test dataset.

---

## Steps

1. **Display Best Metrics:**
   - Prints the highest validation accuracy achieved and the corresponding configuration parameters.

2. **Rebuild Dataset and Model:**
   - Extracts the best configuration settings such as sequence length, batch size, LSTM layers, hidden dimensions, learning rate, optimizer type, and dataset split ratios.
   - Creates a `SensorDataset` using the optimal sequence length.
   - Splits the dataset into training, validation, and test sets based on the best ratios.

3. **Initialize DataLoaders:**
   - Sets up DataLoaders for training, validation, and testing with the optimal batch size and shuffling where appropriate.

4. **Recreate and Load the Model:**
   - Instantiates the `LSTMClassifier` with the best hyperparameters.
   - Loads the saved state of the best-performing model.
   - Sets the model to evaluation mode to prepare for testing.

5. **Evaluate on Test Set:**
   - Iterates through the test DataLoader, making predictions with the model.
   - Collects all predictions and true labels.
   - Generates and prints a classification report detailing precision, recall, f1-score, and support for each class.

---

By executing these steps, we validate the model's effectiveness on unseen data, ensuring its ability to generalize beyond the training and validation sets.

In [10]:
#########################################
# Evaluate Best Model on Test Set       #
#########################################

print("Best Validation Accuracy: ", best_val_acc)
print("Best Configuration: ", best_config)

# Rebuild the dataset and model with best config
sequence_length = best_config['sequence_length']
batch_size = best_config['batch_size']
lstm_layers = best_config['lstm_layers']
hidden_dim = best_config['hidden_dim']
lr = best_config['learning_rate']
opt_name = best_config['optimizer']
train_ratio = best_config['train_ratio']
val_ratio = best_config['val_ratio']
test_ratio = best_config['test_ratio']

# Recreate dataset with the best sequence length
best_dataset = SensorDataset(data,
                            features=['gyroscope_x', 'gyroscope_y', 'gyroscope_z',
                                      'accelerometer_x', 'accelerometer_y', 'accelerometer_z'],
                            target='phase', 
                            sequence_length=sequence_length)

dataset_size = len(best_dataset)
train_size = int(train_ratio * dataset_size)
val_size = int(val_ratio * dataset_size)
test_size = dataset_size - train_size - val_size

torch.manual_seed(42)
train_dataset, val_dataset, test_dataset = random_split(
    best_dataset, [train_size, val_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, drop_last=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

# Recreate model and load best state
final_model = LSTMClassifier(input_dim, hidden_dim, lstm_layers, num_classes, dropout=0.5).to(device)
final_model.load_state_dict(best_model_state)
final_model.eval()

# Evaluate on test set
all_preds = []
all_labels = []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        outputs = final_model(X_batch)
        _, predicted = torch.max(outputs.data, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(y_batch.cpu().numpy())

class_names = ['no event', 'heel strike', 'foot flat', 'heel off', 'toe off']
print(classification_report(all_labels, all_preds, target_names=class_names))

Best Validation Accuracy:  0.9112463662790697
Best Configuration:  {'train_ratio': 0.7, 'val_ratio': 0.15, 'test_ratio': 0.15, 'sequence_length': 30, 'batch_size': 128, 'lstm_layers': 3, 'hidden_dim': 128, 'learning_rate': 0.001, 'optimizer': 'adam'}
              precision    recall  f1-score   support

    no event       0.84      0.21      0.34       265
 heel strike       0.00      0.00      0.00        33
   foot flat       0.90      0.94      0.92     22740
    heel off       0.82      0.77      0.80     14536
     toe off       0.96      0.97      0.96     28474

    accuracy                           0.91     66048
   macro avg       0.70      0.58      0.60     66048
weighted avg       0.91      0.91      0.91     66048



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [8]:
# After training is complete
model_save_path = 'best_lstm_model_phases.pth'
torch.save(final_model.state_dict(), model_save_path)
print(f"Model weights saved to {model_save_path}")

Model weights saved to best_lstm_model_phases.pth


In [None]:
# # Initialize the model architecture
# loaded_model = LSTMClassifier(input_dim, hidden_dim, num_layers, num_classes)
# loaded_model.to(device)

# # Load the saved weights
# model_load_path = 'lstm_model_phases.pth'
# loaded_model.load_state_dict(torch.load(model_load_path))
# print(f"Model weights loaded from {model_load_path}")

# # Set the model to evaluation mode
# loaded_model.eval()

In [None]:
# # Evaluate on the test set
# test_loss, test_acc = validate(loaded_model, test_loader, criterion, device)
# print(f'Loaded Model Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}')

In [None]:
# from sklearn.metrics import classification_report
# import numpy as np

# # Collect predictions and true labels
# all_preds = []
# all_labels = []

# with torch.no_grad():
#     for X_batch, y_batch in test_loader:
#         X_batch = X_batch.to(device)
#         y_batch = y_batch.to(device)
#         outputs = loaded_model(X_batch)
#         _, predicted = torch.max(outputs.data, 1)
#         all_preds.extend(predicted.cpu().numpy())
#         all_labels.extend(y_batch.cpu().numpy())

# # Define all possible labels
# labels = range(num_classes)  # Ensure num_classes is set correctly (should be 5)

# # Manually define class names
# class_names = ['no event', 'heel strike', 'foot flat', 'heel off', 'toe off']

# # Generate the classification report
# print(classification_report(all_labels, all_preds, labels=labels, target_names=class_names))


In [None]:
# import matplotlib.pyplot as plt
# import numpy as np

# # Define phase labels
# phase_labels = {
#     0: 'no event',
#     1: 'heel strike',
#     2: 'foot flat',
#     3: 'heel off',
#     4: 'toe off'
# }
# # Create subplots for each feature
# fig, axes = plt.subplots(2, 3, figsize=(24, 12))

# # Plot each feature over time for the first collection_id with different colors for each phase
# features = ['gyroscope_x', 'gyroscope_y', 'gyroscope_z', 'accelerometer_x', 'accelerometer_y', 'accelerometer_z']
# for i, feature in enumerate(features):
#     ax = axes[i // 3, i % 3]
#     for phase in phases:
#         phase_data = first_collection_data[first_collection_data['phase'] == phase]
#         ax.plot(phase_data['elapsed_time'], phase_data[feature], label=f'{feature} - {phase_labels[phase]}')
    
#     ax.set_xlabel('Elapsed Time (seconds)')
#     ax.set_ylabel(feature)
#     ax.set_title(f'{feature} over Time for Collection ID: {first_collection_id}')
#     ax.legend()

# plt.tight_layout()
# plt.show()
