Main LOO Cross Validation

Load Packages

In [1]:
from RADARDataCapture import FMCWRADARDataCapture
from MOCAPDataCapture import MOCAPDataCapture
from FPDataCapture import FPDataCapture
from StableRdmDataset import StableRdmDataset
import h5py
import os
import numpy as np
import pandas as pd
import imageio
import matplotlib.pyplot as plt
from datetime import datetime
import statistics
from matplotlib import animation
import torch
import torch.nn as nn
from torch import optim, nn
from torch.utils.data import Dataset, DataLoader, Subset
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence
from torch import optim
from datetime import datetime
from RdmSingleVariablePrediction import RdmCNNLSTMModel
from sklearn.model_selection import KFold
from scipy import stats
import random

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

Load Events Dataframe

In [2]:
event_csv_path = "/Users/danielcopeland/Library/Mobile Documents/com~apple~CloudDocs/MIT Masters/DRL/LABx/RADARTreePose/data/csvs/MOCAP_FP_RADAR_FU_Stable_Break_FD_TIME_FRAMES_v3.csv"

event_labels_df = pd.read_csv(event_csv_path)
print(event_labels_df.head())

    RADAR_capture  MOCAP_Start_Time  MOCAP_End_Time  RADAR_Start_Frame  \
0  01_MNTRL_RR_V1              7.33           30.57                194   
1  01_MNTRL_RR_V1              7.33           30.57                194   
2  01_MNTRL_RR_V1              7.33           30.57                194   
3  01_MNTRL_RR_V2              7.37           30.40                191   
4  01_MNTRL_RR_V2              7.37           30.40                191   

   RADAR_End_Frame  Seconds_per_Frame  tx  is_final_tx  t_foot_up  t_stable  \
0              831            0.03645   1        False      10.22     11.19   
1              831            0.03645   2        False      18.25     19.05   
2              831            0.03645   3         True      26.24     27.03   
3              822            0.03646   1        False      10.48     11.05   
4              822            0.03646   2        False      18.41     19.07   

   t_break  t_foot_down  frame_foot_up  frame_stable  frame_break  frame_end  
0

Training and Testing Functions

In [3]:
def train_model(model, dataloader, criterion, optimizer, num_epochs=25, device=torch.device("cpu")):
    """
    Train a model on a given dataset.

    Parameters:
    - model: The neural network model to be trained.
    - dataloader: DataLoader for the training data.
    - criterion: The loss function.
    - optimizer: The optimization algorithm.
    - num_epochs: The number of epochs to train for.
    - device: The device (CPU or GPU) to train on.

    Returns:
    - model: The trained model.
    """

    # Move model to the specified device
    model.to(device)

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)

        model.train()  # Set model to training mode
        running_loss = 0.0

        # Iterate over data
        for inputs, velocities, _, _ in dataloader:
            inputs = inputs.to(device)
            velocities = velocities.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, velocities)

            # Backward pass + optimize
            loss.backward()
            optimizer.step()

            # Statistics
            running_loss += loss.item() * inputs.size(0)
            
        epoch_loss = running_loss / len(dataloader.dataset)
        print(f'Train Loss: {epoch_loss:.4f}\n')

    print('Training complete')
    return model


def train_validate_model(model, dataloaders, criterion, optimizer, num_epochs=25, device=torch.device("cpu")):
    
    best_val_loss = float('inf')

    
    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            
            # Iterate over data.
            for inputs, velocities, _, _ in dataloaders[phase]:
                inputs = inputs.to(device)
                velocities = velocities.to(device)

                # Zero the parameter gradients
                optimizer.zero_grad()

                # Forward
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = criterion(outputs, velocities)

                    # Backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Statistics
                running_loss += loss.item() * inputs.size(0)
                
            epoch_loss = running_loss / len(dataloaders[phase].dataset)

            print(f'{phase} Loss: {epoch_loss:.4f}')

                # Track the best performance
            if phase == 'val' and epoch_loss < best_val_loss:
                best_val_loss = epoch_loss
                print(f"New best validation loss is: {best_val_loss}")
        
        print()

    print('Training complete')

    return model, best_val_loss

def test_model(model, dataloader, device=torch.device("cpu")):
    model.eval()  # Set model to evaluate mode
    running_loss = 0.0
    criterion = nn.MSELoss()  # Example criterion, adjust as necessary

    # No gradient updates
    with torch.no_grad():
        for inputs, velocities, _, _ in dataloader:
            inputs = inputs.to(device)
            velocities = velocities.to(device)

            # Forward
            outputs = model(inputs)
            loss = criterion(outputs, velocities)

            # Statistics
            running_loss += loss.item() * inputs.size(0)

    total_loss = running_loss / len(dataloader.dataset)
    print(f'Test Loss: {total_loss:.4f}')


LOO Cross Validation

In [9]:

# Define the root directory and event CSV file path
root_dir = '/Volumes/FourTBLaCie/Yoga_Study_RADAR_Stable_1Ch'
event_csv = "/Users/danielcopeland/Library/Mobile Documents/com~apple~CloudDocs/MIT Masters/DRL/LABx/RADARTreePose/data/csvs/MOCAP_FP_RADAR_FU_Stable_Break_FD_TIME_FRAMES_v3.csv"

# Get all folders and shuffle them randomly
all_folders = [d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))]
random.seed(42)
random.shuffle(all_folders)

# Define the setup_loaders function
def setup_loaders(root_dir, event_csv, folders):
    datasets = {x: StableRdmDataset(root_dir, event_csv, folders[x], label_type="avg_speed") for x in ['train', 'test']}
    loaders = {x: DataLoader(datasets[x], batch_size=2, shuffle=True if x == 'train' else False, collate_fn=StableRdmDataset.collate_fn) for x in datasets}
    return loaders

# Model saving directory
model_save_dir = "/Users/danielcopeland/Library/Mobile Documents/com~apple~CloudDocs/MIT Masters/DRL/LABx/RADARTreePose/LOO_CV_models"

# Implement Leave-One-Out Cross-Validation
for i, test_folder in enumerate(all_folders):
    print(f"Starting LOO with {test_folder} as test set")
    
    # Split folders into training and test sets
    train_folders = [f for f in all_folders if f != test_folder]
    folders = {'train': train_folders, 'test': [test_folder]}
    
    # Create the DataLoaders
    loaders = setup_loaders(root_dir, event_csv, folders)
    train_loader, test_loader = loaders['train'], loaders['test']
    
    # Initialize the model
    model = RdmCNNLSTMModel(num_channels=1, hidden_dim=128, lstm_layers=2, bidirectional=False)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.MSELoss()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    
    # Train the model on this fold's training data
    model = train_model(model, train_loader, criterion, optimizer, num_epochs=4, device=device)
    
    # Update model save path to include the test folder name
    model_save_path = os.path.join(model_save_dir, f'model_LOO_{test_folder}.pth')
    torch.save(model.state_dict(), model_save_path)
    print(f"Model saved to {model_save_path}")

    # Evaluation logic for each test folder as needed
    # Your evaluation logic here...

print("Leave-One-Out Cross-Validation complete.")

Starting LOO with 13 as test set
found capture tx: /Volumes/FourTBLaCie/Yoga_Study_RADAR_Stable_1Ch/22/22_MNTRL_RR_V1_channel1_tx1.npy
The shape of the cpature is (32, 23, 13)
found capture tx: /Volumes/FourTBLaCie/Yoga_Study_RADAR_Stable_1Ch/22/22_MNTRL_RR_V1_channel2_tx1.npy
The shape of the cpature is (32, 23, 13)
found capture tx: /Volumes/FourTBLaCie/Yoga_Study_RADAR_Stable_1Ch/22/22_MNTRL_RR_V1_channel3_tx1.npy
The shape of the cpature is (32, 23, 13)
found capture tx: /Volumes/FourTBLaCie/Yoga_Study_RADAR_Stable_1Ch/22/22_MNTRL_RR_V1_channel4_tx1.npy
The shape of the cpature is (32, 23, 13)
found capture tx: /Volumes/FourTBLaCie/Yoga_Study_RADAR_Stable_1Ch/22/22_MNTRL_RR_V1_channel1_tx2.npy
The shape of the cpature is (96, 23, 13)
found capture tx: /Volumes/FourTBLaCie/Yoga_Study_RADAR_Stable_1Ch/22/22_MNTRL_RR_V1_channel2_tx2.npy
The shape of the cpature is (96, 23, 13)
found capture tx: /Volumes/FourTBLaCie/Yoga_Study_RADAR_Stable_1Ch/22/22_MNTRL_RR_V1_channel3_tx2.npy
The sha

[W NNPACK.cpp:64] Could not initialize NNPACK! Reason: Unsupported hardware.


Epoch 1/4
----------
Train Loss: 0.0037

Epoch 2/4
----------
Train Loss: 0.0027

Epoch 3/4
----------
Train Loss: 0.0024

Epoch 4/4
----------
Train Loss: 0.0022

Training complete
Model saved to /Users/danielcopeland/Library/Mobile Documents/com~apple~CloudDocs/MIT Masters/DRL/LABx/RADARTreePose/LOO_CV_models/model_LOO_13.pth
Starting LOO with 22 as test set
found capture tx: /Volumes/FourTBLaCie/Yoga_Study_RADAR_Stable_1Ch/13/13_MNTRL_RR_V1_channel1_tx1.npy
The shape of the cpature is (27, 23, 13)
found capture tx: /Volumes/FourTBLaCie/Yoga_Study_RADAR_Stable_1Ch/13/13_MNTRL_RR_V1_channel2_tx1.npy
The shape of the cpature is (27, 23, 13)
found capture tx: /Volumes/FourTBLaCie/Yoga_Study_RADAR_Stable_1Ch/13/13_MNTRL_RR_V1_channel3_tx1.npy
The shape of the cpature is (27, 23, 13)
found capture tx: /Volumes/FourTBLaCie/Yoga_Study_RADAR_Stable_1Ch/13/13_MNTRL_RR_V1_channel4_tx1.npy
The shape of the cpature is (27, 23, 13)
found capture tx: /Volumes/FourTBLaCie/Yoga_Study_RADAR_Stable_1C

Fine Tune Models

In [10]:
from torch.utils.data import DataLoader, Subset, random_split
from sklearn.model_selection import train_test_split


# Define the setup_loaders function
def setup_test_loaders(root_dir, event_csv, folders):
    datasets = {x: StableRdmDataset(root_dir, event_csv, folders[x], label_type="avg_speed") for x in ['test']}
    loaders = {x: DataLoader(datasets[x], batch_size=2, shuffle=True if x == 'train' else False, collate_fn=StableRdmDataset.collate_fn) for x in datasets}
    return loaders


def load_model(model_path, device):
    """Load a pre-trained model."""
    model = RdmCNNLSTMModel(num_channels=1, hidden_dim=128, lstm_layers=2, bidirectional=False) # Instantiate your model class
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)
    return model

def prepare_data_for_participant(test_loader, fraction=0.2, seed=42):
    """
    Splits the test participant's data into fine-tuning and evaluation subsets.
    
    Parameters:
    - test_loader: DataLoader for the test participant's data.
    - fraction: Fraction of data to use for fine-tuning.
    - seed: Random seed for reproducibility.
    
    Returns:
    - fine_tune_loader: DataLoader for the fine-tuning subset.
    - eval_loader: DataLoader for the evaluation subset.
    """
    # Determine split sizes
    total_size = len(test_loader.dataset)
    fine_tune_size = int(fraction * total_size)
    eval_size = total_size - fine_tune_size
    
    # Split the dataset
    fine_tune_subset, eval_subset = random_split(
        test_loader.dataset,
        [fine_tune_size, eval_size],
        generator=torch.Generator().manual_seed(seed)
    )
    
    # Create DataLoaders for each subset
    fine_tune_loader = DataLoader(fine_tune_subset, batch_size=2, shuffle=True, collate_fn=StableRdmDataset.collate_fn)
    eval_loader = DataLoader(eval_subset, batch_size=2, shuffle=False, collate_fn=StableRdmDataset.collate_fn)

    return fine_tune_loader, eval_loader

def fine_tune_model(model, fine_tune_loader, device, epochs=3):
    """Fine-tunes the model on a subset of data."""
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.MSELoss()  # Or your specific loss function
    for epoch in range(epochs):
        for inputs, labels, length, metadata in fine_tune_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

def evaluate_model(model, test_loader, device):
    model.eval()
    predictions, actuals = [], []
    with torch.no_grad():
        for batch in test_loader:
            # Assuming the first two elements are inputs and labels
            inputs, labels = batch[0].to(device), batch[1].to(device)
            
            # In case your model expects inputs in a specific shape based on 'length'
            # Adjust 'inputs' here if necessary, e.g., using 'length' to pack sequences
            
            outputs = model(inputs)
            predictions.extend(outputs.view(-1).cpu().numpy())
            actuals.extend(labels.view(-1).cpu().numpy())


    # Calculate R^2 score
    r2 = r2_score(actuals, predictions)
    return r2

def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    models_dir = "/Users/danielcopeland/Library/Mobile Documents/com~apple~CloudDocs/MIT Masters/DRL/LABx/RADARTreePose/LOO_CV_models"
    root_dir = '/Volumes/FourTBLaCie/Yoga_Study_RADAR_Stable_1Ch'
    event_csv = "/Users/danielcopeland/Library/Mobile Documents/com~apple~CloudDocs/MIT Masters/DRL/LABx/RADARTreePose/data/csvs/MOCAP_FP_RADAR_FU_Stable_Break_FD_TIME_FRAMES_v3.csv"
    
    # Automatically load model paths
    model_paths = [os.path.join(models_dir, f) for f in os.listdir(models_dir) if f.startswith('model_LOO_') and f.endswith('.pth')]
    
    # Get all folders and shuffle them randomly for reproducibility if needed
    all_folders = [d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))]
    random.seed(42)
    random.shuffle(all_folders)

    for model_path in model_paths:
        model_name = os.path.basename(model_path)
        test_folder = model_name.replace('model_LOO_', '').replace('.pth', '')
        print(f"Evaluating {model_name} with {test_folder} as the test set")

        # Ensure the test folder is one of the available folders, skip otherwise
        if test_folder not in all_folders:
            print(f"Test folder {test_folder} not found in the dataset. Skipping...")
            continue

        model = load_model(model_path, device)
        folders = {'test': [test_folder]}
        loaders = setup_test_loaders(root_dir, event_csv, folders)
        test_loader = loaders['test']

        model_r2_scores = []

        for fraction in np.linspace(0.1, 0.5, 5):  # Incremental portions of the test data for fine-tuning
            # Use the prepare_data_for_participant function to split the dataset
            fine_tune_loader, eval_loader = prepare_data_for_participant(test_loader, fraction=fraction, seed=42)
            
            # Fine-tune the model with the fine_tune_loader
            fine_tune_model(model, fine_tune_loader, device)
            
            # Evaluate the model with the eval_loader (contains data not seen during fine-tuning)
            r2 = evaluate_model(model, eval_loader, device)
            model_r2_scores.append((fraction, r2))

        r2_scores.append((test_folder, model_r2_scores))

    # Plotting logic remains the same

if __name__ == "__main__":
    main()

Evaluating model_LOO_24.pth with 24 as the test set
found capture tx: /Volumes/FourTBLaCie/Yoga_Study_RADAR_Stable_1Ch/24/24_MNTRL_RR_V1_channel1_tx1.npy
The shape of the 24_MNTRL_RR_V1 is: (100, 23, 13)
found capture tx: /Volumes/FourTBLaCie/Yoga_Study_RADAR_Stable_1Ch/24/24_MNTRL_RR_V1_channel2_tx1.npy
The shape of the 24_MNTRL_RR_V1 is: (100, 23, 13)
found capture tx: /Volumes/FourTBLaCie/Yoga_Study_RADAR_Stable_1Ch/24/24_MNTRL_RR_V1_channel3_tx1.npy
The shape of the 24_MNTRL_RR_V1 is: (100, 23, 13)
found capture tx: /Volumes/FourTBLaCie/Yoga_Study_RADAR_Stable_1Ch/24/24_MNTRL_RR_V1_channel4_tx1.npy
The shape of the 24_MNTRL_RR_V1 is: (100, 23, 13)
found capture tx: /Volumes/FourTBLaCie/Yoga_Study_RADAR_Stable_1Ch/24/24_MNTRL_RR_V1_channel1_tx2.npy
The shape of the 24_MNTRL_RR_V1 is: (101, 23, 13)
found capture tx: /Volumes/FourTBLaCie/Yoga_Study_RADAR_Stable_1Ch/24/24_MNTRL_RR_V1_channel2_tx2.npy
The shape of the 24_MNTRL_RR_V1 is: (101, 23, 13)
found capture tx: /Volumes/FourTBLaC

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


ValueError: Found array with 0 sample(s) (shape=(0,)) while a minimum of 1 is required.

Plot R2 vs % data included

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Example r2_scores structure for clarity:
# r2_scores = [
#     ('model_LOO_folder1', [(0.1, 0.8), (0.2, 0.82), ..., (1.0, 0.9)]),
#     ('model_LOO_folder2', [(0.1, 0.75), (0.2, 0.77), ..., (1.0, 0.88)]),
#     ...
# ]

# Assuming r2_scores is filled as above
# Generate a unique color for each test folder
colors = plt.cm.jet(np.linspace(0, 1, len(r2_scores)))

plt.figure(figsize=(10, 7))

for (model_name, scores), color in zip(r2_scores, colors):
    # Extract the fraction of test data and corresponding R^2 scores
    fractions, r2s = zip(*scores)
    test_folder = model_name.replace('model_LOO_', '')
    plt.plot(fractions, r2s, label=f'{test_folder}', color=color)

plt.xlabel('% of Test Data Used for Fine-Tuning')
plt.ylabel('R^2 Score')
plt.title('Model Performance vs. % Fine-Tuning by Test Folder')
plt.legend(title='Test Folder', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()
