This Notebook contains implementations of neural network models that process video frames and pose landmarks to classify actions and predict ratings.


In [1]:
import os
import json

import cv2
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from transformers import AutoImageProcessor, AutoModelForPreTraining

In [2]:
# Check if CUDA (GPU support) is available
if torch.cuda.is_available():
    print("CUDA is available. PyTorch can use the GPU.")
    print(f"Device name: {torch.cuda.get_device_name(0)}")
else:
    print("CUDA is not available. PyTorch is using the CPU.")

CUDA is available. PyTorch can use the GPU.
Device name: Tesla T4


In [None]:
base_path = "/content/drive/MyDrive/Vision_GYM_Research/Data"
LOG_DIR = "/content/drive/MyDrive/Vision_GYM_Research/tensorboard_logs"

## Functions

### `load_and_resize_frames`
- **Description**: Loads and resizes video frames from specified file paths.
- **Parameters**:
  - `num_video_frontal`: Identifier for the video file.
  - `num_idx`: Index for the specific video segment.
  - `num_frames`: Number of frames to load.
  - `size`: Target size for resizing frames (default: (224, 224)).
- **Returns**: List of resized frames.

### `extract_pose_landmarks`
- **Description**: Extracts pose landmarks from a list of video frames using MediaPipe.
- **Parameters**:
  - `video_frames`: List of frames (each of size [224, 224, 3]).
- **Returns**: Array of pose landmarks with shape [num_frames, 33, 3].


In [9]:
def load_and_resize_frames(num_video, action, isFront, num_idx, num_frames, size=(224, 224)):
    """
    Loads and resizes video frames from specified file paths based on the action type.

    Args:
        num_video (int): Identifier for the video file.
        action (str): The action being performed ('deadlift', 'squat', 'lunges').
        isFront (int): 1 for frontal frames, 0 for lateral frames.
        num_idx (int): Index for the specific video segment.
        num_frames (int): Number of frames to load.
        size (tuple): Target size for resizing frames (default is (224, 224)).

    Returns:
        list: A list of resized frames.
    """
    frames = []

    action_to_folder = {
        'Deadlift': 'Deadlift_Frames',
        'Squat': 'Squat_Frames',
        'lunges': 'Lunges_Frames'
    }

    if action not in action_to_folder:
        raise ValueError(f"Unknown action: {action}")

    base_folder = os.path.join(base_path, action_to_folder[action])

    # Load and resize frames
    for i in range(1, num_frames + 1):
        path = os.path.join(base_folder, f"{num_video}_idx_{num_idx}_{i}.jpg")
        img = cv2.imread(path)
        if img is not None:
            img_resized = cv2.resize(img, size)
            frames.append(img_resized)
        else:
            print(f"Warning: Could not load image at {path}")
    return frames

In [5]:
def process_action_data(base_path, action_name):
    """
    Process data for a specific action by loading the corresponding Excel and JSON files,
    adding pose data, and splitting into train, validation, and test sets.

    Args:
        base_path (str): The base directory containing the files.
        action_name (str): The name of the action (e.g., 'squat', 'deadlift', 'lunges').

    Returns:
        tuple: train_df, val_df, test_df DataFrames.
    """
    # Load the Excel file
    excel_file = f"{action_name}_edited.xlsx"
    df = pd.read_excel(os.path.join(base_path, excel_file))

    # Load the JSON files for front and lateral poses
    front_pose_file = f"front_pose_{action_name}.json"
    lat_pose_file = f"lat_pose_{action_name}.json"

    def load_json_as_numpy(json_file):
        with open(json_file, 'r') as file:
            data = json.load(file)
        return np.array(data)

    front_pose_array = load_json_as_numpy(os.path.join(base_path, front_pose_file))
    lat_pose_array = load_json_as_numpy(os.path.join(base_path, lat_pose_file))

    # Ensure `front_pose` and `lat_pose` columns exist
    if 'front_pose' not in df.columns:
        df['front_pose'] = None
    if 'lat_pose' not in df.columns:
        df['lat_pose'] = None

    # Assign the loaded arrays to the DataFrame if lengths match
    if len(front_pose_array) == len(df) and len(lat_pose_array) == len(df):
        df['front_pose'] = list(front_pose_array)
        df['lat_pose'] = list(lat_pose_array)
    else:
        raise ValueError("The length of the loaded arrays does not match the DataFrame.")

    # Shuffle the DataFrame
    df = df.sample(frac=1, random_state=42).reset_index(drop=True)

    # Define split ratios
    train_ratio, val_ratio, test_ratio = 0.7, 0.15, 0.15

    # Calculate the number of samples for each set
    total_samples = len(df)
    train_size = int(total_samples * train_ratio)
    val_size = int(total_samples * val_ratio)

    # Split the DataFrame
    train_df = df.iloc[:train_size]
    val_df = df.iloc[train_size:train_size + val_size]
    test_df = df.iloc[train_size + val_size:]

    return train_df, val_df, test_df

In [19]:
train_df_squat, val_df_squat, test_df_squat = process_action_data(base_path, 'squat')
train_df_dead, val_df_dead, test_df_dead = process_action_data(base_path, 'deadlift')
train_df_lunge, val_df_lunge, test_df_lunge = process_action_data(base_path, 'lunges')


# Define the actions
actions = ['squat', 'deadlift', 'lunges']

# Process data for each action and store results in dictionaries
splits = {action: process_action_data(base_path, action) for action in actions}

# Concatenate and shuffle DataFrames for each split
train_df = pd.concat([splits[action][0] for action in actions]).sample(frac=1, random_state=1).reset_index(drop=True)
val_df = pd.concat([splits[action][1] for action in actions]).sample(frac=1, random_state=1).reset_index(drop=True)
test_df = pd.concat([splits[action][2] for action in actions]).sample(frac=1, random_state=1).reset_index(drop=True)

### `Pose_Model`
- **Description**: A model that processes pose landmarks to output a feature vector.
- **Methods**:
  - `forward(pose_landmarks)`: Forward pass through the Pose model.

In [79]:
# class ResidualBlock(nn.Module):
#     """
#     A residual block for 1D convolutional layers.
#     """
#     def __init__(self, in_channels, out_channels, stride=1):
#         super(ResidualBlock, self).__init__()
#         self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
#         self.bn1 = nn.BatchNorm1d(out_channels)
#         self.relu = nn.ReLU()
#         self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
#         self.bn2 = nn.BatchNorm1d(out_channels)

#         # Shortcut connection
#         self.shortcut = nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=stride)
#         self.shortcut_bn = nn.BatchNorm1d(out_channels)

#     def forward(self, x):
#         shortcut = self.shortcut(x)
#         shortcut = self.shortcut_bn(shortcut)

#         x = self.conv1(x)
#         x = self.bn1(x)
#         x = self.relu(x)
#         x = self.conv2(x)
#         x = self.bn2(x)

#         x += shortcut
#         x = self.relu(x)
#         return x

class Pose_Model(nn.Module):
    """
    Pose_Model is a PyTorch neural network module designed to process pose landmarks
    and output a feature vector for further analysis.
    """

    def __init__(self, input_size=528, hidden_size1=512, hidden_size2=1024, hidden_size3=2048, final_size=2048):
        super(Pose_Model, self).__init__()

        # Define the neural network layers
        self.fc1 = nn.Linear(input_size, hidden_size1)    # First hidden layer with reduced size
        self.bn1 = nn.BatchNorm1d(hidden_size1)           # Batch Normalization
        self.relu1 = nn.ReLU()

        self.fc2 = nn.Linear(hidden_size1, hidden_size2)  # Second hidden layer with increased size
        self.bn2 = nn.BatchNorm1d(hidden_size2)           # Batch Normalization
        self.relu2 = nn.ReLU()

        self.fc3 = nn.Linear(hidden_size2, hidden_size3)  # Third hidden layer with increased size
        self.bn3 = nn.BatchNorm1d(hidden_size3)           # Batch Normalization
        self.relu3 = nn.ReLU()

        self.fc4 = nn.Linear(hidden_size3, final_size)    # Output layer
        self.relu4 = nn.ReLU()

    def forward(self, x):
        """
        Forward pass through the Pose Model.

        pose_landmarks: Tensor of shape [batch_size, num_frames, 528]
        """
        try:
            batch_size, num_frames, _ = x.shape     # batch_size, num_frames, 528

            # Process all frames at once
            x = self.fc1(x.view(-1, x.size(-1)))  # Shape: [batch_size*num_frames, hidden_size1]
            x = self.bn1(x)  # Batch Normalization
            x = self.relu1(x)

            x = self.fc2(x)
            x = self.bn2(x)  # Batch Normalization
            x = self.relu2(x)

            x = self.fc3(x)
            x = self.bn3(x)  # Batch Normalization
            x = self.relu3(x)

            x = self.fc4(x)  # Final layer
            x = self.relu4(x)

            # Reshape back to [batch_size, num_frames, final_size]
            frame_features = x.view(batch_size, num_frames, -1)  # Shape: [batch_size, num_frames, final_size]

            # Aggregate the frame features (mean pooling)
            pooled_features = frame_features.mean(dim=1)  # Shape: [batch_size, final_size]

            # print("pooled_features shape:", pooled_features.shape)

        except Exception as e:
            print("An error occurred in Pose_Model:")
            print(f"Error: {e}")
        return pooled_features  # Shape: [batch_size, final_size]

## `Dual_Combined_Model`

- **Description**: Combines two instances of `Combined_Video_Pose_Model` to produce classification and rating outputs.

- **Methods**:
  - `forward(pixel_values_1, bool_masked_pos_1, pose_landmarks_tensor_1, pixel_values_2, bool_masked_pos_2, pose_landmarks_tensor_2)`: Forward pass through the dual model.


In [78]:
class Dual_Combined_Model(nn.Module):
    """
    Dual_Combined_Model combines two instances of Combined_Video_Pose_Model
    to produce classification and rating outputs.
    """

    def __init__(self, input_size=4096, hidden_size=512):
        super(Dual_Combined_Model, self).__init__()

        # Initialize two instances of Combined_Video_Pose_Model
        self.front_model = Pose_Model()
        self.lat_model = Pose_Model()

        # Fully connected layers for classification
        self.classification_layer = nn.Linear(input_size, hidden_size)  # Initial layer after concatenation
        self.classification_relu = nn.ReLU()
        self.classification_dropout = nn.Dropout(0.5)

        self.classification_output = nn.Linear(hidden_size, 3)  # Output layer for 3 classes

    def forward(self, lat_pose, front_pose):
        """
        Forward pass for combining lateral and frontal pose inputs.

        Args:
            lat_pose: Tensor of shape [batch_size, num_frames, input_size] for lateral poses.
            front_pose: Tensor of shape [batch_size, num_frames, input_size] for frontal poses.

        Returns:
            classification_output: Tensor of shape [batch_size, 3].
            combined_hidden: Tensor of shape [batch_size, 4096].
        """
        # Process lateral and frontal poses independently
        lat_hidden = self.lat_model(lat_pose)  # Shape: [batch_size, 2048]
        front_hidden = self.front_model(front_pose)  # Shape: [batch_size, 2048]

        if lat_hidden is None or front_hidden is None:
            raise ValueError("One of the models returned None.")

        # Concatenate the features from both models
        combined_hidden = torch.cat((lat_hidden, front_hidden), dim=1)  # Shape: [batch_size, 4096]
        # print("combined_hidden shape", combined_hidden.shape)
        # Classification path
        classification_hidden = self.classification_layer(combined_hidden)
        classification_hidden = self.classification_relu(classification_hidden)
        classification_hidden = self.classification_dropout(classification_hidden)
        classification_output = self.classification_output(classification_hidden)

        return classification_output, combined_hidden


## `CriteriaPredictionModel`

- **Description**: A deep neural network for predicting multiple binary ratings (yes/no) using a series of fully connected layers, ReLU activations, and dropout for regularization. The model outputs a probability for each rating.

- **Methods**:
  - `forward(x)`: Forward pass through the network. The input is passed through five fully connected layers, each with ReLU and dropout, and the final output is processed by a sigmoid activation to produce binary classification probabilities.


In [85]:
class CriteriaPredictionModel(nn.Module):
    """
    RatingPredictionModel predicts multiple yes/no ratings with a deeper network architecture.
    The model uses binary classification for each output rating (5 in total).
    """
    def __init__(self, input_size=4096, hidden_size1=2048, hidden_size2=1024, hidden_size3=512, hidden_size4=256, hidden_size5=128,
                 output_size=5, dropout_rate=0.5):
        super(CriteriaPredictionModel, self).__init__()

        # First fully connected layer
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout_rate)

        # Second fully connected layer
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout_rate)

        # Adding more depth
        self.fc3 = nn.Linear(hidden_size2, hidden_size3)
        self.relu3 = nn.ReLU()
        self.dropout3 = nn.Dropout(dropout_rate)

        self.fc4 = nn.Linear(hidden_size3, hidden_size4)
        self.relu4 = nn.ReLU()
        self.dropout4 = nn.Dropout(dropout_rate)

        self.fc5 = nn.Linear(hidden_size4, hidden_size5)
        self.relu5 = nn.ReLU()
        self.dropout5 = nn.Dropout(dropout_rate)

        # Output layer for binary classification (yes/no for each of the 5 ratings)
        self.output_layer = nn.Linear(hidden_size5, output_size)
        self.sigmoid = nn.Sigmoid()  # Sigmoid for binary yes/no predictions

    def forward(self, x):
        # Forward pass through the network with multiple layers
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.dropout1(x)

        x = self.fc2(x)
        x = self.relu2(x)
        x = self.dropout2(x)

        x = self.fc3(x)
        x = self.relu3(x)
        x = self.dropout3(x)

        x = self.fc4(x)
        x = self.relu4(x)
        x = self.dropout4(x)

        x = self.fc5(x)
        x = self.relu5(x)
        x = self.dropout5(x)

        # Final binary classification
        ratings = self.output_layer(x)
        return self.sigmoid(ratings)  # Returns output_size values, each in range [0, 1] for yes/no classification


## `DeepClassificationWithRatingModel`

- **Description**: The `DeepClassificationWithRatingModel` integrates the `DeepDualCombinedModel` and adds a separate rating prediction model. If the classification output predicts class `0`, the rating model is triggered. It uses different criteria models for deadlift, squat, and lunges based on the predicted class.

- **Methods**:
  - `forward(combined_hidden, predicted_class)`:
    - Takes the `combined_hidden` state and the `predicted_class`.
    - If class `0` is predicted, the `dead_criteria_model` is used to predict ratings.
    - If class `1` is predicted, the `squat_criteria_model` is used.
    - If class `2` is predicted, the `lunges_criteria_model` is used.
    - Returns the predicted ratings based on the class.


In [86]:
class DeepClassificationWithRatingModel(nn.Module):
    """
    DeepClassificationWithRatingModel integrates the DeepDualCombinedModel and
    adds a separate rating prediction model. If the classification output predicts class `0`,
    the rating model is triggered.
    """
    def __init__(self):
        super(DeepClassificationWithRatingModel, self).__init__()

        # Rating model with more layers
        self.dead_criteria_model = CriteriaPredictionModel(output_size=5)
        self.lunges_criteria_model = CriteriaPredictionModel(output_size=7)
        self.squat_criteria_model = CriteriaPredictionModel(output_size=6)

    def forward(self,combined_hidden,predicted_class):
        try:
            # Initialize ratings as None
            ratings = None

            # If class `0` is predicted, trigger rating prediction
            if predicted_class == 0:
                ratings = self.dead_criteria_model(combined_hidden)
            elif predicted_class == 1:
                ratings = self.squat_criteria_model(combined_hidden)
            elif predicted_class == 2:
                ratings = self.lunges_criteria_model(combined_hidden)
            return ratings

        except Exception as e:
            print(f"Error during forward pass in DeepClassificationWithRatingModel: {e}")
            raise

## `PoseVideoDataset`

- **Description**: A custom PyTorch Dataset designed to load video frames and pose landmarks, along with action class labels and ratings. It supports deadlift, squat, and lunge actions with different rating models for each. The dataset processes video frames, extracts pose landmarks, and normalizes ratings based on action class.

- **Methods**:
  - `__len__()`: Returns the number of samples in the dataset.
  - `__getitem__(idx)`: Loads and processes the data at the specified index:
    - Loads video frames for both frontal and lateral views.
    - Processes and normalizes video frames using the processor.
    - Extracts pose landmarks and action class labels.
    - Retrieves and normalizes ratings based on action class (Deadlift, Squat, or Lunge).
  - `_process_ratings(df)`: Processes and normalizes the ratings data from the corresponding DataFrame:
    - Extracts relevant columns (ending in 'F' or 'L').
    - Normalizes the scores and applies a threshold (0 or 1 based on the condition).
    - Returns the mean of the scores for each rating.


In [87]:
def compute_pairwise_distances(points_tensor):
    """
    Compute pairwise distances between 33 pose landmarks for each frame.

    Args:
        points_tensor (torch.Tensor): Tensor of shape [num_frames, 33, 3],
                                       where each row is a point (x, y, z) for each frame.

    Returns:
        torch.Tensor: Tensor of shape [num_frames, 528], containing pairwise distances for each frame.
    """
    num_frames, num_points, _ = points_tensor.size()
    distances = []

    # Iterate over frames
    for frame_idx in range(num_frames):
        frame_points = points_tensor[frame_idx]  # Shape: [33, 3]
        frame_distances = []

        # Iterate over unique pairs of points
        for i in range(num_points):
            for j in range(i + 1, num_points):  # Ensure each pair is only computed once
                point1 = frame_points[i]
                point2 = frame_points[j]

                # Compute Euclidean distance
                dist = torch.sqrt(torch.sum((point1 - point2) ** 2))
                frame_distances.append(dist)

        # Append distances for the current frame
        distances.append(torch.stack(frame_distances))  # Shape: [528]

    # Stack distances for all frames
    return torch.stack(distances)  # Shape: [num_frames, 528]

In [88]:
class PoseVideoDataset(Dataset):

    def __init__(self, df, num_frames):
        self.df = df
        self.num_frames = num_frames
        self.train_df_dead = train_df_dead
        self.train_df_squat = train_df_squat
        self.train_df_lunge = train_df_lunge

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]

        # Get frontal and lateral video paths and index
        num_video_frontal = row['Num Video Frontal']
        num_video_lateral = row['Num Video Lateral']
        num_idx = row['NumIdx']
        action = row['Action']

        # Extract pose landmarks from video frames
        pose_landmarks_frontal = row['front_pose']
        pose_landmarks_lateral = row['lat_pose']
        pose_landmarks_tensor_frontal = torch.tensor(pose_landmarks_frontal).float()
        pose_landmarks_tensor_lateral = torch.tensor(pose_landmarks_lateral).float()
        distances_frontal = compute_pairwise_distances(pose_landmarks_tensor_frontal)
        distances_lateral = compute_pairwise_distances(pose_landmarks_tensor_lateral)

        # print(f"pose_landmarks shape: {pose_landmarks_tensor_frontal.shape}")
        # print(f"distances_frontal shape: {distances_frontal.shape}")

        # pose_landmarks_tensor_frontal = torch.cat((pose_landmarks_tensor_frontal, distances_frontal), dim=0)
        # pose_landmarks_tensor_lateral = torch.cat((pose_landmarks_tensor_lateral, distances_lateral), dim=0)

        # Get labels (action class)
        label_class = torch.tensor(row['class'], dtype=torch.long)

        # Initialize ratings
        ratings = None

        # Check label_class and load appropriate DataFrame
        if label_class.item() == 0:  # Deadlift
            ratings = self._process_ratings(self.train_df_dead,row)
        elif label_class.item() == 1:  # Squat
            ratings = self._process_ratings(self.train_df_squat,row)
        elif label_class.item() == 2:  # Lunge
            ratings = self._process_ratings(self.train_df_lunge,row)

        # Convert ratings to tensor
        ratings = torch.tensor(ratings, dtype=torch.float32) if ratings is not None else None

        return (distances_frontal, distances_lateral, label_class, ratings)

    def _process_ratings(self, df,row):
        """
        Process the ratings DataFrame to extract and normalize scores.

        Args:
            df (pd.DataFrame): DataFrame containing ratings for the specific action.

        Returns:
            list: Normalized and thresholded scores.
        """
        # Select columns ending with 'F' or 'L'
        relevant_columns = [col for col in df.columns if col.endswith('F') or col.endswith('L')]

        # Extract ratings and normalize
        scores = row[relevant_columns].values


        # Apply threshold: Convert to 0 or 1 based on specific thresholding conditions
        thresholded_scores = np.where(scores >= 0.5, 1, 0)

        return thresholded_scores.tolist()  # Return mean of the scores for each sample

In [89]:
def create_dataloader(df, num_frames, batch_size=16, shuffle=True):
    """
    Creates a PyTorch DataLoader from the PoseVideoDataset.

    Args:
        df (pd.DataFrame): DataFrame containing the dataset information.
        num_frames (int): The number of frames to extract from each video.
        batch_size (int, optional): Batch size for the DataLoader. Defaults to 8.
        shuffle (bool, optional): Whether to shuffle the data. Defaults to True.

    Returns:
        DataLoader: A PyTorch DataLoader for the dataset.
    """
    dataset = PoseVideoDataset(df, num_frames)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)


## `train_combined_model`

- **Description**: This function trains both the classification model (`Dual_Combined_Model`) and the rating prediction model (`CriteriaPredictionModel`). It evaluates them after each epoch and saves the best-performing models based on validation loss. It also includes early stopping if no improvement is observed for a set number of epochs.

- **Args**:
  - `model (torch.nn.Module)`: The classification model (e.g., `Dual_Combined_Model`).
  - `criteria_model (torch.nn.Module)`: The rating prediction model (e.g., `CriteriaPredictionModel`).
  - `train_dataloader (DataLoader)`: A DataLoader providing the training data.
  - `eval_dataloader (DataLoader)`: A DataLoader providing the evaluation data.
  - `epochs (int, optional)`: Number of epochs to train. Defaults to 1000.
  - `lr (float, optional)`: Learning rate for the optimizer. Defaults to 1e-4.
  - `device (str, optional)`: The device to train the model on ('cpu' or 'cuda'). Defaults to 'cpu'.
  - `clip_grad_norm (float, optional)`: Maximum norm for gradient clipping. Defaults to 1.0.
  - `patience (int, optional)`: Number of epochs with no improvement after which training will be stopped. Defaults to 5.

- **Returns**:
  - None: The function prints the training progress, validation loss, classification accuracy, and rating accuracy during training.

- **Steps**:
  1. **Model Initialization**: Moves both the classification and rating models to the specified device.
  2. **Optimizer Setup**: Uses the Adam optimizer for both models with the specified learning rate.
  3. **Loss Functions**: Defines loss functions for both classification (`CrossEntropyLoss`) and ratings (`BCEWithLogitsLoss`).
  4. **Training Loop**:
     - Performs a forward pass through the classification model and computes the classification loss.
     - Computes the rating prediction loss only if the predicted class matches the actual class.
     - Combines both the classification and rating losses and performs backpropagation.
  5. **Gradient Clipping**: Clips gradients to avoid exploding gradients during backpropagation.
  6. **Model Evaluation**: Evaluates the models after each epoch and prints the results.
  7. **Early Stopping**: Monitors validation loss and triggers early stopping if no improvement is observed for a specified number of epochs.
  8. **Model Checkpointing**: Saves the best-performing models based on validation loss.


In [None]:
from torch.utils.tensorboard import SummaryWriter

def train_combined_model(Dual_Combined_Model, criteria_model, train_dataloader, eval_dataloader, epochs=1000, lr=1e-4,
                         device='cpu', clip_grad_norm=1.0, patience=7):

    # TensorBoard setup
    writer = SummaryWriter(log_dir=LOG_DIR)

    Dual_Combined_Model.to(device)
    criteria_model.to(device)

    # Set up optimizer and loss functions
    optimizer = optim.Adam(list(Dual_Combined_Model.parameters()) + list(criteria_model.parameters()), lr=lr)
    activity_loss = nn.CrossEntropyLoss()  # Loss for classification task
    feature_loss = nn.BCEWithLogitsLoss()  # Loss for rating task (binary)

    # Learning rate scheduler
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.5)

    best_eval_loss = float('inf')
    patience_counter = 0

    for epoch in range(epochs):
        Dual_Combined_Model.train()
        criteria_model.train()
        running_loss = 0.0

        for batch_idx, batch in enumerate(train_dataloader):
            # Calculate progress percentage
            progress = (batch_idx + 1) / len(train_dataloader) * 100

            # Unpack batch data and move to the specified device
            (pose_landmarks_tensor_frontal, pose_landmarks_tensor_lateral,
             label_class, ratings) = [tensor.to(device) for tensor in batch]

            optimizer.zero_grad()

            # Forward pass through the classification model
            classification_output, combined_hidden = Dual_Combined_Model(pose_landmarks_tensor_frontal, pose_landmarks_tensor_lateral)
            _, predicted_class = torch.max(classification_output, 1)

            # Loss for classification
            loss_class = activity_loss(classification_output, label_class)

            # Forward pass through the rating prediction model
            for i in range(label_class.size(0)):
                actual_class = label_class[i].item()
                predicted_class_item = predicted_class[i].item()

                if predicted_class_item == actual_class:
                    if actual_class == 0:  # Deadlift
                        ratings_output = criteria_model.dead_criteria_model(combined_hidden[i])
                    elif actual_class == 1:  # Squat
                        ratings_output = criteria_model.squat_criteria_model(combined_hidden[i])
                    elif actual_class == 2:  # Lunges
                        ratings_output = criteria_model.lunges_criteria_model(combined_hidden[i])
                    loss_ratings = feature_loss(ratings_output, ratings[i].float())
                else:
                    loss_ratings = torch.tensor(2.0, device=device, requires_grad=True)  # High loss if mismatched

                # Combine classification and rating losses
                loss = loss_class + loss_ratings

                # Backward pass and optimization
                loss.backward()
                torch.nn.utils.clip_grad_norm_(list(Dual_Combined_Model.parameters()) + list(criteria_model.parameters()), clip_grad_norm)
                optimizer.step()

                running_loss += loss.item()

                # Print progress during each epoch
                print(f"Epoch [{epoch + 1}/{epochs}], Progress: {progress:.2f}%, Batch [{batch_idx + 1}/{len(train_dataloader)}], Loss: {loss.item():.4f}")

        avg_loss = running_loss / len(train_dataloader)

        # Log training loss to TensorBoard
        writer.add_scalar("Loss/Train", avg_loss, epoch)

        # Evaluate both models after each epoch
        eval_loss, eval_accuracy, eval_rating_accuracy = evaluate_combined_model(
            Dual_Combined_Model, criteria_model, eval_dataloader, activity_loss, feature_loss, device
        )

        # Log validation metrics to TensorBoard
        writer.add_scalar("Loss/Validation", eval_loss, epoch)
        writer.add_scalar("Accuracy/Classification", eval_accuracy, epoch)
        writer.add_scalars("Rating Accuracy", {
            "Deadlift": eval_rating_accuracy['deadlift'],
            "Squat": eval_rating_accuracy['squat'],
            "Lunges": eval_rating_accuracy['lunges']
        }, epoch)

        # Print summary for each epoch
        print(f"Epoch [{epoch + 1}/{epochs}] Summary: "
              f"Train Loss: {avg_loss:.4f}, Eval Loss: {eval_loss:.4f}, "
              f"Accuracy: {eval_accuracy:.4f}")

        # Scheduler step
        scheduler.step(eval_loss)

        # Early stopping and model saving
        if eval_loss < best_eval_loss:
            best_eval_loss = eval_loss
            patience_counter = 0
            torch.save(Dual_Combined_Model.state_dict(), f"best_combined_model_epoch_{epoch + 1}.pt")
            torch.save(criteria_model.state_dict(), f"best_rating_model_epoch_{epoch + 1}.pt")
            print("Model checkpoint saved.")
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered.")
                break

    writer.close()  # Close TensorBoard writer
    print("Training complete.")


## `evaluate_combined_model`

- **Description**: This function evaluates both the classification and rating prediction models on the provided dataset. It computes the average evaluation loss, classification accuracy, and rating accuracies for different actions (deadlift, squat, lunges) using a given `DataLoader` and loss functions.

- **Args**:
  - `classification_model (torch.nn.Module)`: The classification model (e.g., `Dual_Combined_Model`).
  - `criteria_model (torch.nn.Module)`: The rating prediction model (e.g., `CriteriaPredictionModel`).
  - `dataloader (DataLoader)`: A DataLoader providing the evaluation data.
  - `criterion_class (nn.Module)`: The loss function for the classification task.
  - `criterion_ratings (nn.Module)`: The loss function for the rating task.
  - `device (str)`: The device to perform evaluation on ('cpu' or 'cuda').

- **Returns**:
  - `float`: The average evaluation loss across the dataset.
  - `dict`: A dictionary containing the classification accuracy and rating accuracies for each action (deadlift, squat, lunges).

- **Steps**:
  1. **Set Evaluation Mode**: Sets the models (`classification_model` and `criteria_model`) to evaluation mode.
  2. **Initialize Metrics**: Initializes accumulators for loss, classification accuracy, and rating accuracy for each action.
  3. **Evaluation Loop**:
     - Performs a forward pass through the classification model to get predictions and computes classification loss.
     - For each sample in the batch, if the predicted class matches the actual class, it computes the rating accuracy for that action.
     - The rating predictions are compared with the ground truth ratings, and accuracy is calculated for each action (deadlift, squat, lunges).
  4. **Compute Average Metrics**: Computes the average classification loss, overall classification accuracy, and rating accuracy for each action.
  5. **Return Results**: Returns the average loss, classification accuracy, and rating accuracies.


In [91]:
def evaluate_combined_model(classification_model, criteria_model, dataloader, criterion_class, criterion_ratings, device):
    """
    Evaluates both the classification and rating models on the provided dataset.

    Args:
        classification_model (torch.nn.Module): The classification model.
        criteria_model (torch.nn.Module): The rating prediction model.
        dataloader (DataLoader): A DataLoader providing the evaluation data.
        criterion_class (nn.Module): The loss function for classification.
        criterion_ratings (nn.Module): The loss function for ratings.
        device (str): The device to perform evaluation on ('cpu' or 'cuda').

    Returns:
        float: Average evaluation loss.
        dict: Classification accuracy and rating accuracies for each action (deadlift, squat, lunges).
    """
    classification_model.eval()
    criteria_model.eval()
    total_loss = 0.0
    correct_predictions_class = 0
    total_samples_class = 0

    # Initialize accumulators for rating accuracy per feature
    rating_accumulators = {'deadlift': 0, 'squat': 0, 'lunges': 0}
    total_rating_samples = {'deadlift': 0, 'squat': 0, 'lunges': 0}

    a =0
    with torch.no_grad():
        for batch in dataloader:
            (pose_landmarks_tensor_frontal,pose_landmarks_tensor_lateral,
             label_class, ratings) = [tensor.to(device) for tensor in batch]

            # Forward pass through the classification model
            classification_output, combined_hidden = classification_model(pose_landmarks_tensor_frontal, pose_landmarks_tensor_lateral)
            _, predicted_class = torch.max(classification_output, 1)

            # Classification loss and accuracy
            loss_class = criterion_class(classification_output, label_class)
            total_loss += loss_class.item()
            correct_predictions_class += (predicted_class == label_class).sum().item()
            total_samples_class += label_class.size(0)

            # Rating accuracy calculation per feature (only if predicted class matches actual class)
            for i in range(label_class.size(0)):  # Loop over each sample in the batch
                actual_class = label_class[i].item()
                predicted_class_item = predicted_class[i].item()

                if actual_class == predicted_class_item:
                    if actual_class == 0:  # Deadlift
                        ratings_output = criteria_model.dead_criteria_model(combined_hidden[i])
                        predicted_ratings = torch.sigmoid(ratings_output) > 0.5
                        correct_ratings = (predicted_ratings == ratings[i].byte()).sum().item()
                        rating_accumulators['deadlift'] += correct_ratings
                        total_rating_samples['deadlift'] += ratings[i].numel()
                        print(f"Predicted : {predicted_ratings}")
                        print(f"actual : {ratings[i].byte()}")
                        print(correct_ratings)
                        print(f"rating_accumulators : {rating_accumulators['deadlift']}")
                        print(f"total_rating_samples : {total_rating_samples['deadlift']}")
                    elif actual_class == 1:  # Squat
                        ratings_output = criteria_model.squat_criteria_model(combined_hidden[i])
                        predicted_ratings = torch.sigmoid(ratings_output) > 0.5
                        correct_ratings = (predicted_ratings == ratings[i].byte()).sum().item()
                        rating_accumulators['squat'] += correct_ratings
                        total_rating_samples['squat'] += ratings[i].numel()
                        print(f"Predicted : {predicted_ratings}")
                        print(f"actual : {ratings[i].byte()}")
                        print(f"correct_ratings : {correct_ratings}")
                        print(f"rating_accumulators : {rating_accumulators['squat']}")
                        print(f"total_rating_samples : {total_rating_samples['squat']}")
                    elif actual_class == 2:  # Lunges
                        ratings_output = criteria_model.lunges_criteria_model(combined_hidden[i])
                        predicted_ratings = torch.sigmoid(ratings_output) > 0.5
                        correct_ratings = (predicted_ratings == ratings[i].byte()).sum().item()
                        rating_accumulators['lunges'] += correct_ratings
                        total_rating_samples['lunges'] += ratings[i].numel()
                        print(f"Predicted : {predicted_ratings}")
                        print(f"actual : {ratings[i].byte()}")
                        print(correct_ratings)
                        print(f"rating_accumulators : {rating_accumulators['lunges']}")
                        print(f"total_rating_samples : {total_rating_samples['lunges']}")


            print(f"{a} , {len(dataloader)}")
            a=a=1

    # Calculate the average loss and classification accuracy
    avg_loss = total_loss / len(dataloader)
    accuracy_class = correct_predictions_class / total_samples_class if total_samples_class > 0 else 0.0
    # Calculate rating accuracy for each action
    rating_accuracy = {
        'deadlift': rating_accumulators['deadlift'] / total_rating_samples['deadlift'] if total_rating_samples['deadlift'] > 0 else 0.0,
        'squat': rating_accumulators['squat'] / total_rating_samples['squat'] if total_rating_samples['squat'] > 0 else 0.0,
        'lunges': rating_accumulators['lunges'] / total_rating_samples['lunges'] if total_rating_samples['lunges'] > 0 else 0.0
    }

    return avg_loss, accuracy_class, rating_accuracy


In [92]:
dataloader = create_dataloader(train_df, 16, batch_size=1)
eval_dataloader = create_dataloader(val_df, 16, batch_size=1)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [93]:
for data in dataloader:
    print(data[0].shape)
    break


torch.Size([1, 16, 528])


In [101]:
# Clear previous TensorBoard runs if needed
!rm -rf /content/drive/MyDrive/Vision_GYM_Research/tensorboard_logs

# Start TensorBoard in Colab
%load_ext tensorboard
%tensorboard --logdir $LOG_DIR


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


<IPython.core.display.Javascript object>

In [102]:
%reload_ext tensorboard

In [None]:
# Initialize the model
dual_combined_model = Dual_Combined_Model()
rating_model = DeepClassificationWithRatingModel()

print(device)

# Train the model
train_combined_model(dual_combined_model, rating_model, dataloader, eval_dataloader, epochs=1000, lr=1e-4, device=device)

cuda


In [84]:
# Initialize the model
dual_combined_model = Dual_Combined_Model()
rating_model = DeepClassificationWithRatingModel()

print(device)

# Train the model
train_combined_model(dual_combined_model, rating_model, dataloader, eval_dataloader, epochs=1000, lr=1e-4, device=device)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Predicted : tensor([ True,  True,  True,  True,  True, False], device='cuda:0')
actual : tensor([1, 1, 1, 1, 1, 0], device='cuda:0', dtype=torch.uint8)
correct_ratings : 6
rating_accumulators : 35
total_rating_samples : 42
1 , 99
pose_landmarks shape: torch.Size([16, 33, 3])
distances_frontal shape: torch.Size([16, 528])
1 , 99
pose_landmarks shape: torch.Size([16, 33, 3])
distances_frontal shape: torch.Size([16, 528])
Predicted : tensor([True, True, True, True, True, True, True], device='cuda:0')
actual : tensor([1, 1, 1, 1, 1, 1, 1], device='cuda:0', dtype=torch.uint8)
7
rating_accumulators : 7
total_rating_samples : 7
1 , 99
pose_landmarks shape: torch.Size([16, 33, 3])
distances_frontal shape: torch.Size([16, 528])
Predicted : tensor([ True,  True,  True, False,  True], device='cuda:0')
actual : tensor([1, 1, 1, 1, 1], device='cuda:0', dtype=torch.uint8)
4
rating_accumulators : 28
total_rating_samples : 40
1 , 99
pose

In [None]:
# Define the path to the saved model weights
model_path = "best_model_epoch_1.pt"
criteria_model_path = "best_rating_model_epoch_1.pt"

dual_combined_model = Dual_Combined_Model()
rating_model = DeepClassificationWithRatingModel()

# Load the saved model weights
dual_combined_model.load_state_dict(torch.load(model_path))
rating_model.load_state_dict(torch.load(criteria_model_path))

print(device)

# Continue training the model
train_combined_model(dual_combined_model, rating_model, dataloader, eval_dataloader,
                     epochs=1000, lr=1e-4, device=device)

KeyboardInterrupt: 

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [31]:
!ls

drive  sample_data


In [32]:
!pwd

/content


In [None]:
"/content/drive/MyDrive/Vision_GYM_Research/Data"