In [1]:
# Notebook dependencies installed into the running kernel via the %pip magic.
# The opencv-python install is kept for reference but is commented out.
# %pip install opencv-python
%pip install imageio[ffmpeg]
%pip install tqdm



In [2]:
# Mount Google Drive
from google.colab import drive
import os

# Make the Drive contents available under /content/gdrive.
drive.mount('/content/gdrive')
# Root folder on Drive; the data, video and output paths in the next cell are joined onto it.
base_path = '/content/gdrive/MyDrive/RGB_data_stream'

Mounted at /content/gdrive


In [3]:
import os

# Metadata CSV; YogaVideoDataset reads its 'sequence_id' and 'l3_pose' columns.
csv_path = os.path.join(base_path,'data/3DYoga90_corrected.csv')
# Text file with one integer sequence id per line (parsed with int() by YogaVideoDataset).
sequence_path = os.path.join(base_path, 'short/downloaded_log.txt')
# Output folder for checkpoints, plots and the label-error report.
save_path = os.path.join(base_path, 'Second_Resnet_nov6')
os.makedirs(save_path, exist_ok=True)
# Class names; the position in this list is the integer label (see pose_to_idx in YogaVideoDataset).
pose_list = ['mountain', 'half-way-lift', 'standing-forward-bend', 'downward-dog']
NUM_CLASSES = len(pose_list)
# Folder holding the source videos, looked up as <sequence_id>.mp4.
video_dir = os.path.join(base_path, 'short')
# Folder where preprocessed frame tensors are stored as <sequence_id>.pt.
preprocessed_dir = os.path.join(base_path, 'complete_pre_processed')
os.makedirs(preprocessed_dir, exist_ok=True)

In [4]:
# Constants
FRAME_HEIGHT = 224  # frames are resized to this height before entering the pretrained CNN backbone
FRAME_WIDTH = 224   # frames are resized to this width
SEQUENCE_LENGTH = 16  # target number of frames per clip (read by YogaVideoDataset._get_frames)

BATCH_SIZE = 16  # batch size used by all three DataLoaders
LEARNING_RATE = 0.0001  # initial Adam learning rate
VALIDATION_SPLIT = 0.2  # fraction of the dataset held out for validation
TEST_SPLIT = 0.1  # fraction of the dataset held out for testing
NUM_EPOCHS = 50  # upper bound on training epochs passed to train_model

# Dataset

Work Left
1. Data Augmentation
2. Expanding to more classes

In [5]:
import os
import torch
import pandas as pd
import numpy as np
from torchvision.transforms import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import imageio

class YogaVideoDataset(Dataset):
    """Fixed-length video clips with integer pose labels.

    Each item is a ``(frames, label)`` pair where ``frames`` is a float tensor
    of shape ``(sequence_length, 3, FRAME_HEIGHT, FRAME_WIDTH)`` (resized and
    ImageNet-normalised) and ``label`` is the position of the row's
    ``l3_pose`` in ``pose_list``.

    Decoded clips are stored on disk as ``<preprocessed_dir>/<sequence_id>.pt``
    so a video is only decoded once.

    Args:
        csv_path: CSV with at least ``sequence_id`` and ``l3_pose`` columns.
        sequence_path: Text file with one integer sequence id per line; only
            these ids are kept.
        pose_list: Pose names to keep; list position becomes the class index.
        video_dir: Folder containing ``<sequence_id>.mp4`` files.
        preprocessed_dir: Folder for the on-disk tensor cache.
        sequence_length: Frames per clip. ``None`` (default) uses the
            notebook-level ``SEQUENCE_LENGTH``.
        cache_in_memory: When True, also keep loaded clips in ``self.cache``.
            Off by default because clips are large and every DataLoader
            worker holds its own copy.
    """

    def __init__(self, csv_path, sequence_path, pose_list, video_dir, preprocessed_dir,
                 sequence_length=None, cache_in_memory=False):
        with open(sequence_path) as f:
            # Skip blank lines so a trailing newline in the log does not crash int().
            sequence_list = [int(line) for line in f.read().splitlines() if line.strip()]

        df = pd.read_csv(csv_path)
        df = df[df['sequence_id'].isin(sequence_list)]
        df = df[df['l3_pose'].isin(pose_list)]
        # Re-number the rows so index labels and positional indices agree;
        # __getitem__ is positional, and callers that look rows up through
        # ``df.index`` would otherwise address the wrong clip.
        self.df = df.reset_index(drop=True)

        self.pose_to_idx = {pose: idx for idx, pose in enumerate(pose_list)}
        self.length_of_dataset = len(self.df)

        self.video_dir = video_dir
        self.preprocessed_dir = preprocessed_dir
        self.sequence_length = SEQUENCE_LENGTH if sequence_length is None else int(sequence_length)
        if self.sequence_length <= 0:
            raise ValueError("sequence_length must be a positive integer")
        self.cache_in_memory = cache_in_memory

        os.makedirs(self.preprocessed_dir, exist_ok=True)

        self.transforms = transforms.Compose([
            transforms.Resize((FRAME_HEIGHT, FRAME_WIDTH)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
        self.cache = dict()

    def __len__(self):
        return self.length_of_dataset

    def __getitem__(self, i):
        row = self.df.iloc[i]
        sequence_id = row['sequence_id']
        label = self.pose_to_idx[row['l3_pose']]

        # In-memory cache (only populated when cache_in_memory=True).
        if sequence_id in self.cache:
            return self.cache[sequence_id], label

        preprocessed_path = os.path.join(self.preprocessed_dir, f"{sequence_id}.pt")
        if os.path.exists(preprocessed_path):
            frames = torch.load(preprocessed_path, weights_only=True)
        else:
            # Decode once, then persist so later epochs skip the video decode.
            video_path = os.path.join(self.video_dir, f"{sequence_id}.mp4")
            frames = self._get_frames(video_path)
            torch.save(frames, preprocessed_path)

        # Tensors cached by an earlier run may hold a different number of
        # frames; bring every clip to the same length so batches can be stacked.
        frames = self._fit_length(frames)

        if self.cache_in_memory:
            self.cache[sequence_id] = frames

        return frames, label

    def _fit_length(self, frames):
        """Return ``frames`` resampled along dim 0 to ``sequence_length`` evenly spaced frames."""
        total = frames.shape[0]
        if total == self.sequence_length:
            return frames
        if total == 0:
            raise ValueError("Cannot resample a clip that contains no frames")
        # When total < sequence_length the indices repeat, which pads by duplicating frames.
        picks = np.linspace(0, total - 1, self.sequence_length, dtype=int)
        return frames[torch.as_tensor(picks, dtype=torch.long)]

    def _get_frames(self, video_path):
        """Decode ``sequence_length`` evenly spaced frames from ``video_path``.

        Returns:
            Tensor of shape ``(sequence_length, 3, FRAME_HEIGHT, FRAME_WIDTH)``.

        Raises:
            ValueError: if no frame could be decoded from the video.
        """
        reader = imageio.get_reader(video_path, 'ffmpeg')
        decoded = {}
        try:
            total_frames = reader.count_frames()
            indices = np.linspace(0, total_frames - 1, self.sequence_length, dtype=int)
            wanted = set(indices.tolist())
            last_wanted = int(indices[-1])

            for i, frame in enumerate(reader):
                if i in wanted:
                    # Only the sampled frames are transformed and kept in memory.
                    decoded[i] = self.transforms(Image.fromarray(frame))
                if i >= last_wanted:
                    break
        finally:
            # Always release the ffmpeg subprocess, even when decoding fails.
            reader.close()

        if not decoded:
            raise ValueError(f"No frames could be decoded from {video_path}")

        # If the stream ended earlier than count_frames() reported, fall back
        # to the last frame that was actually decoded.
        fallback = decoded[max(decoded)]
        frames = [decoded.get(int(i), fallback) for i in indices]
        return torch.stack(frames)

# Model
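**Architecture:** a pretrained ImageNet CNN extracts a feature vector for each frame, and an LSTM processes the resulting frame sequence. The active model uses ResNet18; the commented-out cell below keeps the earlier MobileNetV2 variant.

Currently trying the average of the LSTM outputs over all time steps.

Work Left
1. Compare ways of pooling the LSTM outputs: last time step only vs. average vs. max, and try an attention mechanism
2. Try other ImageNet-pretrained models for extracting the feature map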

In [6]:
# import torch
# import torch.nn as nn
# import torchvision.models as models
# import torch.nn.functional as F

# class CNNLSTM(nn.Module):
#     def __init__(self, num_classes, lstm_hidden_size=512, lstm_layers=1, dropout=0.5):
#         super(CNNLSTM, self).__init__()

#         # Load pretrained MobileNetV2
#         mobilenet = models.mobilenet_v2(pretrained=True)
#         self.features = nn.Sequential(*list(mobilenet.features))
#         self.pool = nn.AdaptiveAvgPool2d((1, 1))

#         # Initially freeze all MobileNetV2 parameters
#         self._freeze_features()

#         # MobileNetV2 outputs 1280 features after pooling
#         self.feature_size = 1280

#         # LSTM layer
#         self.lstm = nn.LSTM(
#             input_size=self.feature_size,
#             hidden_size=lstm_hidden_size,
#             num_layers=lstm_layers,
#             batch_first=True,
#             dropout=dropout if lstm_layers > 1 else 0
#         )

#         # Gradual dimension reduction in classifier with batch normalization
#         self.classifier = nn.Sequential(
#             nn.Linear(lstm_hidden_size, 512),
#             nn.BatchNorm1d(512),
#             nn.ReLU(),
#             nn.Dropout(dropout),

#             nn.Linear(512, 256),
#             nn.BatchNorm1d(256),
#             nn.ReLU(),
#             nn.Dropout(dropout),

#             nn.Linear(256, 128),
#             nn.BatchNorm1d(128),
#             nn.ReLU(),
#             nn.Dropout(dropout/2),  # Reducing dropout as we get closer to output

#             nn.Linear(128, num_classes)
#         )

#         # Initialize weights for the classifier layers
#         self._initialize_weights()

#     def _initialize_weights(self):
#         for m in self.classifier.modules():
#             if isinstance(m, nn.Linear):
#                 nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
#                 if m.bias is not None:
#                     nn.init.constant_(m.bias, 0)
#             elif isinstance(m, nn.BatchNorm1d):
#                 nn.init.constant_(m.weight, 1)
#                 nn.init.constant_(m.bias, 0)

#     def _freeze_features(self):
#         for param in self.features.parameters():
#             param.requires_grad = False

#     def unfreeze_features_gradually(self, num_layers=3, start_from_end=True):
#         """
#         Unfreeze a specified number of convolutional layers from MobileNetV2.
#         Args:
#             num_layers: Number of layers to unfreeze
#             start_from_end: If True, start unfreezing from the end (default)
#         """
#         # First freeze all layers
#         self._freeze_features()

#         # Get list of all feature layers
#         features_list = list(self.features.modules())
#         conv_layers = [m for m in features_list if isinstance(m, nn.Conv2d)]

#         # Determine which layers to unfreeze
#         if start_from_end:
#             layers_to_unfreeze = conv_layers[-num_layers:]
#         else:
#             layers_to_unfreeze = conv_layers[:num_layers]

#         # Unfreeze the selected layers
#         for layer in layers_to_unfreeze:
#             for param in layer.parameters():
#                 param.requires_grad = True

#     def forward(self, x):
#         batch_size, seq_length, c, h, w = x.size()

#         # Process each frame through CNN
#         x = x.view(batch_size * seq_length, c, h, w)
#         x = self.features(x)
#         x = self.pool(x)

#         # Reshape for LSTM
#         x = x.view(batch_size, seq_length, self.feature_size)

#         # LSTM processing
#         lstm_out, _ = self.lstm(x)

#         # Try different temporal pooling strategies
#         # Strategy 1: Mean pooling (default)
#         x = torch.mean(lstm_out, dim=1)
#         # Strategy 2: Last state
#         # x = lstm_out[:, -1, :]
#         # Strategy 3: Max pooling
#         # x = torch.max(lstm_out, dim=1)[0]

#         # Classification
#         x = self.classifier(x)
#         return x

#     def get_trainable_params(self):
#         """Return the number of trainable parameters"""
#         return sum(p.numel() for p in self.parameters() if p.requires_grad)

In [7]:
import torch
import torch.nn as nn
import torchvision.models as models
import torch.nn.functional as F

class CNNLSTM(nn.Module):
    """Per-frame ResNet18 features -> LSTM -> MLP classifier.

    Input:  tensor of shape ``(batch, seq_len, channels, height, width)``.
    Output: logits of shape ``(batch, num_classes)``.

    The ResNet18 backbone starts fully frozen; call
    ``unfreeze_features_gradually`` to fine-tune some of its conv layers.

    Args:
        num_classes: Number of output classes.
        lstm_hidden_size: Hidden size of the LSTM.
        lstm_layers: Number of stacked LSTM layers.
        dropout: Dropout probability used in the classifier (and between
            LSTM layers when ``lstm_layers > 1``).
    """

    def __init__(self, num_classes, lstm_hidden_size=512, lstm_layers=1, dropout=0.5):
        super(CNNLSTM, self).__init__()

        # Load ImageNet-pretrained ResNet18. `pretrained=True` is deprecated in
        # torchvision >= 0.13; IMAGENET1K_V1 is the weight set it used to load.
        if hasattr(models, 'ResNet18_Weights'):
            resnet = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
        else:
            resnet = models.resnet18(pretrained=True)

        # Keep everything up to the last conv stage (drop avgpool and fc).
        self.features = nn.Sequential(*list(resnet.children())[:-2])
        self.pool = nn.AdaptiveAvgPool2d((1, 1))

        # Initially freeze all ResNet parameters
        self._freeze_features()

        # Channel count after global pooling, read from the layer that was
        # removed (512 for ResNet18) so other ResNet variants need no edit here.
        self.feature_size = resnet.fc.in_features

        self.lstm = nn.LSTM(
            input_size=self.feature_size,
            hidden_size=lstm_hidden_size,
            num_layers=lstm_layers,
            batch_first=True,
            # nn.LSTM only applies dropout between layers, so it is meaningless for one layer.
            dropout=dropout if lstm_layers > 1 else 0
        )

        # Gradual dimension reduction in classifier with batch normalization
        self.classifier = nn.Sequential(
            nn.Linear(lstm_hidden_size, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(dropout),

            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(dropout),

            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(dropout / 2),  # Reducing dropout as we get closer to output

            nn.Linear(128, num_classes)
        )

        # Initialize weights for the classifier layers
        self._initialize_weights()

    def _initialize_weights(self):
        """Kaiming-initialise the classifier's Linear layers and reset its BatchNorm layers."""
        for m in self.classifier.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm1d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _freeze_features(self):
        """Disable gradients for every backbone parameter."""
        for param in self.features.parameters():
            param.requires_grad = False

    def unfreeze_features_gradually(self, num_layers=3, start_from_end=True):
        """
        Unfreeze a specified number of convolutional layers from ResNet.

        The backbone is re-frozen first, so the call is absolute rather than
        cumulative: afterwards exactly ``num_layers`` conv layers are trainable.

        Args:
            num_layers: Number of conv layers to unfreeze; values <= 0 leave
                the whole backbone frozen.
            start_from_end: If True, start unfreezing from the end (default)
        """
        # First freeze all layers
        self._freeze_features()

        # Guard: `conv_layers[-0:]` is the whole list, so without this check
        # num_layers=0 would unfreeze every conv layer instead of none.
        if num_layers <= 0:
            return

        conv_layers = [m for m in self.features.modules() if isinstance(m, nn.Conv2d)]

        if start_from_end:
            layers_to_unfreeze = conv_layers[-num_layers:]
        else:
            layers_to_unfreeze = conv_layers[:num_layers]

        for layer in layers_to_unfreeze:
            for param in layer.parameters():
                param.requires_grad = True

    def forward(self, x):
        batch_size, seq_length, c, h, w = x.size()

        # Fold time into the batch dimension so the CNN sees individual frames.
        # reshape (not view) so non-contiguous inputs are accepted as well.
        x = x.reshape(batch_size * seq_length, c, h, w)
        x = self.features(x)
        x = self.pool(x)

        # Back to (batch, time, features) for the LSTM.
        x = x.reshape(batch_size, seq_length, self.feature_size)

        lstm_out, _ = self.lstm(x)

        # Try different temporal pooling strategies
        # Strategy 1: Mean pooling (default)
        x = torch.mean(lstm_out, dim=1)
        # Strategy 2: Last state
        # x = lstm_out[:, -1, :]
        # Strategy 3: Max pooling
        # x = torch.max(lstm_out, dim=1)[0]

        # Classification
        x = self.classifier(x)
        return x

    def get_trainable_params(self):
        """Return the number of trainable parameters"""
        return sum(p.numel() for p in self.parameters() if p.requires_grad)

In [8]:
from torch.utils.data import random_split

def train_val_test_split(dataset, val_split=None, test_split=None, seed=42):
    """Randomly split ``dataset`` into train / validation / test subsets.

    Args:
        dataset: Any object supporting ``len()`` and integer indexing.
        val_split: Fraction used for validation; ``None`` uses ``VALIDATION_SPLIT``.
        test_split: Fraction used for testing; ``None`` uses ``TEST_SPLIT``.
        seed: Seed of the generator driving the shuffle, so the split is
            reproducible across runs.

    Returns:
        ``(train_dataset, val_dataset, test_dataset)`` as ``Subset`` objects.
        Validation and test sizes are rounded down; the train subset receives
        the remainder.

    Raises:
        ValueError: if a fraction is negative or the two fractions exceed 1.
    """
    val_split = VALIDATION_SPLIT if val_split is None else val_split
    test_split = TEST_SPLIT if test_split is None else test_split
    if val_split < 0 or test_split < 0 or val_split + test_split > 1:
        raise ValueError("val_split and test_split must be non-negative and sum to at most 1")

    total_size = len(dataset)
    test_size = int(test_split * total_size)
    val_size = int(val_split * total_size)
    train_size = total_size - val_size - test_size

    train_dataset, val_dataset, test_dataset = random_split(
        dataset,
        [train_size, val_size, test_size],
        generator=torch.Generator().manual_seed(seed)  # For reproducibility
    )

    return train_dataset, val_dataset, test_dataset

def create_data_loaders(train_dataset, val_dataset, test_dataset,
                        batch_size=None, num_workers=2, pin_memory=None):
    """Build the train / validation / test DataLoaders with shared settings.

    Only the training loader shuffles; validation and test keep dataset order
    so their metrics are comparable between runs.

    Args:
        train_dataset, val_dataset, test_dataset: Datasets to wrap.
        batch_size: Batch size for all loaders; ``None`` uses ``BATCH_SIZE``.
        num_workers: Worker processes per loader.
        pin_memory: Whether to pin host memory. ``None`` enables it only when
            CUDA is available, since pinning has no use on a CPU-only machine
            and only produces a warning there.

    Returns:
        ``(train_loader, val_loader, test_loader)``.
    """
    if batch_size is None:
        batch_size = BATCH_SIZE
    if pin_memory is None:
        pin_memory = torch.cuda.is_available()

    def _make_loader(dataset, shuffle):
        # Single construction site so the three loaders cannot drift apart.
        return DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=num_workers,
            pin_memory=pin_memory
        )

    train_loader = _make_loader(train_dataset, shuffle=True)
    val_loader = _make_loader(val_dataset, shuffle=False)
    test_loader = _make_loader(test_dataset, shuffle=False)

    return train_loader, val_loader, test_loader

In [9]:
def plot_training_curves(history, fsave='training_curves.png'):
    """Draw a 2x3 grid of training curves, save it under ``save_path`` and show it.

    Args:
        history: Dict of per-epoch lists with the keys 'train_loss',
            'val_loss', 'train_acc', 'val_acc', 'learning_rates',
            'train_precision', 'val_precision', 'train_recall', 'val_recall',
            'train_f1' and 'val_f1'.
        fsave: File name of the saved figure inside ``save_path``.
    """
    out_file = os.path.join(save_path, fsave)
    fig, axs = plt.subplots(2, 3, figsize=(18, 10))

    # One entry per panel: (row, col, title, [(history key, legend label), ...], log-scaled y axis?)
    panels = [
        (0, 0, 'Loss',
         [('train_loss', 'Train Loss'), ('val_loss', 'Validation Loss')], False),
        (0, 1, 'Accuracy',
         [('train_acc', 'Train Accuracy'), ('val_acc', 'Validation Accuracy')], False),
        (0, 2, 'Learning Rate',
         [('learning_rates', 'Learning Rate')], True),
        (1, 0, 'Precision',
         [('train_precision', 'Train Precision'), ('val_precision', 'Validation Precision')], False),
        (1, 1, 'Recall',
         [('train_recall', 'Train Recall'), ('val_recall', 'Validation Recall')], False),
        (1, 2, 'F1 Score',
         [('train_f1', 'Train F1'), ('val_f1', 'Validation F1')], False),
    ]

    for row, col, title, curves, log_y in panels:
        panel = axs[row, col]
        for key, legend_label in curves:
            panel.plot(history[key], label=legend_label, marker='o')
        panel.set_title(title)
        if log_y:
            panel.set_yscale('log')
        panel.legend()

    # Shared decoration for every panel.
    for panel in axs.flat:
        panel.set_xlabel('Epoch')
        panel.grid(True)

    plt.tight_layout()
    plt.savefig(out_file)
    plt.show()
    plt.close()


In [10]:
class EarlyStopping:
    """Early stopping to prevent overfitting.

    Call the instance once per epoch with the validation loss. ``early_stop``
    becomes True after ``patience`` consecutive calls that fail to lower the
    best loss seen so far by at least ``min_delta``.

    Args:
        patience: Number of consecutive non-improving epochs tolerated.
        min_delta: Minimum decrease of the loss that counts as an improvement.
    """

    def __init__(self, patience=7, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def _register_no_improvement(self):
        """Count one non-improving epoch and raise the stop flag once patience is used up."""
        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True

    def __call__(self, val_loss):
        # NaN is the only value that differs from itself. Every comparison with
        # NaN is False, so without this check a diverged epoch would be stored
        # as the "best" loss and the counter would never advance again.
        if val_loss != val_loss:
            self._register_no_improvement()
            return

        if self.best_loss is None:
            self.best_loss = val_loss
        elif val_loss > self.best_loss - self.min_delta:
            self._register_no_improvement()
        else:
            self.best_loss = val_loss
            self.counter = 0

In [11]:
import os
import torch
def save_checkpoint(model, optimizer, epoch, history, best_path=None):
    """Write model, optimizer and training history to a single checkpoint file.

    Args:
        model: Module whose ``state_dict`` is saved.
        optimizer: Optimizer whose ``state_dict`` is saved.
        epoch: Zero-based index of the epoch that just finished. ``epoch + 1``
            is stored, so the saved value is the epoch to resume from.
        history: Dict of per-epoch metric lists.
        best_path: Destination file. When ``None`` the rolling checkpoint
            ``<save_path>/checkpath_model.pth`` is written instead.
    """
    if best_path is not None:
        # Honour the path the caller passed instead of treating it as a flag.
        chk_path = best_path
    else:
        chk_path = os.path.join(save_path, 'checkpath_model.pth')
    print(f"Saving checkpoint to {chk_path}")

    parent_dir = os.path.dirname(chk_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    def _as_plain_floats(values):
        # Metric values may be numpy scalars; plain floats keep the file
        # loadable with torch.load(..., weights_only=True).
        try:
            return [float(v) for v in values]
        except (TypeError, ValueError):
            return values

    # Combine model, optimizer, and history into one dictionary
    checkpoint = {
        'epoch': epoch + 1,  # Save the next epoch number for resuming
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'history': {key: _as_plain_floats(values) for key, values in history.items()}
    }

    # Save everything in a single file using torch.save
    torch.save(checkpoint, chk_path)
    print(f"Checkpoint saved at epoch {epoch + 1}")

In [12]:
def load_checkpoint(model, optimizer, checkpoint_path, map_location='cpu'):
    """
    Load model and training state from a checkpoint

    Args:
        model: Module that receives the saved weights (modified in place).
        optimizer: Optimizer that receives the saved state, or ``None`` to
            skip restoring optimizer state (e.g. for evaluation only).
        checkpoint_path: File written by ``save_checkpoint``.
        map_location: Device the stored tensors are mapped to while reading.
            The 'cpu' default lets a checkpoint saved on a GPU be read on a
            CPU-only machine; ``load_state_dict`` then copies the values onto
            whatever device the model's parameters live on.

    Returns:
        ``(model, optimizer, start_epoch, history)`` where ``start_epoch`` is
        the epoch index stored in the checkpoint.
    """
    print(f"Loading checkpoint from {checkpoint_path}")
    # The checkpoint holds plain Python objects (history dict, possibly numpy
    # scalars) next to the tensors, so full unpickling is requested explicitly
    # rather than relying on torch.load's version-dependent default.
    # NOTE: full unpickling can execute code; only load checkpoints you created.
    checkpoint = torch.load(checkpoint_path, map_location=map_location, weights_only=False)

    # Load model and optimizer states
    model.load_state_dict(checkpoint['model_state_dict'])
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    # Get the epoch number to resume from
    start_epoch = checkpoint['epoch']

    # Fall back to an empty history when the checkpoint does not carry one.
    history = checkpoint.get('history', {
        'train_loss': [], 'val_loss': [],
        'train_acc': [], 'val_acc': [],
        'train_precision': [], 'train_recall': [], 'train_f1': [],
        'val_precision': [], 'val_recall': [], 'val_f1': [],
        'learning_rates': []
    })

    return model, optimizer, start_epoch, history


In [13]:
import torch
from torch.optim import lr_scheduler
from sklearn.metrics import precision_score, recall_score, f1_score
from tqdm import tqdm

def _run_epoch(model, loader, criterion, device, desc, log_interval, optimizer=None):
    """Run one pass over ``loader`` and return its metrics.

    Trains (with gradient clipping) when ``optimizer`` is given, otherwise
    evaluates with gradients disabled.

    Returns:
        Dict with 'loss' (mean per batch), 'acc' (percent) and weighted
        'precision', 'recall' and 'f1', all as plain Python floats.
    """
    training = optimizer is not None
    model.train(training)

    running_loss, correct, seen = 0.0, 0, 0
    y_true, y_pred = [], []

    progress = tqdm(enumerate(loader), total=len(loader), desc=desc)
    with torch.set_grad_enabled(training):
        for batch_idx, (inputs, labels) in progress:
            inputs, labels = inputs.to(device), labels.to(device)
            if training:
                optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            if training:
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            seen += labels.size(0)

            # Collect true and predicted labels for precision/recall
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())

            # Log batch-level updates
            if batch_idx % log_interval == 0:
                progress.set_postfix({
                    'loss': running_loss / (batch_idx + 1),
                    'accuracy': 100.0 * correct / seen
                })

    # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
    # zero_division=0 returns the same value as the default but without a
    # warning whenever a class is never predicted.
    return {
        'loss': running_loss / max(len(loader), 1),
        'acc': 100.0 * correct / max(seen, 1),
        'precision': float(precision_score(y_true, y_pred, average='weighted', zero_division=0)),
        'recall': float(recall_score(y_true, y_pred, average='weighted', zero_division=0)),
        'f1': float(f1_score(y_true, y_pred, average='weighted', zero_division=0)),
    }


def train_model(model, train_loader, val_loader, criterion, optimizer,
                num_epochs=50, patience=7, log_interval=10, checkpoint_path=None, unfreeze_epoch = 5, num_layers_unfreeze =3):
    """Train ``model`` with LR scheduling, early stopping and checkpointing.

    Args:
        model: Network exposing ``unfreeze_features_gradually(num_layers=...)``.
        train_loader, val_loader: DataLoaders yielding ``(inputs, labels)``.
        criterion: Loss function.
        optimizer: Optimizer over the model parameters.
        num_epochs: Upper bound on the number of epochs.
        patience: Early-stopping patience in epochs.
        log_interval: Progress-bar refresh interval in batches.
        checkpoint_path: If this file exists, training resumes from it. When
            a path is given, a rolling checkpoint is also written after every
            epoch through ``save_checkpoint``'s default target
            (``<save_path>/checkpath_model.pth``), so resuming works when
            ``checkpoint_path`` points at that file.
        unfreeze_epoch: Zero-based epoch at which backbone layers are unfrozen.
        num_layers_unfreeze: Number of conv layers to unfreeze.

    Returns:
        ``(model, history)`` where ``history`` maps metric names to per-epoch lists.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    print("Using device:", device)

    # Reduce the learning rate 10x after 4 epochs without validation-loss improvement.
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=4)
    early_stopping = EarlyStopping(patience=patience, min_delta=1e-4)

    start_epoch = 0

    # Initialize history for loss, accuracy, precision, recall, and F1
    history = {
        'train_loss': [], 'val_loss': [],
        'train_acc': [], 'val_acc': [],
        'train_precision': [], 'train_recall': [], 'train_f1': [],
        'val_precision': [], 'val_recall': [], 'val_f1': [],
        'learning_rates': []
    }

    # Check for checkpoint and load if available
    if checkpoint_path and os.path.exists(checkpoint_path):
        model, optimizer, start_epoch, history = load_checkpoint(
            model, optimizer, checkpoint_path
        )
        print(f"Resuming training from epoch {start_epoch}")
        torch.cuda.empty_cache()
        # requires_grad flags are not part of a state_dict: when resuming past
        # the unfreeze epoch, re-apply the unfreezing or the backbone would
        # stay frozen for the rest of training.
        if start_epoch > unfreeze_epoch:
            model.unfreeze_features_gradually(num_layers=num_layers_unfreeze)

    # Carry the best loss across a resume so a worse epoch cannot overwrite
    # the stored best model.
    best_val_loss = min(history.get('val_loss', []), default=float('inf'))

    for epoch in range(start_epoch, num_epochs):

        if epoch == unfreeze_epoch:
            print(f"Unfreezing last {num_layers_unfreeze} conv layers of the CNN backbone")
            model.unfreeze_features_gradually(num_layers=num_layers_unfreeze)
        print(f'\nEpoch {epoch+1}/{num_epochs}')

        current_lr = optimizer.param_groups[0]['lr']
        history.setdefault('learning_rates', []).append(current_lr)
        print(f"Current Learning Rate: {current_lr}")

        train_metrics = _run_epoch(model, train_loader, criterion, device,
                                   "Training", log_interval, optimizer)
        val_metrics = _run_epoch(model, val_loader, criterion, device,
                                 "Validation", log_interval)

        # Update history: keys are '<split>_<metric>', e.g. 'train_loss', 'val_f1'.
        for split, metrics in (('train', train_metrics), ('val', val_metrics)):
            for name, value in metrics.items():
                history.setdefault(f'{split}_{name}', []).append(value)

        train_loss, val_loss = train_metrics['loss'], val_metrics['loss']

        # Print metrics at the end of the epoch
        print(f'\nEpoch {epoch+1}/{num_epochs} Summary:')
        print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_metrics['acc']:.2f}% | Train Precision: {train_metrics['precision']:.2f} | Train Recall: {train_metrics['recall']:.2f} | Train F1: {train_metrics['f1']:.2f}")
        print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_metrics['acc']:.2f}% | Val Precision: {val_metrics['precision']:.2f} | Val Recall: {val_metrics['recall']:.2f} | Val F1: {val_metrics['f1']:.2f}")

        # Save the best model checkpoint
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model_path = os.path.join(save_path, 'best_model.pth')
            save_checkpoint(model, optimizer, epoch, history, best_model_path)
            print(f"New best model saved! Validation Loss: {best_val_loss:.4f}")

        # Rolling checkpoint so an interrupted run can actually be resumed.
        if checkpoint_path:
            save_checkpoint(model, optimizer, epoch, history)

        # Adjust learning rate based on validation loss
        scheduler.step(val_loss)

        # Check for early stopping
        early_stopping(val_loss)
        if early_stopping.early_stop:
            print("Early stopping triggered")
            break

    return model, history


In [14]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns
import torch
import numpy as np
import os
from tqdm import tqdm

def evaluate_model(model, test_loader, criterion, class_names,spath=save_path, fsave='confusion_matrix.png'):
    """
    Evaluate model on test set and plot its confusion matrix

    Args:
        model: PyTorch model
        test_loader: DataLoader for test data
        criterion: Loss function
        class_names: List of class names; position i must name class index i
        spath: Directory to save the plot
        fsave: Filename for confusion matrix plot

    Returns:
        (test_loss, accuracy): mean loss per batch and accuracy in percent.
    """
    csave = os.path.join(spath, fsave)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Inputs are moved to `device` below, so the model has to live there too;
    # otherwise a model freshly loaded on the CPU fails with a device mismatch.
    model = model.to(device)
    model.eval()

    test_loss = 0.0
    test_correct = 0
    test_total = 0
    all_predictions = []
    all_labels = []

    # Create progress bar
    test_loader_tqdm = tqdm(test_loader, desc="Testing")

    with torch.no_grad():
        for inputs, labels in test_loader_tqdm:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            test_loss += loss.item()

            # Get predictions
            _, predicted = torch.max(outputs, 1)
            # Calculate accuracy
            test_correct += (predicted == labels).sum().item()
            test_total += labels.size(0)

            # Store predictions and labels for confusion matrix
            all_predictions.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Calculate metrics (guarded so an empty loader reports instead of crashing)
    num_batches = len(test_loader)
    test_loss = test_loss / num_batches if num_batches else float('nan')
    accuracy = 100 * test_correct / test_total if test_total else 0.0

    # Pin the label set so the matrix always has one row/column per class
    # name, even when a class never occurs in this split.
    cm = confusion_matrix(all_labels, all_predictions,
                          labels=list(range(len(class_names))))

    # Plot confusion matrix
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names)
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')

    # Save the plot
    plt.savefig(csave)
    plt.show()
    plt.close()

    print(f'Test Loss: {test_loss:.4f}')
    print(f'Test Accuracy: {accuracy:.2f}%')

    return test_loss, accuracy

In [None]:
# Build the dataset, its reproducible train/val/test split and the loaders.
print("Loading Data")
dataset = YogaVideoDataset(csv_path, sequence_path, pose_list, video_dir, preprocessed_dir)
train_dataset, val_dataset, test_dataset = train_val_test_split(dataset)
train_loader, val_loader, test_loader = create_data_loaders(train_dataset, val_dataset, test_dataset)
print("Finished Loading Data")

model = CNNLSTM(num_classes=NUM_CLASSES)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

print("Training the model")
# train_model resumes from this file when it already exists.
checkpoint_path = os.path.join(save_path, 'checkpath_model.pth')
model, history = train_model(
    model,
    train_loader,
    val_loader,
    criterion,
    optimizer,
    NUM_EPOCHS,
    patience=7,
    log_interval=10,
    checkpoint_path=checkpoint_path
)
# Plot the training curves
plot_training_curves(history)
# Reload the rolling checkpoint (when present) before the final evaluation.
# NOTE(review): this evaluates the latest checkpoint, not best_model.pth.
if checkpoint_path and os.path.exists(checkpoint_path):
    model, optimizer, start_epoch, history = load_checkpoint(
        model, optimizer, checkpoint_path
    )
    # Training is over at this point, so report a load rather than a resume.
    print(f"Loaded checkpoint saved after epoch {start_epoch}")
evaluate_model(model, test_loader, criterion, pose_list)

# Export only the weights for later inference.
model_save_path = os.path.join(save_path, 'my_model.pth')
torch.save(model.state_dict(), model_save_path)

Loading Data
Finished Loading Data


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 182MB/s]


Training the model
Using device: cuda

Epoch 1/50
Current Learning Rate: 0.0001


Training:   0%|          | 0/31 [00:00<?, ?it/s]

In [None]:
# Re-draw the curves from the `history` currently held in the kernel.
plot_training_curves(history)

In [None]:
# Evaluate whatever weights `model` currently holds on the test split.
evaluate_model(model, test_loader, criterion, pose_list)

In [None]:
# Evaluate the checkpoint with the lowest validation loss.
checkpoint_path = os.path.join(save_path, 'best_model.pth')
if checkpoint_path and os.path.exists(checkpoint_path):
    # NOTE: this also replaces `optimizer` state and `history` with the
    # values stored in the best checkpoint.
    model, optimizer, start_epoch, history = load_checkpoint(
        model, optimizer, checkpoint_path
    )
    # Nothing is resumed here; the weights are only loaded for evaluation.
    print(f"Loaded best model saved after epoch {start_epoch}")
evaluate_model(model, test_loader, criterion, pose_list)

# TO EXAMINE THE DATASET FOR FAULTS


In [None]:
from tqdm import tqdm

def find_potential_label_errors(model, dataset, dataloader, threshold=0.9, device=None):
    """Collect samples where the model confidently disagrees with the given label.

    Args:
        model: Trained classifier returning raw logits.
        dataset: Dataset exposing ``df`` (with a 'sequence_id' column) and
            ``pose_to_idx``.
        dataloader: Loader over ``dataset`` in dataset order. It must not
            shuffle, because batch positions are mapped back to rows of
            ``dataset.df`` by position.
        threshold: Minimum softmax probability of the predicted class for a
            disagreement to be reported.
        device: Device to run on; ``None`` picks CUDA when available, else CPU.

    Returns:
        Dict of equal-length lists with keys 'sequence_id', 'predicted_pose',
        'given_pose' and 'confidence'.
    """
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

    model = model.to(device)
    model.eval()

    potential_errors = {
        'sequence_id': [],
        'predicted_pose': [],
        'given_pose': [],
        'confidence': []
    }

    # Reverse mapping from index to pose name
    idx_to_pose = {v: k for k, v in dataset.pose_to_idx.items()}

    # Running count of samples seen so far. Unlike batch_idx * batch_size it
    # stays correct when a batch is smaller than the nominal batch size.
    offset = 0

    with torch.no_grad():
        for sequences, labels in tqdm(dataloader, desc="Analyzing sequences"):
            sequences = sequences.to(device)
            labels = labels.to(device)

            logits = model(sequences)
            probabilities = torch.nn.functional.softmax(logits, dim=1)
            # The max probability is the confidence of the argmax prediction.
            confidences, predictions = probabilities.max(dim=1)

            # High-confidence disagreements with the given label
            suspicious = (predictions != labels) & (confidences > threshold)
            for local_idx in torch.nonzero(suspicious).flatten().tolist():
                sequence_id = dataset.df.iloc[offset + local_idx]['sequence_id']

                potential_errors['sequence_id'].append(sequence_id)
                potential_errors['predicted_pose'].append(idx_to_pose[predictions[local_idx].item()])
                potential_errors['given_pose'].append(idx_to_pose[labels[local_idx].item()])
                potential_errors['confidence'].append(confidences[local_idx].item())

            offset += labels.size(0)

    return potential_errors

def analyze_yoga_errors(potential_errors):
    """Print a summary of the suspected label errors and return them as a DataFrame.

    Args:
        potential_errors: Dict of equal-length lists with the keys
            'sequence_id', 'predicted_pose', 'given_pose' and 'confidence'.

    Returns:
        DataFrame with one row per suspected error, in input order.
    """
    errors = pd.DataFrame(potential_errors)
    print(f"Found {len(errors)} potential errors")

    # How often each (given label -> predicted label) confusion occurs.
    print("\nMost common incorrect labelings:")
    pair_counts = errors.groupby(['given_pose', 'predicted_pose']).size()
    print(pair_counts.sort_values(ascending=False).head(10))

    # The disagreements the model is most certain about.
    print("\nHighest confidence disagreements:")
    print(errors.sort_values('confidence', ascending=False).head(10))

    return errors

def validate_yoga_dataset(model, dataset, batch_size=32, threshold=0.9):
    """Scan ``dataset`` for likely label errors and return them as a DataFrame.

    The whole dataset is walked in order (no shuffling), confident
    disagreements are collected and then summarised.
    """
    sequential_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    suspected = find_potential_label_errors(model, dataset, sequential_loader, threshold)
    return analyze_yoga_errors(suspected)
import torchvision.transforms as transforms
from PIL import Image
import torch

# Define de-normalization transform
# Same per-channel mean/std that YogaVideoDataset uses to normalise frames.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]
# Normalize computes (x - mean) / std; with mean = -m/s and std = 1/s this
# becomes x * s + m, i.e. the inverse of the forward normalisation.
denormalize = transforms.Normalize(
    mean=[-m/s for m, s in zip(imagenet_mean, imagenet_std)],
    std=[1/s for s in imagenet_std]
)

def save_suspicious_sequences(error_df, dataset, save_dir, max_samples=5):
    """Save frames from suspicious sequences for manual review

    For every (given pose, predicted pose) pair, the middle frame of up to
    ``max_samples`` highest-confidence sequences is written to
    ``<save_dir>/<given>_to_<predicted>/seq_<id>_conf_<confidence>.png``.

    Args:
        error_df: DataFrame with 'sequence_id', 'given_pose',
            'predicted_pose' and 'confidence' columns.
        dataset: Dataset exposing ``df`` with a 'sequence_id' column and
            returning ``(frames, label)`` for a positional index.
        save_dir: Root output folder (created if missing).
        max_samples: Maximum number of examples saved per error type.
    """
    os.makedirs(save_dir, exist_ok=True)

    # Group by pose pairs to save examples of each type of error
    grouped_errors = error_df.groupby(['given_pose', 'predicted_pose'])

    for (given_pose, pred_pose), group in grouped_errors:
        # One directory per error type
        error_dir = os.path.join(save_dir, f"{given_pose}_to_{pred_pose}")
        os.makedirs(error_dir, exist_ok=True)

        # Take up to max_samples highest confidence examples for each error type
        samples = group.nlargest(max_samples, 'confidence')

        for _, row in samples.iterrows():
            sequence_id = row['sequence_id']
            confidence = row['confidence']

            try:
                # dataset[i] is positional, so look up row POSITIONS. The
                # DataFrame's index labels may not match positions when the
                # frame was filtered without resetting its index.
                matches = np.flatnonzero(
                    (dataset.df['sequence_id'] == sequence_id).to_numpy()
                )

                if len(matches) == 0:
                    print(f"Warning: Sequence ID {sequence_id} not found in dataset")
                    continue

                frames, _ = dataset[int(matches[0])]

                # Move frames to CPU if they're on GPU
                if frames.is_cuda:
                    frames = frames.cpu()

                # Middle frame as the representative image, de-normalised and
                # clipped to the valid [0, 1] range for image saving.
                middle_frame = frames[len(frames)//2]
                middle_frame = denormalize(middle_frame)
                middle_frame = torch.clamp(middle_frame, 0, 1)

                # Named image_path so the notebook-level save_path is not shadowed.
                image_path = os.path.join(error_dir, f'seq_{sequence_id}_conf_{confidence:.2f}.png')
                transforms.ToPILImage()(middle_frame).save(image_path)

            except Exception as e:
                # Best effort: one unreadable sequence must not abort the export.
                print(f"Error processing sequence {sequence_id}: {str(e)}")
                continue

# Usage example:

model = CNNLSTM(num_classes=NUM_CLASSES)
# Optimizer bound to THIS model, so the cell does not depend on an
# `optimizer` left over from the training cell (which tracks another model).
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
dataset = YogaVideoDataset(csv_path, sequence_path, pose_list, video_dir, preprocessed_dir)
checkpoint_path = os.path.join(save_path, 'best_model.pth')

if os.path.exists(checkpoint_path):
    model, optimizer, start_epoch, history = load_checkpoint(
        model, optimizer, checkpoint_path
    )
    print(f"Loaded best model saved after epoch {start_epoch}")
else:
    # Without the checkpoint the LSTM and classifier are untrained, so the
    # "errors" found below would be meaningless; make that visible.
    print(f"Warning: {checkpoint_path} not found; auditing labels with an untrained model")

# Find and analyze errors
error_df = validate_yoga_dataset(model, dataset, batch_size=32, threshold=0.9)

# Save suspicious sequences for manual review
save_dir = os.path.join(save_path, 'suspicious_sequences')
save_suspicious_sequences(error_df, dataset, save_dir)

# Export results to CSV for further analysis
csv_path_file = os.path.join(save_path, 'potential_label_errors.csv')
error_df.to_csv(csv_path_file, index=False)

# Print some statistics
print("\nMost frequently mislabeled sequences:")
sequence_counts = error_df['sequence_id'].value_counts()
print(sequence_counts.head())

