## ConvLSTM Encoder and Decoder Architecture

In [1]:
import torch 
from training import ConvLSTM_GestureRecognitionModel
from colorVideoDataset import ColorVideoDataset
from datasetModule import GestureDataModule

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

from sklearn.metrics import confusion_matrix

In [2]:
MODEL_PATH = 'ConvLstm_final.pth'      
DATA_ROOT_DIR = './colors'            
BATCH_SIZE = 16
NUM_CLASSES = 8 
MODEL_CHECKPOINT = "checkpoints\convlstm-epoch=99-val_loss=0.87.ckpt"

  MODEL_CHECKPOINT = "checkpoints\convlstm-epoch=99-val_loss=0.87.ckpt"


In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ConvLSTM_GestureRecognitionModel(num_classes=NUM_CLASSES).to(device)

model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
model.eval() # Set the model to evaluation mode
print("Model loaded successfully and set to evaluation mode.")

Model loaded successfully and set to evaluation mode.


In [4]:
dataset = ColorVideoDataset(root_dir=DATA_ROOT_DIR, transform=None)
class_names = dataset.get_class_names()
print(f"Dataset loaded with {len(dataset)} samples and {NUM_CLASSES} classes: {class_names}")

Dataset loaded with 320 samples and 8 classes: ['black', 'blue', 'brown', 'green', 'orange', 'red', 'white', 'yellow']


In [5]:
def infer(checkpoint_path, data_dir, batch_size=16):
    data_module = GestureDataModule(data_dir=data_dir, batch_size=batch_size)
    data_module.setup() 

    test_loader = data_module.test_dataloader()
    
    # 1. Load the model from checkpoint
    try:
        model = ConvLSTM_GestureRecognitionModel.load_from_checkpoint(
            checkpoint_path=checkpoint_path
        )
        print(f"Model loaded from {checkpoint_path}.")
    except Exception as e:
        print(f"Error loading model from checkpoint: {e}")

    # 2. Setup model for inference
    model.eval()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    
    print(f"Starting inference on {len(data_module.test_dataset)} samples...")
    
    all_preds = []
    all_labels = []
    
    # 3. Iterate through the test data and predict
    with torch.no_grad():
        for x, y, _ in test_loader:
            print(x.shape)
            # --- Input Permutation (Must match model's forward logic) ---
            if x.shape[-1] == 3: 
                x = x.permute(0, 1, 4, 2, 3) # (B, T, H, W, C) -> (B, T, C, H, W)
            elif x.shape[2] != 3: 
                x = x.permute(0, 2, 1, 3, 4) # (B, C, T, H, W) -> (B, T, C, H, W)
                
            x = x.to(device)

            # Forward pass
            outputs = model(x)
            
            # Get the predicted class index
            predictions = torch.argmax(outputs, dim=1)
            
            all_preds.extend(predictions.cpu().numpy())
            all_labels.extend(y.cpu().numpy()) # Collect true labels for confusion matrix/metrics
            
    print("Inference complete.")
    return np.array(all_preds), np.array(all_labels)

In [6]:
predictions = infer(MODEL_CHECKPOINT, data_dir="./colors")

if predictions is not None:
    print("\nSample Predictions:")
    print(predictions[:5])

ValueError: Sum of input lengths does not equal the length of the input dataset!

### Confusion Matrix

In [None]:
cm = confusion_matrix(all_labels, all_preds)
print("\n--- Confusion Matrix (Raw) ---")
print(cm)

In [None]:
def plot_confusion_matrix(cm, class_names, normalize=False, title='Confusion Matrix'):
    if normalize:
        cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        data = cm_normalized
        title = title + ' (Normalized)'
        fmt = '.2f' 
    else:
        data = cm
        title = title + ' (Counts)'
        fmt = 'd'

    plt.figure(figsize=(10, 8))
    
    sns.heatmap(data, 
                annot=True,              
                fmt=fmt,                 
                cmap='Blues',            
                cbar=True,               
                xticklabels=class_names, # X-axis labels (Predicted)
                yticklabels=class_names) # Y-axis labels (True)
    
    plt.title(title)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()

In [None]:
plot_confusion_matrix(cm, class_names, normalize=True)

### False Positives and False Negative's Matrix

In [None]:
class_names = dataset.get_class_names()

In [None]:
def calculate_ova_metrics(cm, class_names):
    metrics = {}
    # num_classes = len(class_names)
    
    for i, class_name in enumerate(class_names):
        TP = cm[i, i]
        
        FN = np.sum(cm[i, :]) - TP
        
        FP = np.sum(cm[:, i]) - TP
        
        TN = np.sum(cm) - (TP + FN + FP)
        
        metrics[class_name] = {'TP': TP, 'TN': TN, 'FP': FP, 'FN': FN}
        
    df_metrics = pd.DataFrame.from_dict(metrics, orient='index')
    return df_metrics

In [None]:
df_ova_metrics = calculate_ova_metrics(cm, class_names)

In [None]:
def plot_ova_metrics(df_metrics, title='One-vs-All Performance Matrix'):
    plt.figure(figsize=(12, df_metrics.shape[0] * 1.5))
    
    sns.heatmap(df_metrics, 
                annot=True, 
                fmt='d',         
                cmap='YlGnBu',    
                linewidths=.5,    
                linecolor='black',
                cbar_kws={'label': 'Count'})
    
    plt.title(title, fontsize=16)
    plt.ylabel('Class (Treated as Positive)', fontsize=14)
    plt.xlabel('Metric', fontsize=14)
    plt.yticks(rotation=0)
    plt.show()

plot_ova_metrics(df_ova_metrics, title='OvA Classification Metrics (TP, TN, FP, FN)')

### F1, Recall, Precision Per Class