In [1]:
#this is to be removed ... this is just a dummy model to test the pipeline
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models

# Define the model
model = models.resnet50(pretrained=True)
model.fc = nn.Linear(2048, 3)  # Assuming 3 classes: bored, attentive, confused

# Define optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss()

# Dummy training loop (Replace with real data)
for epoch in range(1):  # Run real training loop instead
    optimizer.zero_grad()
    dummy_input = torch.randn(1, 3, 224, 224)
    output = model(dummy_input)
    loss = loss_fn(output, torch.tensor([1]))  # Example target class
    loss.backward()
    optimizer.step()

# Save trained model
torch.save(model.state_dict(), "../models/model.pth")
print("✅ Model saved successfully at models/model.pth")




✅ Model saved successfully at models/model.pth


In [2]:
print(torch.__version__)

2.6.0+cu126


### **Import Required Libraries**


In [3]:
import os
import datetime
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm import tqdm
import logging
import pandas as pd
from pathlib import Path
from torch.utils.data import Dataset
import torch
import torchvision.transforms as transforms
from torchvision import models
import torch.optim as optim
import numpy as np
from sklearn.metrics import r2_score
from torch import device
from torch.cuda.amp import GradScaler, autocast
from PIL import Image

# Configure logging
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.FileHandler('training.log')
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.addHandler(logging.StreamHandler())

In [4]:
# Define paths
DATASET_ROOT = Path("C:/Users/abhis/Downloads/Documents/Learner Engagement Project/data/DAiSEE/DataSet").resolve()
FRAMES_ROOT = Path("C:/Users/abhis/Downloads/Documents/Learner Engagement Project/data/DAiSEE/ExtractedFrames").resolve()

# Mapping function
def get_csv_clip_id(video_stem: str) -> str:
    base = video_stem.strip()
    if base.startswith("110001"):
        base = base.replace("110001", "202614", 1)
    return base

### Define the DAiSEEDataset Class

This class loads video sequences and pairs them with engagement metrics.

- **Features**: Uses precomputed features for faster training.
- **Error Handling**: Skips missing video directories and logs errors.


In [5]:
# Custom Dataset Class
class DAiSEEDataset(Dataset):
    def __init__(self, root, csv_path, transform=None):
        self.root = Path(root)
        self.transform = transform
        self.video_paths = []
        self.labels = []
        self.missing_videos = 0
        self.total_videos = 0

        df = pd.read_csv(csv_path, dtype=str)  # Read all columns as strings
        df.columns = df.columns.str.strip()  # Remove whitespace from column names
        split = Path(csv_path).stem.replace("Labels", "").strip()

        for idx, row in df.iterrows():
            self.total_videos += 1
            try:
                clip_id = row['ClipID'].strip()
                filename = clip_id.split('/')[-1] if '/' in clip_id else clip_id
                video_stem = filename.rsplit('.', 1)[0]
                mapped_id = get_csv_clip_id(video_stem)
                
                print(f"Processing video {video_stem} -> mapped to {mapped_id}")  # Debug print
                
                video_dir = self.root / split / mapped_id
                if not video_dir.exists():
                    print(f"Video directory does not exist: {video_dir}")  # Debug print
                    self.missing_videos += 1
                    continue
                
                frames = list(video_dir.glob('frame_*.jpg'))
                if len(frames) < 15:
                    print(f"Insufficient frames ({len(frames)}) in {video_dir}")  # Debug print
                    self.missing_videos += 1
                    continue
                
                self.video_paths.append(video_dir)
                self.labels.append([
                    row['Boredom'].strip(),
                    row['Engagement'].strip(),
                    row['Confusion'].strip(),
                    row['Frustration'].strip()
                ])
            except Exception as e:
                print(f"Error processing row {idx}: {e}")
        
        if not self.video_paths:
            raise ValueError("No valid videos found")

    def __len__(self):
        return len(self.video_paths)

    def __getitem__(self, idx):
        video_dir = self.video_paths[idx]
        label = [float(val) for val in self.labels[idx]]  # Convert labels to floats
        
        frames = sorted(video_dir.glob('frame_*.jpg'))[:15]
        frame_tensors = []
        for path in frames:
            img = Image.open(path).convert("RGB")
            if self.transform:
                img = self.transform(img)
            else:
                img = transforms.ToTensor()(img)
            frame_tensors.append(img)
        
        sequence = torch.stack(frame_tensors)
        label_tensor = torch.tensor(label, dtype=torch.float32)
        return sequence, label_tensor

### **Define the CNN-LSTM Model**

**CNN-LSTM Model**:

1.  ResNet50 extracts features from each frame.
2.  LSTM processes temporal dependencies in sequences.
3.  Outputs four-dimensional regression values (engagement metrics).


In [6]:
class CNN_LSTM(nn.Module):
    def __init__(self, num_classes=4):
        super(CNN_LSTM, self).__init__()
        resnet = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True)
        modules = list(resnet.children())[:-1]
        self.feature_extractor = nn.Sequential(*modules)
        
        self.lstm = nn.LSTM(2048, 512, batch_first=True)
        self.fc = nn.Linear(512, num_classes)
    
    def forward(self, x):
        batch_size, seq_len, c, h, w = x.size()
        x = x.view(-1, c, h, w)
        features = self.feature_extractor(x)
        features = features.view(batch_size, seq_len, -1)
        
        lstm_out, _ = self.lstm(features)
        lstm_out = lstm_out[:, -1]
        return self.fc(lstm_out)

### **Data Loaders**

**Configures train, validation, and test loaders with optimal settings.**


In [7]:
train_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                             std=[0.229, 0.224, 0.225])
    ])
valid_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                             std=[0.229, 0.224, 0.225])
    ])


def get_dataloaders(batch_size=16):
    labels_path = DATASET_ROOT / "Labels"
    
    datasets = {
        "Train": DAiSEEDataset(FRAMES_ROOT, labels_path / "TrainLabels.csv", transform=train_transform),
        "Validation": DAiSEEDataset(FRAMES_ROOT, labels_path / "ValidationLabels.csv", transform=valid_transform),
        "Test": DAiSEEDataset(FRAMES_ROOT, labels_path / "TestLabels.csv", transform=valid_transform)
    }

    dataloaders = {}
    num_workers = 0  # Use 0 for Windows

    for split, dataset in datasets.items():
        dataloaders[split] = DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=True if split == "Train" else False,
            num_workers=num_workers,
            pin_memory=True
        )
    
    return dataloaders


# Get data loaders
dataloaders = get_dataloaders(batch_size=16)
train_loader, val_loader, test_loader = dataloaders['Train'], dataloaders['Validation'], dataloaders['Test']

Processing video 1100011002 -> mapped to 2026141002
Processing video 1100011003 -> mapped to 2026141003
Processing video 1100011004 -> mapped to 2026141004
Processing video 1100011005 -> mapped to 2026141005
Processing video 1100011006 -> mapped to 2026141006
Processing video 1100011007 -> mapped to 2026141007
Processing video 1100011008 -> mapped to 2026141008
Processing video 1100011009 -> mapped to 2026141009
Processing video 1100011010 -> mapped to 2026141010
Processing video 1100011011 -> mapped to 2026141011
Processing video 1100011012 -> mapped to 2026141012
Processing video 1100011013 -> mapped to 2026141013
Processing video 1100011014 -> mapped to 2026141014
Processing video 1100011015 -> mapped to 2026141015
Processing video 1100011016 -> mapped to 2026141016
Processing video 1100011017 -> mapped to 2026141017
Processing video 1100011018 -> mapped to 2026141018
Processing video 1100011019 -> mapped to 2026141019
Processing video 1100011020 -> mapped to 2026141020
Processing v

### Training Loop with Optimizations

- **Mixed Precision**: Uses FP16 for faster training.
- **Checkpointing**: Saves the best model based on validation loss


In [None]:
def train_model(model, train_loader, val_loader, epochs=20, learning_rate=1e-5):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = nn.MSELoss()
    scaler = GradScaler()
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3)
    
    best_val_loss = np.inf
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    model_save_dir = Path(f"C:/Users/abhis/Downloads/Documents/Learner Engagement Project/models/{timestamp}")
    model_save_dir.mkdir(parents=True, exist_ok=True)
    
    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1} Train"):
            inputs, labels = inputs.to(device), labels.to(device)
            
            optimizer.zero_grad()
            
            with autocast(dtype=torch.float16):  # Use correct parameters
                outputs = model(inputs)
                loss = loss_fn(outputs, labels)
            
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            
            train_loss += loss.item() * inputs.size(0)
        
        model.eval()
        val_loss = 0.0
        val_preds = []
        val_labels = []
        with torch.no_grad():
            for inputs, labels in tqdm(val_loader, desc=f"Epoch {epoch+1} Val"):
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = loss_fn(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                val_preds.append(outputs.cpu().numpy())
                val_labels.append(labels.cpu().numpy())
        val_preds = np.concatenate(val_preds)
        val_labels = np.concatenate(val_labels)
        r2 = r2_score(val_labels, val_preds, multioutput='raw_values')
        
        train_loss /= len(train_loader.dataset)
        val_loss /= len(val_loader.dataset)
        
        logger.info(f"Epoch {epoch+1}/{epochs} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val R²: {r2}")
        
        scheduler.step(val_loss)
        
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            model_save_path = model_save_dir / "ResNet50_CNNLSTM_baseline.pth"
            torch.save(model.state_dict(), model_save_path)
            logger.info(f"Model saved to {model_save_path}")

### Run the Training Process

Execute the training loop and evaluate the model.


In [None]:
model = CNN_LSTM(num_classes=4)
train_model(model, train_loader, val_loader, epochs=20)

Using cache found in C:\Users\abhis/.cache\torch\hub\pytorch_vision_v0.10.0
Epoch 1 Train:   0%|          | 0/304 [00:01<?, ?it/s]


TypeError: autocast.__init__() missing 1 required positional argument: 'device_type'

**Model Evaluation:** After training, use the evaluation function to compute additional metrics:


In [None]:
def evaluate(model, data_loader, device):
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for inputs, labels in data_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            all_preds.append(outputs.cpu().numpy())
            all_labels.append(labels.cpu().numpy())
    all_preds = np.concatenate(all_preds)
    all_labels = np.concatenate(all_labels)
    r2 = r2_score(all_labels, all_preds, multioutput='raw_values')
    return r2

In [None]:
# After training
model = CNN_LSTM(num_classes=4)
model.load_state_dict(torch.load('path/to/your/model.pth'))
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
r2 = evaluate(model, test_loader, device)
print(f"Test R²: {r2}")