In [1]:
#this is to be removed ... this is just a dummy model to test the pipeline
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models

# Define the model
model = models.resnet50(pretrained=True)
model.fc = nn.Linear(2048, 3)  # Assuming 3 classes: bored, attentive, confused

# Define optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss()

# Dummy training loop (Replace with real data)
for epoch in range(1):  # Run real training loop instead
    optimizer.zero_grad()
    dummy_input = torch.randn(1, 3, 224, 224)
    output = model(dummy_input)
    loss = loss_fn(output, torch.tensor([1]))  # Example target class
    loss.backward()
    optimizer.step()

# Save trained model
torch.save(model.state_dict(), "../models/model.pth")
print("✅ Model saved successfully at models/model.pth")




✅ Model saved successfully at models/model.pth


### **Import Required Libraries**


In [2]:
import os
import pandas as pd
from pathlib import Path
from torch.utils.data import Dataset
import logging
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision import models
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.metrics import r2_score
from tqdm import tqdm
from torch import device
from torch.cuda.amp import GradScaler, autocast
import torch
import numpy as np
from pathlib import Path
from torch.utils.data import Dataset
import logging
import pandas as pd
import torch.nn.functional as F
import cv2


# Configure logging
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.FileHandler('model_training.log')
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.addHandler(logging.StreamHandler())  # Add console logging

### Define the DAiSEEDataset Class

This class loads video sequences and pairs them with engagement metrics.

- **Features**: Uses precomputed features for faster training.
- **Error Handling**: Skips missing video directories and logs errors.


In [3]:
def get_csv_clip_id(video_stem: str) -> str:
    # Apply the same mapping rule as in frame extraction.
    if video_stem.startswith("110001"):
        return video_stem.replace("110001", "202614", 1)
    return video_stem

class DAiSEEDataset(Dataset):
    def __init__(self, root, csv_path, seq_length=15):
        self.root = Path(root)
        self.seq_length = seq_length
        self.video_paths = []
        self.labels = []
        
        df = pd.read_csv(csv_path)
        df.columns = df.columns.str.strip()

        for idx, row in df.iterrows():
            try:
                clip_id = str(row["ClipID"]).strip()
                # Remove the ".avi" extension:
                video_stem = clip_id.split('.')[0]
                # Use the mapping function:
                mapped_id = get_csv_clip_id(video_stem)
                video_dir = self.root / mapped_id

                if not video_dir.exists():
                    logger.debug(f"Video directory does not exist: {video_dir}")
                    continue

                frames = list(video_dir.glob("frame_*.jpg"))
                if len(frames) < self.seq_length:
                    logger.debug(f"Skipping {mapped_id}: insufficient frames ({len(frames)})")
                    continue

                self.video_paths.append(video_dir)
                self.labels.append(row["Engagement"])
            except Exception as e:
                logger.debug(f"Skipping row {idx} ({clip_id}): {str(e)}")
                continue

        if len(self.video_paths) == 0:
            raise ValueError(f"No valid video sequences found in {csv_path}")

    def __len__(self):
        return len(self.video_paths)

    def __getitem__(self, idx):
        video_dir = self.video_paths[idx]
        label = self.labels[idx]
        
        # Sort and select the first seq_length frames
        frame_paths = sorted(list(video_dir.glob("frame_*.jpg")))[:self.seq_length]
        frame_tensors = []
        from PIL import Image
        for path in frame_paths:
            img = Image.open(path).convert("RGB")
            if hasattr(self, 'transform') and self.transform:
                img = self.transform(img)
            frame_tensors.append(img)
        sequence = torch.stack(frame_tensors)
        return sequence, torch.tensor(label, dtype=torch.float32)

### **Define the CNN-LSTM Model**

**CNN-LSTM Model**:

1.  ResNet50 extracts features from each frame.
2.  LSTM processes temporal dependencies in sequences.
3.  Outputs four-dimensional regression values (engagement metrics).


In [4]:
class CNN_LSTM(nn.Module):
    def __init__(self, num_classes=4):
        super(CNN_LSTM, self).__init__()
        # Feature extractor using ResNet50
        resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)  # Updated line
        modules = list(resnet.children())[:-1]
        self.feature_extractor = nn.Sequential(*modules)
        
        # LSTM to capture temporal features
        self.lstm = nn.LSTM(2048, 512, batch_first=True)
        
        # Fully connected layer for regression
        self.fc = nn.Linear(512, num_classes)
    
    def forward(self, x):
        batch_size, seq_len, c, h, w = x.size()
        x = x.view(-1, c, h, w)
        features = self.feature_extractor(x)
        features = features.view(batch_size, seq_len, -1)
        
        lstm_out, _ = self.lstm(features)
        lstm_out = lstm_out[:, -1, :]  # Take the last output
        out = self.fc(lstm_out)
        return out

### Precompute Features (Run Once)

This script extracts features from frames using ResNet50 and saves them to disk.


In [5]:
# Execute this celonce to generate precomputed features
from PIL import Image
import torch
from torchvision import transforms

def extract_features(frame_dir, output_dir):
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1).cuda()
    resnet.eval()

    for vid_dir in Path(frame_dir).iterdir():
        output_subdir = Path(output_dir) / vid_dir.name
        output_subdir.mkdir(parents=True, exist_ok=True)
        frame_paths = sorted(vid_dir.glob("*.jpg"))
        with torch.no_grad():
            for i, path in enumerate(frame_paths):
                img = Image.open(path).convert("RGB")
                img_tensor = transform(img).unsqueeze(0).cuda()
                feature = resnet(img_tensor).squeeze().cpu().numpy()
                np.save(output_subdir / f"{i:04d}.npy", feature)
    logger.info("Feature extraction complete!")

### Define the CNN-LSTM Model

This model uses a pretrained ResNet50 (with frozen weights) and an LSTM.

- **Faster Training**: Freezes ResNet50 weights to reduce computational load.


In [6]:
class CNN_LSTM(nn.Module):
    def __init__(self, num_classes=4):
        super().__init__()
        resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
        self.feature_extractor = nn.Sequential(*list(resnet.children())[:-1])
        
        # Freeze ResNet50 layers
        for param in self.feature_extractor.parameters():
            param.requires_grad = False
        
        self.feature_extractor.eval()  # Keep in eval mode
        
        self.lstm = nn.LSTM(2048, 512, batch_first=True)
        self.fc = nn.Linear(512, num_classes)
    
    def forward(self, x):
        with torch.no_grad():
            batch_size, seq_len, c, h, w = x.size()
            x = x.view(-1, c, h, w)
            features = self.feature_extractor(x)
            features = features.view(batch_size, seq_len, -1)
        
        lstm_out, _ = self.lstm(features)
        lstm_out = lstm_out[:, -1]
        return self.fc(lstm_out)

### Data Loaders

Configures train, validation, and test loaders with optimal settings.

- **Multi-Workers**: Uses 8 workers (match CPU cores).
- **Pinned Memory**: Speeds up GPU transfers.


In [7]:
def get_dataloaders(batch_size=16):
    base_path = Path("C:/Users/abhis/Downloads/Documents/Learner Engagement Project/data/DAiSEE")
    train_frames_root = base_path / "ExtractedFrames" / "Train"
    val_frames_root = base_path / "ExtractedFrames" / "Validation"
    test_frames_root = base_path / "ExtractedFrames" / "Test"
    labels_path = base_path / "DataSet/Labels"
    
    train_dataset = DAiSEEDataset(train_frames_root, labels_path / "TrainLabels.csv", seq_length=15)
    val_dataset   = DAiSEEDataset(val_frames_root, labels_path / "ValidationLabels.csv", seq_length=15)
    test_dataset  = DAiSEEDataset(test_frames_root, labels_path / "TestLabels.csv", seq_length=15)
    
    train_loader = DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True,
        num_workers=0, pin_memory=True
    )
    val_loader = DataLoader(
        val_dataset, batch_size=batch_size, shuffle=False,
        num_workers=0, pin_memory=True
    )
    test_loader = DataLoader(
        test_dataset, batch_size=batch_size, shuffle=False,
        num_workers=0, pin_memory=True
    )
    return train_loader, val_loader, test_loader

### Training Loop with Optimizations

- **Mixed Precision**: Uses FP16 for faster training.
- **Checkpointing**: Saves the best model based on validation loss


In [8]:
from torch.amp import GradScaler

def train_model(model, train_loader, val_loader, epochs=10):
    if len(train_loader) == 0:
        raise ValueError("Training dataset is empty. Check the data paths.")
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-5)
    loss_fn = nn.MSELoss()
    scaler = GradScaler()  
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3)
    
    best_val_loss = np.inf
    
    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1} Train"):
            inputs, labels = inputs.to(device), labels.to(device)
            
            optimizer.zero_grad()
            
            # Use mixed precision training
            with torch.autocast(device_type='cuda', dtype=torch.float16):
                outputs = model(inputs)
                loss = loss_fn(outputs, labels)
            
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            
            train_loss += loss.item() * inputs.size(0)
        
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in tqdm(val_loader, desc=f"Epoch {epoch+1} Val"):
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = loss_fn(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
        
        train_loss /= len(train_loader.dataset)
        val_loss /= len(val_loader.dataset)
        
        logger.info(f"Epoch {epoch+1}/{epochs} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")
        
        scheduler.step(val_loss)
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), 'best_model.pth')
    
    return train_loss, val_loss

### Run the Training Process

Execute the training loop and evaluate the model.


In [9]:
def evaluate(model, data_loader):
    model.eval()
    all_preds, all_labels = [], []
    device = next(model.parameters()).device  # Get the device of the model
    with torch.no_grad():
        for inputs, labels in data_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            all_preds.append(outputs.cpu().numpy())
            all_labels.append(labels.cpu().numpy())
    all_preds = np.concatenate(all_preds)
    all_labels = np.concatenate(all_labels)
    mse = np.mean((all_preds - all_labels) ** 2, axis=0)
    r2 = r2_score(all_labels, all_preds, multioutput='raw_values')
    return mse, r2

if __name__ == "__main__":
    # Ensure features are precomputed before training
    train_loader, val_loader, test_loader = get_dataloaders(batch_size=16)
    
    model = CNN_LSTM()
    train_model(model, train_loader, val_loader, epochs=20)
    
    # Load and evaluate the best model
    model.load_state_dict(torch.load('best_model.pth'))
    model.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
    model.eval()
    
    test_mse, test_r2 = evaluate(model, test_loader)
    logger.info(f"Test MSE: {test_mse}\nTest R²: {test_r2}")

Epoch 1 Train:   0%|          | 0/304 [00:00<?, ?it/s]


TypeError: expected Tensor as element 0 in argument 0, but got Image