In [None]:
!pip install torchvision
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
import pandas as pd
from torch.cuda.amp import autocast, GradScaler

# Run on the GPU when CUDA is visible to PyTorch, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("📍 Device:", device)


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch==2.6.0->torchvision)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch==2.6.0->torchvision)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch==2.6.0->torchvision)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch==2.6.0->torchvision)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch==2.6.0->torchvision)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch==2.6.0->torchvision)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86

In [None]:

# Mount Google Drive at /content/drive; the label CSV and frame folders used
# by the following cells are read from paths below /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load the full label file and create a subset CSV
csv_path_full = "/content/drive/MyDrive/Data Mining Project/AllLabels.csv"
df = pd.read_csv(csv_path_full)
# Strip whitespace from the header names so columns can be addressed by
# their plain names in later cells.
df.columns = df.columns.str.strip()

# Sample up to 200 clips for faster training. DataFrame.sample raises a
# ValueError when n exceeds the number of rows (sampling without replacement),
# so the request is capped at the size of the label file. The fixed
# random_state keeps the subset identical across runs.
sampled_df = df.sample(n=min(200, len(df)), random_state=42)
small_csv_path = "/content/drive/MyDrive/Data Mining Project/AllLabels.csv_200.csv"
sampled_df.to_csv(small_csv_path, index=False)


In [None]:
class DAiSEESequenceDataset(Dataset):
    """Fixed-length frame sequences plus four labels for clips listed in a CSV.

    Every CSV row names a clip through its ``ClipID`` column. The clip's frames
    are looked up as ``<image_root>/<clip_id[:6]>/<clip_id>/image_000.jpg``,
    ``image_001.jpg``, ... for the first ``max_frames`` indices, where
    ``clip_id`` is ``ClipID`` with ``".avi"`` removed.

    Args:
        csv_path: Label CSV containing ``ClipID``, ``Boredom``, ``Engagement``,
            ``Confusion`` and ``Frustration`` columns (whitespace around the
            header names is stripped).
        image_root: Directory that holds the per-group / per-clip frame folders.
        transform: Callable applied to each RGB PIL frame. It has to return
            tensors of one common shape, otherwise the frames cannot be stacked.
        max_frames: Number of frame slots per sample. Clips with fewer frames
            are padded at the end with all-zero frames.
        skip_missing: When True (default), rows whose clip has none of the
            expected frame files are dropped at construction time and a
            warning reports how many were dropped. This keeps all-zero inputs
            paired with real labels out of training. Pass False to keep every
            row and get an all-zero sequence for clips without frames.
        frame_size: ``(height, width)`` of the all-zero placeholder frames
            returned for a clip without frames; should match the output size
            of ``transform``.

    Each item is ``(frames, labels)``: ``frames`` has shape
    ``[max_frames, C, H, W]`` and ``labels`` is a ``torch.long`` tensor with
    the four label values in ``LABEL_COLUMNS`` order.
    """

    # Order of the label columns inside the returned label tensor.
    LABEL_COLUMNS = ('Boredom', 'Engagement', 'Confusion', 'Frustration')

    def __init__(self, csv_path, image_root, transform, max_frames=5,
                 skip_missing=True, frame_size=(112, 112)):
        import warnings

        self.data = pd.read_csv(csv_path)
        self.data.columns = self.data.columns.str.strip()
        self.image_root = image_root
        self.transform = transform
        self.max_frames = max_frames
        self.frame_size = tuple(frame_size)

        if skip_missing:
            # Keep only the rows for which at least one frame file exists.
            keep = [
                pos for pos, clip_name in enumerate(self.data['ClipID'])
                if self._existing_frame_paths(clip_name)
            ]
            dropped = len(self.data) - len(keep)
            if dropped:
                warnings.warn(
                    f"DAiSEESequenceDataset: skipping {dropped} of "
                    f"{len(self.data)} clips with no frames under {image_root!r}"
                )
                self.data = self.data.iloc[keep].reset_index(drop=True)

    def __len__(self):
        return len(self.data)

    def _existing_frame_paths(self, clip_name):
        """Return the paths of the clip's first ``max_frames`` frames that exist."""
        clip_id = clip_name.replace(".avi", "")
        folder_path = os.path.join(self.image_root, clip_id[:6], clip_id)
        candidates = (
            os.path.join(folder_path, f"image_{i:03d}.jpg")
            for i in range(self.max_frames)
        )
        return [path for path in candidates if os.path.exists(path)]

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        labels = torch.tensor(
            [row[column] for column in self.LABEL_COLUMNS], dtype=torch.long
        )

        frames = []
        for path in self._existing_frame_paths(row['ClipID']):
            # convert() returns a fully loaded copy, so the file can be closed
            # as soon as the with-block ends.
            with Image.open(path) as raw:
                frame = raw.convert("RGB")
            if self.transform:
                frame = self.transform(frame)
            frames.append(frame)

        if not frames:
            # Only reachable with skip_missing=False or when frames vanished
            # after construction: fall back to an all-zero sequence.
            return torch.zeros((self.max_frames, 3, *self.frame_size)), labels

        # Pad short clips with zero frames shaped like the first real frame.
        missing = self.max_frames - len(frames)
        frames.extend(torch.zeros_like(frames[0]) for _ in range(missing))

        return torch.stack(frames), labels


In [None]:
class CNN_GRU_MultiHead(nn.Module):
    """ResNet-18 frame encoder followed by a GRU and one linear head per label.

    Each frame of a sequence is encoded by the ResNet-18 backbone (its final
    fully connected layer removed), the per-frame features are fed through a
    GRU, and the GRU output at the last time step is passed to every head.

    Args:
        hidden_dim: GRU hidden size, also the input width of each head.
        num_layers: Number of stacked GRU layers.
        num_heads: Number of independent classification heads (default 4).
        num_classes: Number of logits produced by each head (default 4).
    """

    def __init__(self, hidden_dim=128, num_layers=1, num_heads=4, num_classes=4):
        super().__init__()
        resnet = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
        # Keep every child module except the last one (the classifier layer).
        self.cnn = nn.Sequential(*list(resnet.children())[:-1])
        # Width of the features entering the removed classifier; read from the
        # backbone instead of hard-coding it (512 for ResNet-18).
        feat_dim = resnet.fc.in_features
        self.gru = nn.GRU(feat_dim, hidden_dim, num_layers, batch_first=True)
        self.heads = nn.ModuleList(
            [nn.Linear(hidden_dim, num_classes) for _ in range(num_heads)]
        )

    def forward(self, x_seq):  # [B, T, C, H, W]
        """Return a list with one ``[B, num_classes]`` logit tensor per head."""
        B, T, C, H, W = x_seq.shape
        # reshape (not view) so non-contiguous inputs, e.g. transposed batches,
        # are accepted; frames of all sequences are encoded in one CNN pass.
        frames = x_seq.reshape(B * T, C, H, W)
        feats = self.cnn(frames).reshape(B, T, -1)  # [B, T, feat_dim]
        gru_out, _ = self.gru(feats)
        last_hidden = gru_out[:, -1, :]  # GRU output at the final time step
        return [head(last_hidden) for head in self.heads]


In [None]:
# Locations of the 200-clip label subset and of the extracted training frames.
csv_path = "/content/drive/MyDrive/Data Mining Project/AllLabels.csv_200.csv"
image_root = "/content/drive/MyDrive/Data Mining Project/DAiSEE_Frames_Every3s/Train"

# Per-frame preprocessing: resize to 112x112, convert to a tensor, then
# normalise each channel with the given mean / std.
transform = transforms.Compose(
    [
        transforms.Resize(size=(112, 112)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Five frame slots per clip, served in shuffled batches of 32.
dataset = DAiSEESequenceDataset(
    csv_path=csv_path,
    image_root=image_root,
    transform=transform,
    max_frames=5,
)
loader = DataLoader(dataset=dataset, batch_size=32, shuffle=True)


In [None]:
model = CNN_GRU_MultiHead().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

# Mixed precision is only used on CUDA. On CPU both autocast and the gradient
# scaler are explicitly disabled, so the loop runs in plain float32 without
# warnings. torch.amp.* is the replacement for the deprecated
# torch.cuda.amp.autocast / torch.cuda.amp.GradScaler entry points.
use_amp = device.type == "cuda"
scaler = torch.amp.GradScaler("cuda", enabled=use_amp)

epochs = 5

for epoch in range(1, epochs + 1):
    model.train()
    total_loss = 0
    # Per-head running counts, in the order the heads are reported below.
    correct = [0, 0, 0, 0]
    total = [0, 0, 0, 0]

    print(f"\n📘 Epoch {epoch}/{epochs} ------------------------")

    for batch_idx, (x_seq, labels) in enumerate(loader):
        x_seq, labels = x_seq.to(device), labels.to(device)

        optimizer.zero_grad()

        with torch.amp.autocast(device_type=device.type, enabled=use_amp):
            outputs = model(x_seq)
            # Total loss is the sum of the per-head cross-entropies; column i
            # of the label tensor is the target of head i.
            loss = sum(criterion(out, labels[:, i]) for i, out in enumerate(outputs))

        # Scale the loss before backward, then let the scaler drive the
        # optimizer step and update its scale factor (no-ops when disabled).
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Read the scalar once; it is reused for the running sum and the log.
        batch_loss = loss.item()
        total_loss += batch_loss

        for i in range(4):
            preds = outputs[i].argmax(dim=1)
            correct[i] += (preds == labels[:, i]).sum().item()
            total[i] += labels.size(0)

        # Log every second batch and always the last batch of the epoch.
        if (batch_idx + 1) % 2 == 0 or (batch_idx + 1) == len(loader):
            print(f"Batch {batch_idx + 1}/{len(loader)} | Batch Loss: {batch_loss:.4f}")

    print(f"\n✅ Epoch {epoch} Summary:")
    print(f"Avg Loss: {total_loss / len(loader):.4f}")
    for i, name in enumerate(['Boredom', 'Engagement', 'Confusion', 'Frustration']):
        acc = 100 * correct[i] / total[i]
        print(f"{name} Accuracy: {acc:.2f}%")


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 56.5MB/s]
  scaler = GradScaler()



📘 Epoch 1/5 ------------------------


  with autocast():


Batch 2/7 | Batch Loss: 5.4002
Batch 4/7 | Batch Loss: 4.4218
Batch 6/7 | Batch Loss: 4.3293
Batch 7/7 | Batch Loss: 4.0913

✅ Epoch 1 Summary:
Avg Loss: 4.7635
Boredom Accuracy: 44.50%
Engagement Accuracy: 38.50%
Confusion Accuracy: 48.00%
Frustration Accuracy: 58.50%

📘 Epoch 2/5 ------------------------
Batch 2/7 | Batch Loss: 3.7163
Batch 4/7 | Batch Loss: 3.8250
Batch 6/7 | Batch Loss: 3.4883
Batch 7/7 | Batch Loss: 3.0959

✅ Epoch 2 Summary:
Avg Loss: 3.7367
Boredom Accuracy: 46.00%
Engagement Accuracy: 55.00%
Confusion Accuracy: 67.50%
Frustration Accuracy: 79.00%

📘 Epoch 3/5 ------------------------
Batch 2/7 | Batch Loss: 3.5377
Batch 4/7 | Batch Loss: 4.0177
Batch 6/7 | Batch Loss: 3.3948
Batch 7/7 | Batch Loss: 3.0461

✅ Epoch 3 Summary:
Avg Loss: 3.5051
Boredom Accuracy: 47.50%
Engagement Accuracy: 62.00%
Confusion Accuracy: 67.50%
Frustration Accuracy: 79.00%

📘 Epoch 4/5 ------------------------
Batch 2/7 | Batch Loss: 3.3410
Batch 4/7 | Batch Loss: 3.3565
Batch 6/7 | Ba

In [None]:
class DAiSEEValidationDataset(Dataset):
    """Frame sequences plus four labels for the CSV clips that exist on disk.

    At construction every CSV row is checked: a clip is kept only when
    ``<image_root>/<clip_id[:6]>/<clip_id>/image_000.jpg`` exists, where
    ``clip_id`` is the ``ClipID`` value with ``".avi"`` removed. ``len()``
    therefore counts the clips found under ``image_root``, not the CSV rows.

    Args:
        label_csv: CSV with ``ClipID``, ``Boredom``, ``Engagement``,
            ``Confusion`` and ``Frustration`` columns (header whitespace is
            stripped).
        image_root: Directory holding the per-group / per-clip frame folders.
        transform: Optional callable applied to each RGB PIL frame. Frames
            that are not tensors afterwards (including the ``transform=None``
            case) are converted with ``transforms.ToTensor()`` so that they
            can be padded and stacked.
        max_frames: Number of frame slots per sample; clips with fewer frames
            are padded at the end with all-zero frames.

    Each item is ``(frames, labels)``: ``frames`` has shape
    ``[max_frames, C, H, W]`` and ``labels`` is a ``torch.long`` tensor with
    the four label values in ``LABEL_COLUMNS`` order.
    """

    # Order of the label columns inside the returned label tensor.
    LABEL_COLUMNS = ('Boredom', 'Engagement', 'Confusion', 'Frustration')

    def __init__(self, label_csv, image_root, transform=None, max_frames=5):
        self.df = pd.read_csv(label_csv)
        self.df.columns = self.df.columns.str.strip()
        self.image_root = image_root
        self.transform = transform
        self.max_frames = max_frames
        self.valid_clips = []  # (frame folder, CSV row) for every clip found

        for _, row in self.df.iterrows():
            clip_id = row["ClipID"].replace(".avi", "")
            folder_path = os.path.join(image_root, clip_id[:6], clip_id)
            # The first frame acts as the marker that the clip was extracted.
            if os.path.exists(os.path.join(folder_path, "image_000.jpg")):
                self.valid_clips.append((folder_path, row))

    def __len__(self):
        return len(self.valid_clips)

    def _load_frame(self, img_path):
        """Read one frame as RGB and return it as a tensor."""
        # convert() returns a fully loaded copy, so the file can be closed
        # as soon as the with-block ends.
        with Image.open(img_path) as raw:
            frame = raw.convert("RGB")
        if self.transform:
            frame = self.transform(frame)
        # Without a tensor-producing transform the frame is still a PIL image,
        # which torch.zeros_like / torch.stack cannot handle.
        if not torch.is_tensor(frame):
            frame = transforms.ToTensor()(frame)
        return frame

    def __getitem__(self, idx):
        folder_path, row = self.valid_clips[idx]
        candidates = (
            os.path.join(folder_path, f"image_{i:03d}.jpg")
            for i in range(self.max_frames)
        )
        images = [self._load_frame(path) for path in candidates if os.path.exists(path)]

        if not images:
            # Frames disappeared after construction: all-zero placeholder clip.
            images = [torch.zeros((3, 112, 112)) for _ in range(self.max_frames)]

        # Pad short clips with zero frames shaped like the first frame.
        while len(images) < self.max_frames:
            images.append(torch.zeros_like(images[0]))

        labels = torch.tensor(
            [row[column] for column in self.LABEL_COLUMNS], dtype=torch.long
        )

        return torch.stack(images), labels


In [None]:
# Validation split: same 200-clip label subset, frames taken from the
# Validation folder. The dataset keeps only clips whose frames are found there.
val_root = "/content/drive/MyDrive/Data Mining Project/DAiSEE_Frames_Every3s/Validation"
val_csv = "/content/drive/MyDrive/Data Mining Project/AllLabels.csv_200.csv"

val_dataset = DAiSEEValidationDataset(
    label_csv=val_csv,
    image_root=val_root,
    transform=transform,
    max_frames=5,
)
# Fixed order (no shuffling) for evaluation.
val_loader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)

print(f"✅ Loaded {len(val_dataset)} validation samples.")


✅ Loaded 29 validation samples.


In [None]:
# Evaluate the trained model on the validation loader, one accuracy per head.
model.eval()
correct = [0, 0, 0, 0]
total = [0, 0, 0, 0]

with torch.no_grad():
    for imgs, labels in val_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        outputs = model(imgs)

        # Head i is scored against column i of the label tensor.
        for i in range(4):
            preds = outputs[i].argmax(dim=1)
            correct[i] += (preds == labels[:, i]).sum().item()
            total[i] += labels.size(0)

print("\n🎯 Final Validation Accuracy:")
for i, name in enumerate(['Boredom', 'Engagement', 'Confusion', 'Frustration']):
    # The loader is empty when none of the sampled clips exist in this split;
    # report that instead of failing with a ZeroDivisionError.
    if total[i] == 0:
        print(f"{name}: n/a (no samples)")
        continue
    acc = 100 * correct[i] / total[i]
    print(f"{name}: {acc:.2f}%")



🎯 Final Validation Accuracy:
Boredom: 24.14%
Engagement: 34.48%
Confusion: 72.41%
Frustration: 72.41%


In [None]:
# NOTE: this cell redefines the class of the same name declared in an earlier
# cell; whichever definition ran last is the one used by later cells.
class DAiSEEValidationDataset(Dataset):  # Works for both val and test
    """Frame sequences plus four labels for the CSV clips that exist on disk.

    At construction every CSV row is checked: a clip is kept only when
    ``<image_root>/<clip_id[:6]>/<clip_id>/image_000.jpg`` exists, where
    ``clip_id`` is the ``ClipID`` value with ``".avi"`` removed. ``len()``
    therefore counts the clips found under ``image_root``, not the CSV rows.

    Args:
        label_csv: CSV with ``ClipID``, ``Boredom``, ``Engagement``,
            ``Confusion`` and ``Frustration`` columns (header whitespace is
            stripped).
        image_root: Directory holding the per-group / per-clip frame folders.
        transform: Optional callable applied to each RGB PIL frame. Frames
            that are not tensors afterwards (including the ``transform=None``
            case) are converted with ``transforms.ToTensor()`` so that they
            can be padded and stacked.
        max_frames: Number of frame slots per sample; clips with fewer frames
            are padded at the end with all-zero frames.

    Each item is ``(frames, labels)``: ``frames`` has shape
    ``[max_frames, C, H, W]`` and ``labels`` is a ``torch.long`` tensor with
    the four label values in ``LABEL_COLUMNS`` order.
    """

    # Order of the label columns inside the returned label tensor.
    LABEL_COLUMNS = ('Boredom', 'Engagement', 'Confusion', 'Frustration')

    def __init__(self, label_csv, image_root, transform=None, max_frames=5):
        self.df = pd.read_csv(label_csv)
        self.df.columns = self.df.columns.str.strip()
        self.image_root = image_root
        self.transform = transform
        self.max_frames = max_frames
        self.valid_clips = []  # (frame folder, CSV row) for every clip found

        for _, row in self.df.iterrows():
            clip_id = row["ClipID"].replace(".avi", "")
            folder_path = os.path.join(image_root, clip_id[:6], clip_id)
            # The first frame acts as the marker that the clip was extracted.
            if os.path.exists(os.path.join(folder_path, "image_000.jpg")):
                self.valid_clips.append((folder_path, row))

    def __len__(self):
        return len(self.valid_clips)

    def _load_frame(self, img_path):
        """Read one frame as RGB and return it as a tensor."""
        # convert() returns a fully loaded copy, so the file can be closed
        # as soon as the with-block ends.
        with Image.open(img_path) as raw:
            frame = raw.convert("RGB")
        if self.transform:
            frame = self.transform(frame)
        # Without a tensor-producing transform the frame is still a PIL image,
        # which torch.zeros_like / torch.stack cannot handle.
        if not torch.is_tensor(frame):
            frame = transforms.ToTensor()(frame)
        return frame

    def __getitem__(self, idx):
        folder_path, row = self.valid_clips[idx]
        candidates = (
            os.path.join(folder_path, f"image_{i:03d}.jpg")
            for i in range(self.max_frames)
        )
        images = [self._load_frame(path) for path in candidates if os.path.exists(path)]

        if not images:
            # Frames disappeared after construction: all-zero placeholder clip.
            images = [torch.zeros((3, 112, 112)) for _ in range(self.max_frames)]

        # Pad short clips with zero frames shaped like the first frame.
        while len(images) < self.max_frames:
            images.append(torch.zeros_like(images[0]))

        labels = torch.tensor(
            [row[column] for column in self.LABEL_COLUMNS], dtype=torch.long
        )

        return torch.stack(images), labels


In [None]:
# Test split: same 200-clip label subset, frames taken from the Test folder.
# The dataset keeps only clips whose frames are found there.
test_root = "/content/drive/MyDrive/Data Mining Project/DAiSEE_Frames_Every3s/Test"
test_csv = "/content/drive/MyDrive/Data Mining Project/AllLabels.csv_200.csv"

test_dataset = DAiSEEValidationDataset(
    label_csv=test_csv,
    image_root=test_root,
    transform=transform,
    max_frames=5,
)
# Fixed order (no shuffling) for evaluation.
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

print(f"✅ Loaded {len(test_dataset)} test samples.")


✅ Loaded 49 test samples.


In [None]:
# Evaluate the trained model on the test loader, one accuracy per head.
model.eval()
correct = [0, 0, 0, 0]
total = [0, 0, 0, 0]

with torch.no_grad():
    for imgs, labels in test_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        outputs = model(imgs)

        # Head i is scored against column i of the label tensor.
        for i in range(4):
            preds = outputs[i].argmax(dim=1)
            correct[i] += (preds == labels[:, i]).sum().item()
            total[i] += labels.size(0)

print("\n🧪 Final Test Accuracy:")
for i, name in enumerate(['Boredom', 'Engagement', 'Confusion', 'Frustration']):
    # The loader is empty when none of the sampled clips exist in this split;
    # report that instead of failing with a ZeroDivisionError.
    if total[i] == 0:
        print(f"{name}: n/a (no samples)")
        continue
    acc = 100 * correct[i] / total[i]
    print(f"{name}: {acc:.2f}%")



🧪 Final Test Accuracy:
Boredom: 38.78%
Engagement: 51.02%
Confusion: 73.47%
Frustration: 83.67%
