In [1]:
import pandas as pd
import torch
import torch.optim as optim
from torchvision import transforms
import torch.nn as nn
import copy
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import os

In [2]:
df = pd.read_pickle('df_balanced_new.pkl')

In [3]:
df.columns

Index(['name', 'description', 'label', 'base_name', 'emotion_category',
       'image', 'engagement_type', 'label_encoded', 'image_id'],
      dtype='object')

In [4]:
df = df[['image', 'engagement_type']]

In [6]:
model = torch.load("alexnet_best_final_corrected.pth", map_location=device)
model.to(device)
model.eval()

  model = torch.load("alexnet_best_final_corrected.pth", map_location=device)


AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [None]:
model.train()

# FINETUNING
Since we're working with the same dataset and want to improve accuracy, it's usually best to fine-tune all layers. That means ensuring all parameters require gradients:

In [None]:
for param in model.parameters():
    param.requires_grad = True

In [None]:
#Using SGD optimizer this time
optimizer = optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)

In [None]:
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['engagement_type'])

In [None]:
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['label'], random_state=42)

In [None]:
class EngagementDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.df = dataframe.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        image = self.df.loc[idx, 'image']  # Tensor
        label = self.df.loc[idx, 'label']  # Integer

        if self.transform:
            image = self.transform(image)

        return image, label

In [None]:
# AlexNet was trained on ImageNet — we mimic that normalization
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])



In [None]:
train_dataset = EngagementDataset(train_df, transform=normalize)
val_dataset = EngagementDataset(val_df, transform=normalize)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


In [None]:
import torch.nn as nn
import copy

criterion = nn.CrossEntropyLoss()
num_epochs = 20  # adjust as needed

best_val_acc = 0.0
best_model_wts = copy.deepcopy(model.state_dict())

for epoch in range(num_epochs):
    # ===== Train =====
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.long().to(device)  # Ensure correct dtype

        optimizer.zero_grad()
        outputs = model(inputs)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    train_loss = running_loss / len(train_loader)
    train_acc = correct / total

    # ===== Validation =====
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(device)
            labels = labels.long().to(device)  # Ensure correct dtype

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            val_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    val_loss = val_loss / len(val_loader)
    val_acc = correct / total

    print(f"Epoch [{epoch+1}/{num_epochs}] | "
          f"Train Loss: {train_loss:.4f} Acc: {train_acc:.4f} | "
          f"Val Loss: {val_loss:.4f} Acc: {val_acc:.4f}")

    # ===== Save Best Model =====
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_model_wts = copy.deepcopy(model.state_dict())
        torch.save(model.state_dict(), "alexnet_best_final_corrected.pth")
        print("✔️ New best model saved.")

# ===== Load Best Model After Training =====
model.load_state_dict(best_model_wts)


In [20]:
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.long().to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    train_loss = running_loss / len(train_loader)
    train_acc = correct / total

    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(device)
            labels = labels.long().to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            val_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    val_loss = val_loss / len(val_loader)
    val_acc = correct / total

    print(f"Epoch [{epoch+1}/{num_epochs}] | "
          f"Train Loss: {train_loss:.4f} Acc: {train_acc:.4f} | "
          f"Val Loss: {val_loss:.4f} Acc: {val_acc:.4f}")

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_model_wts = copy.deepcopy(model.state_dict())
        torch.save(model.state_dict(), "alexnet_best_finetuned.pth")
        print("✔️ New best model saved.")


KeyboardInterrupt: 

In [40]:
from torch.optim.lr_scheduler import ReduceLROnPlateau

criterion = nn.CrossEntropyLoss()
num_epochs = 30
patience = 5  # Early stopping patience

best_val_acc = 0.0
best_model_wts = copy.deepcopy(model.state_dict())
early_stop_counter = 0

# Scheduler to reduce LR when validation accuracy plateaus
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3, verbose=True)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.long().to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    train_loss = running_loss / len(train_loader)
    train_acc = correct / total

    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(device)
            labels = labels.long().to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            val_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    val_loss = val_loss / len(val_loader)
    val_acc = correct / total

    print(f"Epoch [{epoch+1}/{num_epochs}] | "
          f"Train Loss: {train_loss:.4f} Acc: {train_acc:.4f} | "
          f"Val Loss: {val_loss:.4f} Acc: {val_acc:.4f}")

    # Step the scheduler
    scheduler.step(val_acc)

    # Early stopping logic
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_model_wts = copy.deepcopy(model.state_dict())
        torch.save(model.state_dict(), "alexnet_best_finetuned.pth")
        print("✔️ New best model saved.")
        early_stop_counter = 0
    else:
        early_stop_counter += 1
        print(f"⚠️ No improvement. Early stop counter: {early_stop_counter}/{patience}")
        if early_stop_counter >= patience:
            print("⏹️ Early stopping triggered.")
            break

# Load best weights after training
model.load_state_dict(best_model_wts)




Epoch [1/30] | Train Loss: 0.1280 Acc: 0.9508 | Val Loss: 0.9832 Acc: 0.7933
✔️ New best model saved.
Epoch [2/30] | Train Loss: 0.1250 Acc: 0.9513 | Val Loss: 1.0179 Acc: 0.7943
✔️ New best model saved.
Epoch [3/30] | Train Loss: 0.1256 Acc: 0.9537 | Val Loss: 1.0191 Acc: 0.7947
✔️ New best model saved.
Epoch [4/30] | Train Loss: 0.1212 Acc: 0.9528 | Val Loss: 1.0377 Acc: 0.7947
⚠️ No improvement. Early stop counter: 1/5
Epoch [5/30] | Train Loss: 0.1186 Acc: 0.9531 | Val Loss: 1.0728 Acc: 0.7984
✔️ New best model saved.
Epoch [6/30] | Train Loss: 0.1108 Acc: 0.9566 | Val Loss: 1.0782 Acc: 0.7970
⚠️ No improvement. Early stop counter: 1/5
Epoch [7/30] | Train Loss: 0.1130 Acc: 0.9546 | Val Loss: 1.0074 Acc: 0.7970
⚠️ No improvement. Early stop counter: 2/5
Epoch [8/30] | Train Loss: 0.1117 Acc: 0.9580 | Val Loss: 0.9656 Acc: 0.7860
⚠️ No improvement. Early stop counter: 3/5
Epoch [9/30] | Train Loss: 0.1043 Acc: 0.9607 | Val Loss: 1.0369 Acc: 0.8002
✔️ New best model saved.
Epoch [10/

<All keys matched successfully>

In [41]:
print(list(label_encoder.classes_))

['engaged-negative', 'engaged-positive', 'not engaged']


## testing the model

In [8]:
import cv2
import time
import torch
import numpy as np
from torchvision import transforms
from sklearn.preprocessing import LabelEncoder

# ========== DEVICE SETUP ==========
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ========== LOAD MODEL ==========
# Since you saved the entire model with torch.save(model, ...), load it directly
model = torch.load("alexnet_best_final_corrected.pth", map_location=device)
model.to(device)
model.eval()

# ========== LABEL DECODING ==========
# If you saved the label encoder with joblib, you can use:
# import joblib
# label_encoder = joblib.load("label_encoder.joblib")
# label_encoder.inverse_transform([pred])[0]

# Otherwise, define manually (must match training order)
label_encoder_classes = ['bored', 'confused', 'focused']
def decode_label(index):
    return label_encoder_classes[index]

# ========== IMAGE TRANSFORM ==========
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# ========== CAMERA LOOP ==========
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("❌ Could not open webcam.")
    exit()

print("📷 Webcam activated. Press Ctrl+C to stop.")

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("❌ Failed to capture frame.")
            break

        # Convert BGR (OpenCV) to RGB (PyTorch expects RGB)
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Transform image
        input_tensor = transform(rgb).unsqueeze(0).to(device)

        # Predict
        with torch.no_grad():
            output = model(input_tensor)
            pred = torch.argmax(output, dim=1).item()
            label = decode_label(pred)

        # Display result on frame
        cv2.putText(frame, f"Prediction: {label}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Show the frame
        cv2.imshow("Engagement Detection", frame)

        # Wait for 1ms and break loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        time.sleep(2)  # Optional: slow down for better viewing

except KeyboardInterrupt:
    print("\n🛑 Stopped by user.")

finally:
    cap.release()
    cv2.destroyAllWindows()


  model = torch.load("alexnet_best_final_corrected.pth", map_location=device)


📷 Webcam activated. Press Ctrl+C to stop.

🛑 Stopped by user.


In [44]:
print(output)
print(f"🧠 Prediction: {label}")

tensor([[-4.8052, -3.4523,  7.7021]], device='cuda:0')
🧠 Prediction: focused
