In [None]:
%pip install torch torchvision matplotlib scikit-learn tqdm

In [14]:
import os
import time
from pathlib import Path
import copy
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms, models

from sklearn.metrics import confusion_matrix, classification_report
from PIL import Image

# --- Run configuration: every tunable the later cells read lives here ---
DATA_DIR = "Data"         # ImageFolder root: expects Data/<one sub-folder per emotion class>/
BATCH_SIZE = 32           # mini-batch size used by both the train and the val DataLoader
NUM_EPOCHS = 12           # number of passes the training loop makes over the training split
LR = 1e-4                 # learning rate passed to the Adam optimizer
NUM_WORKERS = 4           # worker processes per DataLoader
MODEL_PATH = "best_emotion_resnet50.pth"  # file the best checkpoint is saved to and reloaded from
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # CUDA when available, otherwise CPU
VAL_SPLIT = 0.2           # fraction of the dataset held out for validation

In [15]:
# Data transformations
# Per-channel mean / std used to normalise inputs (the usual ImageNet statistics).
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Training pipeline: random crop, flip and colour jitter act as augmentation
# before the image is converted to a normalised tensor.
_train_steps = [
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
]
train_transforms = transforms.Compose(_train_steps)

# Validation / inference pipeline: deterministic resize + centre crop, then
# the same tensor conversion and normalisation as training.
_val_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
]
val_transforms = transforms.Compose(_val_steps)

In [16]:
# Dataset split
# Two ImageFolder views over the same directory, one per transform pipeline.
# random_split() subsets share a single underlying dataset, so assigning
# `val_ds.dataset.transform = val_transforms` would also switch the *training*
# subset to the validation transforms and silently disable augmentation.
SPLIT_SEED = 42  # fixed so the train/val membership is identical after a kernel restart

train_view = datasets.ImageFolder(DATA_DIR, transform=train_transforms)
val_view = datasets.ImageFolder(DATA_DIR, transform=val_transforms)
# Both views must enumerate the files in the same order for the shared indices to be valid.
assert train_view.samples == val_view.samples, "ImageFolder views disagree on sample order"

# Kept under the original name: later cells read full_dataset.classes / class_to_idx.
full_dataset = train_view
num_val = int(len(full_dataset) * VAL_SPLIT)
num_train = len(full_dataset) - num_val

# One seeded permutation of all indices, cut into disjoint train / val parts.
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
shuffled_indices = torch.randperm(len(full_dataset), generator=split_generator).tolist()
train_ds = torch.utils.data.Subset(train_view, shuffled_indices[:num_train])
val_ds = torch.utils.data.Subset(val_view, shuffled_indices[num_train:])

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

class_names = full_dataset.classes
num_classes = len(class_names)
print(f"Classes: {class_names}")
print(f"Number of training samples: {len(train_ds)}")
print(f"Number of validation samples: {len(val_ds)}")

Classes: ['Angry', 'Fear', 'Happy', 'Sad', 'Suprise']
Number of training samples: 47280
Number of validation samples: 11819


In [17]:
# models
# ResNet-50 initialised with the ImageNet weights that `pretrained=True` used
# to select; the `weights=` enum replaces that deprecated flag in torchvision.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Fine-tune all layers; pass False here to freeze the backbone (the new head
# created below is trainable either way because it is built afterwards).
model.requires_grad_(True)

# Swap the 1000-way ImageNet classifier for one sized to this dataset.
in_features = model.fc.in_features
model.fc = nn.Linear(in_features, num_classes)
model = model.to(DEVICE)



In [20]:
# Training objective, optimizer and learning-rate schedule.
# `criterion` is also read as a module-level name by evaluate().
criterion = nn.CrossEntropyLoss() # Loss function: cross-entropy over the class logits
optimizer = torch.optim.Adam(model.parameters(), lr=LR) # Optimizer: Adam over every model parameter
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1) # LR scheduler: multiply the LR by 0.1 every 5 scheduler steps (the training loop steps it once per epoch)

In [19]:
# evaluation helper
def evaluate(model, loader, loss_fn=None, device=None):
    """Run one no-grad pass of ``model`` over ``loader`` and summarise it.

    Args:
        model: Module mapping a batch of inputs to per-class scores.
        loader: Iterable of ``(inputs, labels)`` batches whose ``.dataset``
            supports ``len()``.
        loss_fn: Callable ``(outputs, labels) -> scalar loss tensor``. When
            omitted, the notebook-level ``criterion`` is used.
        device: Device the batches are moved to. When omitted, the
            notebook-level ``DEVICE`` is used.

    Returns:
        ``(avg_loss, accuracy, all_preds, all_labels)`` where the first two are
        per-sample averages over ``len(loader.dataset)`` and the last two are
        flat lists of predicted / true class indices in loader order.

    Raises:
        ValueError: If the loader's dataset is empty (the averages would
            otherwise fail with a bare ZeroDivisionError).
    """
    # Fall back to the notebook globals only when the caller did not pass
    # explicit values, so existing `evaluate(model, loader)` calls still work.
    loss_fn = criterion if loss_fn is None else loss_fn
    device = DEVICE if device is None else device

    num_samples = len(loader.dataset)
    if num_samples == 0:
        raise ValueError("evaluate() received a loader over an empty dataset")

    model.eval()
    total_loss = 0.0
    total_correct = 0
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            preds = outputs.argmax(dim=1)

            # The loss is a batch mean; weight it by batch size so a smaller
            # final batch does not skew the per-sample average.
            total_loss += loss.item() * inputs.size(0)
            total_correct += (preds == labels).sum().item()
            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    avg_loss = total_loss / num_samples
    acc = total_correct / num_samples
    return avg_loss, acc, all_preds, all_labels

In [None]:
# training loop
# Per-epoch metrics are collected in `history` for plotting; `best_acc` tracks
# the highest validation accuracy seen so far and gates checkpointing.
history = {"train_loss": [], "val_loss": [], "train_acc": [], "val_acc": []}
best_acc = 0.0

for epoch in range(NUM_EPOCHS):
    epoch_start = time.time()
    model.train()
    loss_sum, hits = 0.0, 0

    loop = tqdm(train_loader, desc=f"Epoch [{epoch+1}/{NUM_EPOCHS}]", leave=False)
    for inputs, labels in loop:
        inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

        # Standard optimisation step: clear grads, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate sample-weighted loss and the number of correct predictions.
        batch_loss = loss.item()
        preds = outputs.argmax(dim=1)
        loss_sum += batch_loss * inputs.size(0)
        hits += (preds == labels).sum().item()
        loop.set_postfix(loss=batch_loss)

    train_loss = loss_sum / len(train_ds)
    train_acc = hits / len(train_ds)
    val_loss, val_acc, _, _ = evaluate(model, val_loader)
    scheduler.step()  # stepped once per epoch

    epoch_metrics = (
        ("train_loss", train_loss),
        ("val_loss", val_loss),
        ("train_acc", train_acc),
        ("val_acc", val_acc),
    )
    for metric_name, metric_value in epoch_metrics:
        history[metric_name].append(metric_value)

    epoch_time = time.time() - epoch_start
    print(f"Epoch {epoch+1}/{NUM_EPOCHS}  time: {epoch_time:.1f}s  train_loss: {train_loss:.4f}  train_acc: {train_acc:.4f}  val_loss: {val_loss:.4f}  val_acc: {val_acc:.4f}")

    # Save best model: only when validation accuracy strictly improves.
    if val_acc > best_acc:
        best_acc = val_acc
        best_checkpoint = {
            "epoch": epoch+1,
            "model_state": model.state_dict(),
            "optimizer_state": optimizer.state_dict(),
            "class_names": class_names,
            "val_acc": best_acc
        }
        torch.save(best_checkpoint, MODEL_PATH)
        print(f"Saved best model to {MODEL_PATH} (val_acc={best_acc:.4f})")
print("Training complete. Best val acc:", best_acc)

Epoch [1/12]:  24%|██▎       | 350/1478 [30:48<1:26:50,  4.62s/it, loss=0.76] 

In [None]:
# final evaluation
# Reload the best checkpoint written by the training loop and score it on the
# validation loader.
checkpoint = torch.load(MODEL_PATH, map_location=DEVICE)
model.load_state_dict(checkpoint["model_state"])
model.eval()

val_loss, val_acc, all_preds, all_labels = evaluate(model, val_loader)
print("Final val loss:", val_loss, "val acc:", val_acc)
print("\nClassification report:")
print(classification_report(all_labels, all_preds, target_names=class_names))

cm = confusion_matrix(all_labels, all_preds)
print("Confusion matrix:\n", cm)

# Plot training curves: loss on the left panel, accuracy on the right.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(10,4))
panels = (
    (loss_ax, "Loss", ("train_loss", "val_loss")),
    (acc_ax, "Accuracy", ("train_acc", "val_acc")),
)
for ax, panel_title, series_names in panels:
    for series in series_names:
        ax.plot(history[series], label=series)
    ax.legend()
    ax.set_title(panel_title)
fig.tight_layout()
plt.show()

In [None]:
def predict_image(img_path, model, topk=1):
    """Classify a single image file and return its most likely classes.

    Args:
        img_path: Path of the image to classify; it is opened with PIL and
            converted to RGB before the validation transforms are applied.
        model: Classifier producing one row of class scores per input.
        topk: Number of classes to return, capped at the number of scores the
            model outputs.

    Returns:
        List of ``(class_name, probability)`` tuples ordered from most to
        least likely; names come from ``full_dataset.class_to_idx``.
    """
    model.eval()

    # Run on whatever device the model itself lives on: `model` may have been
    # rebound to a CPU-only model elsewhere in the notebook, in which case
    # sending the input to DEVICE would raise a device-mismatch error.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else DEVICE

    # The context manager closes the file handle once the pixels are converted.
    with Image.open(img_path) as raw_img:
        batch = val_transforms(raw_img.convert("RGB")).unsqueeze(0).to(device)

    with torch.no_grad():
        outputs = model(batch)
        probs = torch.nn.functional.softmax(outputs, dim=1)
        # topk() raises when asked for more entries than there are classes.
        k = min(topk, probs.size(1))
        top_probs, top_idx = probs.topk(k, dim=1)

    idx_to_class = {idx: name for name, idx in full_dataset.class_to_idx.items()}
    results = [(idx_to_class[i], p) for p, i in zip(top_probs[0].tolist(), top_idx[0].tolist())]
    return results

# Smoke test: classify one image from the dataset folder and print the
# resulting list of (class name, probability) pairs.
print(predict_image("Data/Angry/77.png", model))

In [None]:
# Adding webcam functionality
# Real-time emotion detection

# Load model
# The training loop saves a dict (epoch / model_state / optimizer_state / ...),
# so the weights live under "model_state" -- passing the whole dict to
# load_state_dict() fails with missing/unexpected keys. A file holding a bare
# state_dict is accepted as well.
saved = torch.load(MODEL_PATH, map_location="cpu")
state_dict = saved.get("model_state", saved)

model = models.resnet50()
# Size the classification head from the saved weights instead of hard-coding
# the number of emotions, so the head always matches the checkpoint.
model.fc = torch.nn.Linear(model.fc.in_features, state_dict["fc.weight"].shape[0])
model.load_state_dict(state_dict)
model.eval()

In [None]:
import cv2

# Display labels, indexed by the model's predicted class index.
classes = ['Angry', 'Fear', 'Happy', 'Sad', 'Surprised']
assert len(classes) == model.fc.out_features, "label list does not match the model's output size"

# Reuse the validation preprocessing (Resize(256) -> CenterCrop(224) -> tensor
# -> normalise) so webcam frames are prepared the same way as the images the
# model was evaluated on, instead of being squashed to 224x224.
transform = transforms.Compose([
    transforms.ToPILImage(),   # cv2 frames arrive as HxWxC uint8 numpy arrays
    val_transforms,
])

# Consecutive failed reads tolerated before giving up; a single dropped frame
# is skipped, but a dead camera must not spin the loop forever.
MAX_FAILED_READS = 30

# Feed frames on whatever device the model currently lives on.
model_device = next(model.parameters()).device

cap = cv2.VideoCapture(0)  # 0 = default webcam
if not cap.isOpened():
    cap.release()
    raise RuntimeError("Could not open the default webcam (index 0)")

failed_reads = 0
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            failed_reads += 1
            if failed_reads >= MAX_FAILED_READS:
                print("Webcam stopped delivering frames; exiting.")
                break
            continue
        failed_reads = 0

        # Convert frame to model input (OpenCV delivers BGR; the model expects RGB)
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        batch = transform(rgb).unsqueeze(0).to(model_device)

        with torch.no_grad():
            output = model(batch)
            _, pred = torch.max(output, 1)
            emotion = classes[pred.item()]

        # Draw text on the video feed
        cv2.putText(frame, emotion, (50,50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2)

        cv2.imshow("Emotion Detection", frame)

        # Press 'q' in the preview window to quit.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even on an exception or a
    # kernel interrupt, so the device is not left locked.
    cap.release()
    cv2.destroyAllWindows()