In [None]:
import os, json
import numpy as np
from tqdm import tqdm
from collections import Counter
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, WeightedRandomSampler, Dataset
from torchvision import datasets, transforms
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

from facenet_pytorch import MTCNN
import mediapipe as mp

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
torch.backends.cudnn.benchmark = True
print(f"Device: {device}")

# ---------------------------
# Paths & Dataset
# ---------------------------
train_path = r"C:\Ashvin\AI ML\Project\Emotion Detection\Data\train"
test_path  = r"C:\Ashvin\AI ML\Project\Emotion Detection\Data\test"

# The raw image folders; their `classes` attribute supplies the label names.
train_raw = datasets.ImageFolder(root=train_path)
test_raw  = datasets.ImageFolder(root=test_path)

# Persist the class names of the training split next to the script.
with open("classes.json", "w") as f:
    f.write(json.dumps(train_raw.classes))

# ---------------------------
# Face detection & landmarks
# ---------------------------
# Single-face detector producing 128x128 crops with a 10 px margin.
mtcnn = MTCNN(device=device, keep_all=False, margin=10, image_size=128)
# Module handle only; the FaceMesh instance itself is created where it is used.
mp_face = mp.solutions.face_mesh

def preprocess_faces(dataset, split):
    """Crop one face per image with MTCNN and extract its FaceMesh landmarks.

    Samples are kept only when MTCNN returns a crop AND FaceMesh returns
    exactly 468 landmarks for that crop; everything else is skipped.

    Args:
        dataset: iterable of ``(PIL image, label)`` pairs.
        split: name of the split, used only in the progress messages.

    Returns:
        Tuple ``(faces, labels, landmarks_all)``: the stacked face tensors
        exactly as returned by ``mtcnn``, a tensor of labels, and a list
        holding 468 ``(x, y, z)`` tuples per kept sample.

    Raises:
        RuntimeError: if no sample produced both a face and landmarks.
    """
    print(f" Preprocessing {split} set...")
    faces, labels, landmarks_all = [], [], []
    # facenet-pytorch's MTCNN standardises crops to (pixel - 127.5) / 128 when
    # post_process is on (its default); otherwise it returns raw 0-255 floats.
    standardized = getattr(mtcnn, 'post_process', True)
    with mp_face.FaceMesh(static_image_mode=True, max_num_faces=1) as fm:
        for img, label in tqdm(dataset):
            face_rgb = mtcnn(img.convert('RGB'))
            if isinstance(face_rgb, list):
                face_rgb = face_rgb[0] if len(face_rgb) > 0 else None
            if face_rgb is None:
                continue

            # FaceMesh needs a real uint8 RGB image. Multiplying the
            # standardised tensor by 255 (as before) wraps negative values
            # around and hands FaceMesh garbage, so undo the scaling instead.
            pixels = face_rgb.detach().cpu()
            if standardized:
                pixels = pixels * 128.0 + 127.5
            pixels = pixels.round().clamp(0, 255).to(torch.uint8)
            np_face = np.ascontiguousarray(pixels.permute(1, 2, 0).numpy())

            results = fm.process(np_face)
            if not results.multi_face_landmarks:
                continue
            landmarks = [(lm.x, lm.y, lm.z) for lm in results.multi_face_landmarks[0].landmark]
            if len(landmarks) != 468:
                continue
            faces.append(face_rgb)
            labels.append(label)
            landmarks_all.append(landmarks)
    print(f" Collected {len(faces)} faces for {split}")
    if not faces:
        # torch.stack([]) would raise a much less helpful RuntimeError.
        raise RuntimeError(f"No usable faces found in the {split} set")
    return torch.stack(faces), torch.tensor(labels), landmarks_all

# Use the cached tensors only when BOTH splits are on disk; otherwise run the
# face/landmark extraction for both splits and write the caches for next time.
if not (os.path.exists('ed_train_data.pt') and os.path.exists('ed_test_data.pt')):
    train_faces, train_labels, train_landmarks = preprocess_faces(train_raw, 'train')
    test_faces, test_labels, test_landmarks    = preprocess_faces(test_raw, 'test')
    torch.save(
        dict(faces=train_faces, labels=train_labels, landmarks=train_landmarks),
        'ed_train_data.pt',
    )
    torch.save(
        dict(faces=test_faces, labels=test_labels, landmarks=test_landmarks),
        'ed_test_data.pt',
    )
else:
    print(" Loading preprocessed data...")
    train_data = torch.load('ed_train_data.pt')
    test_data  = torch.load('ed_test_data.pt')
    train_faces     = train_data['faces']
    train_labels    = train_data['labels']
    train_landmarks = train_data['landmarks']
    test_faces      = test_data['faces']
    test_labels     = test_data['labels']
    test_landmarks  = test_data['landmarks']

# ---------------------------
# Compute global mean/std for landmarks
# ---------------------------
# One flattened coordinate vector per training sample; the statistics come
# from the training split only and are reused for the test split.
all_landmarks = np.array([np.array(sample).ravel() for sample in train_landmarks])
lm_mean = torch.tensor(np.mean(all_landmarks, axis=0), dtype=torch.float)
lm_std  = torch.tensor(np.std(all_landmarks, axis=0), dtype=torch.float)
lm_std  = lm_std + 1e-6  # keeps the later division finite for constant coordinates

# ---------------------------
# Dataset with stronger augmentation
# ---------------------------
class EmotionDataset(Dataset):
    """Pairs each face crop with its flattened, standardised landmark vector.

    Args:
        faces: indexable collection of image tensors (one per sample).
        labels: indexable collection of labels, aligned with ``faces``.
        landmarks: indexable collection of per-sample landmark coordinates;
            each entry is flattened to a 1-D float tensor on access.
        augment: when true, ``aug_transform`` is applied to the face;
            otherwise only ``base_transform`` (normalisation) is applied.
        lm_mean: value subtracted from the landmark vector, or ``None`` to
            skip centring.
        lm_std: value the landmark vector is divided by, or ``None`` to skip
            scaling.

    NOTE(review): the geometric augmentations (crop, flip, rotation, affine)
    are applied to the image only; the landmark vector returned alongside it
    is left unchanged, so the two inputs can disagree spatially for augmented
    samples.
    NOTE(review): ColorJitter and the 0.5/0.5 normalisation presumably expect
    faces scaled to [0, 1] -- confirm against what the preprocessing stores.
    """

    def __init__(self, faces, labels, landmarks, augment=True, lm_mean=None, lm_std=None):
        self.faces = faces
        self.labels = labels
        self.landmarks = landmarks
        self.augment = augment
        self.lm_mean = lm_mean
        self.lm_std = lm_std
        self.aug_transform = transforms.Compose([
            transforms.RandomResizedCrop(128, scale=(0.8, 1.0)),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(15),
            transforms.ColorJitter(0.2, 0.15, 0.15),
            transforms.RandomApply([transforms.RandomAffine(10, translate=(0.1, 0.1))], p=0.3),
            transforms.RandomErasing(p=0.2, scale=(0.02, 0.1)),  # new
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
        ])
        self.base_transform = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

    def __len__(self):
        return len(self.faces)

    def __getitem__(self, idx):
        """Return ``(face, landmarks, label)`` for sample ``idx``."""
        face = self.faces[idx]
        landmarks = torch.tensor(np.array(self.landmarks[idx]).flatten(), dtype=torch.float)
        # The constructor defaults are None; treat that as "no standardisation"
        # instead of failing on `tensor - None`.
        if self.lm_mean is not None:
            landmarks = landmarks - self.lm_mean
        if self.lm_std is not None:
            landmarks = landmarks / self.lm_std
        transform = self.aug_transform if self.augment else self.base_transform
        return transform(face), landmarks, self.labels[idx]

# Both splits share the landmark statistics; only the training split augments.
train_dataset = EmotionDataset(
    train_faces, train_labels, train_landmarks,
    augment=True, lm_mean=lm_mean, lm_std=lm_std,
)
test_dataset = EmotionDataset(
    test_faces, test_labels, test_landmarks,
    augment=False, lm_mean=lm_mean, lm_std=lm_std,
)

# ---------------------------
# DataLoader with balancing
# ---------------------------
# Weight every sample by median_class_count / its_class_count so that classes
# below the median size are drawn more often and larger ones less often.
class_counts = Counter(train_labels.tolist())
median = np.median(list(class_counts.values()))
class_weights = {cls: median / count for cls, count in class_counts.items()}
sample_weights = [class_weights[cls] for cls in train_labels.tolist()]
sampler = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True,
)

train_loader = DataLoader(dataset=train_dataset, batch_size=64, sampler=sampler)
test_loader  = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

# ---------------------------
# Model: deeper classifier & landmark MLP
# ---------------------------
class EmotionCNN(nn.Module):
    """Two-branch classifier: a small CNN over the face plus an MLP over landmarks.

    The image branch ends in a 4x4 adaptive average pool, so it yields
    256*4*4 features per sample; the landmark branch yields 256. Both are
    concatenated and passed through the classifier head.

    Args:
        num_classes: number of output logits (default 7, as before).
        landmark_dim: length of the flattened landmark vector
            (default 1404 = 468 landmarks x 3 coordinates, as before).

    The layer layout and names are unchanged, so state dicts saved with the
    default sizes remain loadable.
    """

    def __init__(self, num_classes=7, landmark_dim=1404):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=1), nn.BatchNorm2d(64), nn.LeakyReLU(0.1), nn.MaxPool2d(2), nn.Dropout(0.3),
            nn.Conv2d(64, 128, 3, padding=1), nn.BatchNorm2d(128), nn.LeakyReLU(0.1), nn.MaxPool2d(2), nn.Dropout(0.4),
            nn.Conv2d(128, 256, 3, padding=1), nn.BatchNorm2d(256), nn.LeakyReLU(0.1), nn.AdaptiveAvgPool2d((4, 4)), nn.Dropout(0.4),
        )
        self.landmark_mlp = nn.Sequential(
            nn.Linear(landmark_dim, 256), nn.BatchNorm1d(256), nn.LeakyReLU(0.1), nn.Dropout(0.4),
        )
        self.classifier = nn.Sequential(
            nn.Linear(256 * 4 * 4 + 256, 256), nn.BatchNorm1d(256), nn.LeakyReLU(0.1), nn.Dropout(0.4),
            nn.Linear(256, 128), nn.BatchNorm1d(128), nn.LeakyReLU(0.1), nn.Dropout(0.3),
            nn.Linear(128, num_classes),
        )

    def forward(self, x, landmarks):
        """Return class logits for a batch of images ``x`` and landmark vectors."""
        x = self.features(x)
        x = torch.flatten(x, 1)
        l = self.landmark_mlp(landmarks)
        x = torch.cat([x, l], dim=1)
        return self.classifier(x)

model = EmotionCNN().to(device)

# ---------------------------
# Optimizer & scheduler
# ---------------------------
epochs, patience = 50, 10
# Start below any reachable F1 so the first epoch always writes a checkpoint;
# with a start value of 0 a run whose F1 never exceeded 0 saved nothing and
# the later load would fail or pick up a file from an earlier run.
best_f1, counter = float("-inf"), 0

criterion = nn.CrossEntropyLoss(label_smoothing=0.05)  # reduced smoothing
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-6)
# One cosine cycle spread over the planned number of epochs.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)

# ---------------------------
# Training loop
# ---------------------------
# NOTE(review): checkpoint selection and early stopping below are driven by
# test_loader, so the reported test scores are not from held-out data --
# consider carving a validation split out of the training data.
print("\n Starting training")

for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for images, landmarks, labels in tqdm(train_loader, desc=f"Epoch [{epoch+1}/{epochs}]"):
        images, landmarks, labels = images.to(device), landmarks.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images, landmarks)
        loss = criterion(outputs, labels)
        loss.backward()
        # Clip the global gradient norm to 0.5 before the update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()
        running_loss += loss.item()
    avg_loss = running_loss / len(train_loader)

    # Eval with TTA: average the logits of the image and its horizontal flip.
    # NOTE(review): the flipped image is paired with the unflipped landmarks.
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for images, landmarks, labels in test_loader:
            images, landmarks = images.to(device), landmarks.to(device)
            outputs = model(images, landmarks)
            outputs_flip = model(torch.flip(images, dims=[3]), landmarks)
            outputs_mean = (outputs + outputs_flip) / 2
            _, preds = outputs_mean.max(1)
            y_pred.extend(preds.cpu().numpy())
            y_true.extend(labels.numpy())

    val_f1 = f1_score(y_true, y_pred, average='macro', zero_division=0)
    val_acc = accuracy_score(y_true, y_pred)
    print(f"Epoch [{epoch+1}] Loss:{avg_loss:.4f} | Val Acc:{val_acc:.4f} | Val F1:{val_f1:.4f}")

    scheduler.step()

    if val_f1 > best_f1:
        # Strict improvement: keep this checkpoint and reset the patience counter.
        best_f1, counter = val_f1, 0
        torch.save(model.state_dict(), "ed_final_best_model.pth")
        print(" Saved best model")
    else:
        counter += 1
        if counter >= patience:
            print(" Early stopping")
            break

# ---------------------------
# Final evaluation
# ---------------------------
# Reload the best checkpoint; map_location keeps this working when the file
# was written on a different device than the one in use now.
model.load_state_dict(torch.load("ed_final_best_model.pth", map_location=device))
model.eval()

def evaluate(loader):
    """Score the global ``model`` on ``loader`` with horizontal-flip TTA.

    Args:
        loader: iterable yielding ``(images, landmarks, labels)`` batches.

    Returns:
        Dict with ``accuracy`` and macro-averaged ``precision``, ``recall``
        and ``f1``, each as a plain float.
    """
    model.eval()  # do not rely on the caller having switched modes
    y_true, y_pred = [], []
    with torch.no_grad():
        for images, landmarks, labels in loader:
            images, landmarks = images.to(device), landmarks.to(device)
            outputs = model(images, landmarks)
            # Average logits over the image and its horizontal flip.
            outputs_flip = model(torch.flip(images, dims=[3]), landmarks)
            outputs_mean = (outputs + outputs_flip) / 2
            _, preds = outputs_mean.max(1)
            y_pred.extend(preds.cpu().numpy())
            y_true.extend(labels.numpy())
    return {
        'accuracy': float(accuracy_score(y_true, y_pred)),
        'precision': float(precision_score(y_true, y_pred, average='macro', zero_division=0)),
        'recall': float(recall_score(y_true, y_pred, average='macro', zero_division=0)),
        'f1': float(f1_score(y_true, y_pred, average='macro', zero_division=0)),
    }

# train_loader draws augmented samples with replacement, so scoring through it
# measures neither the real training set nor a repeatable quantity. Evaluate
# on every training sample once, without augmentation.
train_eval_loader = DataLoader(
    EmotionDataset(train_faces, train_labels, train_landmarks,
                   augment=False, lm_mean=lm_mean, lm_std=lm_std),
    batch_size=64, shuffle=False,
)
train_metrics = evaluate(train_eval_loader)
test_metrics  = evaluate(test_loader)

print("\n Final Performance:")
print(f"Train Accuracy: {train_metrics['accuracy']:.4f}")
print(f"Test Accuracy:  {test_metrics['accuracy']:.4f}")
print(f"Test F1-Score:  {test_metrics['f1']:.4f}")

with open("ed_final_train_metrics.json", "w") as f:
    json.dump(train_metrics, f, indent=2)
with open("ed_final_test_metrics.json", "w") as f:
    json.dump(test_metrics, f, indent=2)

print("  Done!")

  from .autonotebook import tqdm as notebook_tqdm


Device: cpu
 Loading preprocessed data...

 Starting training


Epoch [1/50]: 100%|██████████| 191/191 [09:31<00:00,  2.99s/it]


Epoch [1] Loss:1.8585 | Val Acc:0.3083 | Val F1:0.2495
 Saved best model


Epoch [2/50]: 100%|██████████| 191/191 [12:22<00:00,  3.89s/it]


Epoch [2] Loss:1.6819 | Val Acc:0.4438 | Val F1:0.3560
 Saved best model


Epoch [3/50]: 100%|██████████| 191/191 [13:24<00:00,  4.21s/it]


Epoch [3] Loss:1.6101 | Val Acc:0.4230 | Val F1:0.3502


Epoch [4/50]: 100%|██████████| 191/191 [10:23<00:00,  3.26s/it]


Epoch [4] Loss:1.5720 | Val Acc:0.3943 | Val F1:0.3404


Epoch [5/50]: 100%|██████████| 191/191 [10:08<00:00,  3.19s/it]


Epoch [5] Loss:1.5358 | Val Acc:0.4362 | Val F1:0.3819
 Saved best model


Epoch [6/50]: 100%|██████████| 191/191 [09:55<00:00,  3.12s/it]


Epoch [6] Loss:1.5206 | Val Acc:0.4794 | Val F1:0.4092
 Saved best model


Epoch [7/50]: 100%|██████████| 191/191 [10:00<00:00,  3.14s/it]


Epoch [7] Loss:1.4782 | Val Acc:0.4758 | Val F1:0.4214
 Saved best model


Epoch [8/50]: 100%|██████████| 191/191 [4:17:19<00:00, 80.83s/it]     


Epoch [8] Loss:1.4812 | Val Acc:0.4415 | Val F1:0.3916


Epoch [9/50]: 100%|██████████| 191/191 [08:18<00:00,  2.61s/it]


Epoch [9] Loss:1.4496 | Val Acc:0.4702 | Val F1:0.4080


Epoch [10/50]: 100%|██████████| 191/191 [07:56<00:00,  2.49s/it]


Epoch [10] Loss:1.4469 | Val Acc:0.4936 | Val F1:0.4328
 Saved best model


Epoch [11/50]: 100%|██████████| 191/191 [07:37<00:00,  2.40s/it]


Epoch [11] Loss:1.4250 | Val Acc:0.4738 | Val F1:0.4197


Epoch [12/50]: 100%|██████████| 191/191 [07:39<00:00,  2.41s/it]


Epoch [12] Loss:1.4090 | Val Acc:0.5048 | Val F1:0.4296


Epoch [13/50]: 100%|██████████| 191/191 [07:43<00:00,  2.43s/it]


Epoch [13] Loss:1.3943 | Val Acc:0.5002 | Val F1:0.4370
 Saved best model


Epoch [14/50]: 100%|██████████| 191/191 [07:42<00:00,  2.42s/it]


Epoch [14] Loss:1.3923 | Val Acc:0.4517 | Val F1:0.4102


Epoch [15/50]: 100%|██████████| 191/191 [07:47<00:00,  2.45s/it]


Epoch [15] Loss:1.3645 | Val Acc:0.4929 | Val F1:0.4375
 Saved best model


Epoch [16/50]: 100%|██████████| 191/191 [07:38<00:00,  2.40s/it]


Epoch [16] Loss:1.3592 | Val Acc:0.5236 | Val F1:0.4645
 Saved best model


Epoch [17/50]: 100%|██████████| 191/191 [07:48<00:00,  2.45s/it]


Epoch [17] Loss:1.3453 | Val Acc:0.4909 | Val F1:0.4500


Epoch [18/50]: 100%|██████████| 191/191 [07:41<00:00,  2.42s/it]


Epoch [18] Loss:1.3462 | Val Acc:0.5084 | Val F1:0.4728
 Saved best model


Epoch [19/50]: 100%|██████████| 191/191 [07:40<00:00,  2.41s/it]


Epoch [19] Loss:1.3529 | Val Acc:0.5183 | Val F1:0.4765
 Saved best model


Epoch [20/50]: 100%|██████████| 191/191 [07:38<00:00,  2.40s/it]


Epoch [20] Loss:1.3193 | Val Acc:0.5160 | Val F1:0.4724


Epoch [21/50]: 100%|██████████| 191/191 [07:44<00:00,  2.43s/it]


Epoch [21] Loss:1.3071 | Val Acc:0.4705 | Val F1:0.4400


Epoch [22/50]: 100%|██████████| 191/191 [07:45<00:00,  2.44s/it]


Epoch [22] Loss:1.3265 | Val Acc:0.5242 | Val F1:0.4775
 Saved best model


Epoch [23/50]: 100%|██████████| 191/191 [07:38<00:00,  2.40s/it]


Epoch [23] Loss:1.3106 | Val Acc:0.5203 | Val F1:0.4833
 Saved best model


Epoch [24/50]: 100%|██████████| 191/191 [07:39<00:00,  2.41s/it]


Epoch [24] Loss:1.3068 | Val Acc:0.5223 | Val F1:0.4790


Epoch [25/50]: 100%|██████████| 191/191 [07:51<00:00,  2.47s/it]


Epoch [25] Loss:1.2736 | Val Acc:0.5364 | Val F1:0.4968
 Saved best model


Epoch [26/50]: 100%|██████████| 191/191 [07:40<00:00,  2.41s/it]


Epoch [26] Loss:1.2906 | Val Acc:0.5236 | Val F1:0.4869


Epoch [27/50]: 100%|██████████| 191/191 [07:38<00:00,  2.40s/it]


Epoch [27] Loss:1.2840 | Val Acc:0.5302 | Val F1:0.4953


Epoch [28/50]: 100%|██████████| 191/191 [07:43<00:00,  2.43s/it]


Epoch [28] Loss:1.2735 | Val Acc:0.5354 | Val F1:0.5073
 Saved best model


Epoch [29/50]: 100%|██████████| 191/191 [07:46<00:00,  2.44s/it]


Epoch [29] Loss:1.2586 | Val Acc:0.5298 | Val F1:0.4952


Epoch [30/50]: 100%|██████████| 191/191 [07:44<00:00,  2.43s/it]


Epoch [30] Loss:1.2636 | Val Acc:0.5153 | Val F1:0.5052


Epoch [31/50]: 100%|██████████| 191/191 [07:46<00:00,  2.44s/it]


Epoch [31] Loss:1.2578 | Val Acc:0.5223 | Val F1:0.4948


Epoch [32/50]: 100%|██████████| 191/191 [07:44<00:00,  2.43s/it]


Epoch [32] Loss:1.2517 | Val Acc:0.5338 | Val F1:0.5073
 Saved best model


Epoch [33/50]: 100%|██████████| 191/191 [07:42<00:00,  2.42s/it]


Epoch [33] Loss:1.2470 | Val Acc:0.5523 | Val F1:0.5109
 Saved best model


Epoch [34/50]: 100%|██████████| 191/191 [07:52<00:00,  2.47s/it]


Epoch [34] Loss:1.2485 | Val Acc:0.5285 | Val F1:0.5000


Epoch [35/50]: 100%|██████████| 191/191 [07:55<00:00,  2.49s/it]


Epoch [35] Loss:1.2291 | Val Acc:0.5401 | Val F1:0.5040


Epoch [36/50]: 100%|██████████| 191/191 [07:52<00:00,  2.47s/it]


Epoch [36] Loss:1.2381 | Val Acc:0.5265 | Val F1:0.4966


Epoch [37/50]: 100%|██████████| 191/191 [07:47<00:00,  2.45s/it]


Epoch [37] Loss:1.2410 | Val Acc:0.5387 | Val F1:0.5022


Epoch [38/50]: 100%|██████████| 191/191 [07:48<00:00,  2.45s/it]


Epoch [38] Loss:1.2283 | Val Acc:0.5394 | Val F1:0.4980


Epoch [39/50]: 100%|██████████| 191/191 [08:07<00:00,  2.55s/it]


Epoch [39] Loss:1.2333 | Val Acc:0.5364 | Val F1:0.5037


Epoch [40/50]: 100%|██████████| 191/191 [08:23<00:00,  2.64s/it]


Epoch [40] Loss:1.2201 | Val Acc:0.5457 | Val F1:0.5102


Epoch [41/50]: 100%|██████████| 191/191 [07:49<00:00,  2.46s/it]


Epoch [41] Loss:1.2224 | Val Acc:0.5473 | Val F1:0.5069


Epoch [42/50]: 100%|██████████| 191/191 [07:33<00:00,  2.37s/it]


Epoch [42] Loss:1.2075 | Val Acc:0.5364 | Val F1:0.5019


Epoch [43/50]: 100%|██████████| 191/191 [08:30<00:00,  2.67s/it]


Epoch [43] Loss:1.2179 | Val Acc:0.5447 | Val F1:0.5083
 Early stopping

 Final Performance:
Train Accuracy: 0.6172
Test Accuracy:  0.5523
Test F1-Score:  0.5109
  Done!
