# Smile Detection on the Genki-4K Dataset

This notebook demonstrates an end-to-end smile detection pipeline built with PyTorch. It covers data loading, optional face detection/cropping, preprocessing, CNN model training, evaluation, and model export for deployment. The code assumes the [Genki-4K dataset](http://mplab.ucsd.edu) is available locally, either downloaded and unpacked by hand or fetched with the `kagglehub` cell in the Setup section.


## 1. Setup

Install the required libraries if they are not already present in your environment.


In [None]:
# Optional: uncomment and run if you need to install dependencies
# !pip install torch torchvision torchaudio scikit-learn matplotlib opencv-python tqdm kagglehub

In [None]:
import kagglehub

# Download the latest version of the "talhasar/genki4k" dataset through kagglehub.
# NOTE(review): `path` is only printed here; presumably it should be used as
# `TrainingConfig.dataset_root` (which defaults to a placeholder) — confirm the
# unpacked directory layout before wiring it in.
path = kagglehub.dataset_download("talhasar/genki4k")

print("Path to dataset files:", path)


In [None]:
import os
import cv2
import math
import time
from dataclasses import dataclass
from typing import Callable, Dict, List, Optional, Tuple

import matplotlib.pyplot as plt
import numpy as np
import torch
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score
from torch import nn
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import transforms
from tqdm.auto import tqdm

### Reproducibility Helpers

In [None]:
def set_seed(seed: int = 42) -> None:
    """Seed every random number generator the notebook relies on.

    Seeds Python's ``random`` module, NumPy, and PyTorch (CPU and all CUDA
    devices), then switches cuDNN to deterministic mode with autotuning
    (``benchmark``) turned off.

    Args:
        seed: Value handed to each seeding function.
    """
    import random

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seeder in seeders:
        seeder(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


set_seed()

## 2. Configuration

In [None]:
@dataclass
class TrainingConfig:
    """Hyperparameters, paths, and feature switches shared by the notebook cells.

    Note that the ``num_workers`` and ``use_cuda`` defaults are computed once,
    when this class body is executed, not each time an instance is created.
    """

    dataset_root: str = "/path/to/genki4k"  # Change to your dataset root
    labels_file: str = "labels.txt"  # joined onto dataset_root by the dataset class
    image_size: Tuple[int, int] = (128, 128)  # target size passed to transforms.Resize
    batch_size: int = 64
    num_workers: int = min(8, os.cpu_count() or 2)  # DataLoader workers; falls back to 2 if cpu_count() is None
    learning_rate: float = 1e-3  # Adam learning rate
    weight_decay: float = 1e-4  # Adam weight decay
    num_epochs: int = 25
    train_ratio: float = 0.7  # fraction of samples used for training
    val_ratio: float = 0.15  # fraction used for validation; the remainder becomes the test split
    use_cuda: bool = torch.cuda.is_available()
    face_detection: bool = True  # crop to the largest detected face before the transforms
    # Despite its name this is NOT a confidence threshold: it is forwarded as the
    # `scaleFactor` argument of the Haar cascade's detectMultiScale call.
    min_detection_confidence: float = 1.1  # scale factor for Haar cascade
    cascade_path: Optional[str] = None  # leave None to use OpenCV's default
    model_dir: str = "artifacts"  # directory that receives the saved checkpoints
    model_name: str = "smile_cnn_genki4k.pt"  # checkpoint file name; the best-val copy gets a "best_" prefix


# Instantiate the configuration, make sure the checkpoint directory exists,
# and select the compute device from the `use_cuda` flag.
config = TrainingConfig()
os.makedirs(config.model_dir, exist_ok=True)
device = torch.device("cuda" if config.use_cuda else "cpu")
device

## 3. Data Loading & Preprocessing

### 3.1 Label Parsing Utility

The Genki-4K `labels.txt` file contains one sample per line. Each line starts with a smile label (`1` for smiling, `-1` for not smiling) followed by metadata and the relative image path. We only care about the first and last entries.

In [None]:
def parse_genki4k_labels(labels_path: str) -> List[Tuple[str, int]]:
    """Read a labels file into ``(image_path, label)`` pairs.

    Each non-blank line is split on whitespace. The last token is taken as the
    image path and the first token as the raw label: the literal string ``"1"``
    maps to 1, anything else maps to 0.

    Args:
        labels_path: Path to the UTF-8 encoded labels file.

    Returns:
        One ``(image_path, label)`` tuple per non-blank line, in file order.
    """
    with open(labels_path, "r", encoding="utf-8") as handle:
        # str.split() with no arguments already ignores surrounding whitespace,
        # so blank lines yield an empty token list and are filtered out below.
        tokenized = (raw_line.split() for raw_line in handle)
        return [(tokens[-1], int(tokens[0] == "1")) for tokens in tokenized if tokens]

### 3.2 Face Detection Helper

We use OpenCV's Haar Cascade to optionally detect and crop faces. If no face is detected, the original image is returned.

In [None]:
def get_face_detector(cascade_path: Optional[str] = None) -> cv2.CascadeClassifier:
    """Load a Haar cascade classifier for face detection.

    Args:
        cascade_path: Path to a cascade XML file. When ``None``, the
            ``haarcascade_frontalface_default.xml`` file from
            ``cv2.data.haarcascades`` is used.

    Returns:
        The loaded ``cv2.CascadeClassifier``.

    Raises:
        FileNotFoundError: If the classifier reports itself empty after loading.
    """
    resolved_path = cascade_path
    if resolved_path is None:
        resolved_path = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"

    classifier = cv2.CascadeClassifier(resolved_path)
    if not classifier.empty():
        return classifier
    raise FileNotFoundError(f"Unable to load Haar cascade from {resolved_path}")


def detect_and_crop_face(
    image: np.ndarray,
    detector: cv2.CascadeClassifier,
    scale_factor: float = 1.1,
    min_neighbors: int = 5,
    min_size: Tuple[int, int] = (30, 30),
) -> np.ndarray:
    """Crop an image to the largest face the detector finds.

    The image is converted from BGR to grayscale for detection only; the crop
    is taken from the original image.

    Args:
        image: Image array converted with ``cv2.COLOR_BGR2GRAY`` for detection.
        detector: Cascade classifier used to locate faces.
        scale_factor: Forwarded as ``scaleFactor`` to ``detectMultiScale``.
        min_neighbors: Forwarded as ``minNeighbors``.
        min_size: Forwarded as ``minSize``.

    Returns:
        The region of the largest-area detection, or ``image`` unchanged when
        nothing is detected.
    """

    def _area(box) -> int:
        # Each detection is (x, y, width, height).
        return box[2] * box[3]

    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    boxes = detector.detectMultiScale(
        grayscale,
        scaleFactor=scale_factor,
        minNeighbors=min_neighbors,
        minSize=min_size,
    )
    if len(boxes) == 0:
        return image

    left, top, width, height = max(boxes, key=_area)
    return image[top : top + height, left : left + width]

### 3.3 Custom Dataset

In [None]:
class Genki4KDataset(Dataset):
    """Dataset yielding ``(image_tensor, label)`` pairs listed in a labels file.

    Images are read with OpenCV, optionally cropped to the largest detected
    face, converted from BGR to RGB, and then passed through ``transform``.
    """

    def __init__(
        self,
        root_dir: str,
        labels_file: str,
        transform: Optional[Callable] = None,
        use_face_detector: bool = False,
        face_detector: Optional[cv2.CascadeClassifier] = None,
        scale_factor: float = 1.1,
    ) -> None:
        """Index the samples described by ``labels_file`` under ``root_dir``.

        Args:
            root_dir: Directory that image paths and ``labels_file`` are relative to.
            labels_file: Name of the labels file inside ``root_dir``.
            transform: Callable applied to the RGB image array; when falsy the
                image is converted to a float CHW tensor scaled by 1/255.
            use_face_detector: Whether to crop faces before transforming.
            face_detector: Detector used for cropping; cropping is skipped
                when this is ``None`` even if ``use_face_detector`` is set.
            scale_factor: Scale factor forwarded to the face cropping helper.

        Raises:
            FileNotFoundError: If the labels file does not exist.
        """
        labels_path = os.path.join(root_dir, labels_file)
        if not os.path.isfile(labels_path):
            raise FileNotFoundError(f"labels file not found: {labels_path}")

        self.samples = parse_genki4k_labels(labels_path)
        self.root_dir, self.transform = root_dir, transform
        self.use_face_detector, self.face_detector = use_face_detector, face_detector
        self.scale_factor = scale_factor

    def __len__(self) -> int:
        """Return the number of indexed samples."""
        return len(self.samples)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, int]:
        """Load, optionally crop, and transform the sample at ``idx``.

        Raises:
            FileNotFoundError: If the image file is missing.
            RuntimeError: If OpenCV cannot decode the image.
        """
        rel_path, label = self.samples[idx]
        full_path = os.path.join(self.root_dir, rel_path)
        if not os.path.isfile(full_path):
            raise FileNotFoundError(f"image not found: {full_path}")

        frame = cv2.imread(full_path)
        if frame is None:
            raise RuntimeError(f"failed to read image: {full_path}")

        # Cropping needs both the flag and an actual detector instance.
        crop_enabled = self.use_face_detector and self.face_detector is not None
        if crop_enabled:
            frame = detect_and_crop_face(frame, self.face_detector, scale_factor=self.scale_factor)

        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        if not self.transform:
            # Fallback: HWC uint8 array -> CHW float tensor in [0, 1].
            return torch.from_numpy(rgb).permute(2, 0, 1).float() / 255.0, label
        return self.transform(rgb), label

### 3.4 Transforms and Dataloaders

In [None]:
# Build transforms.
# Training adds light augmentation (horizontal flip, brightness/contrast jitter);
# validation/test only resize and normalize so their metrics are deterministic.
train_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(config.image_size),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

val_test_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(config.image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

face_detector = get_face_detector(config.cascade_path) if config.face_detection else None

# Arguments shared by the training and evaluation views of the data.
dataset_kwargs = dict(
    root_dir=config.dataset_root,
    labels_file=config.labels_file,
    use_face_detector=config.face_detection,
    face_detector=face_detector,
    scale_factor=config.min_detection_confidence,
)

# Two dataset objects over the same files, differing only in their transform.
# The subsets returned by random_split all point at ONE underlying dataset, so
# assigning `subset.dataset.transform` per split overwrites the same attribute
# and the last assignment wins for every split (training would silently lose
# its augmentation). Separate instances keep the transforms independent.
dataset = Genki4KDataset(transform=train_transforms, **dataset_kwargs)
eval_dataset = Genki4KDataset(transform=val_test_transforms, **dataset_kwargs)

# Split dataset; the test split takes whatever remains after rounding down.
n_total = len(dataset)
n_train = int(n_total * config.train_ratio)
n_val = int(n_total * config.val_ratio)
n_test = n_total - n_train - n_val

train_dataset, val_split, test_split = random_split(
    dataset,
    lengths=[n_train, n_val, n_test],
    generator=torch.Generator().manual_seed(42),
)

# Reuse the sampled indices on the evaluation dataset so that the three splits
# stay disjoint while validation/test samples get the non-augmenting transform.
val_dataset = torch.utils.data.Subset(eval_dataset, val_split.indices)
test_dataset = torch.utils.data.Subset(eval_dataset, test_split.indices)

# Create dataloaders; only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True, num_workers=config.num_workers)
val_loader = DataLoader(val_dataset, batch_size=config.batch_size, shuffle=False, num_workers=config.num_workers)
test_loader = DataLoader(test_dataset, batch_size=config.batch_size, shuffle=False, num_workers=config.num_workers)

len(train_dataset), len(val_dataset), len(test_dataset)

> **Note:** Running the cell above requires the dataset to be available at `config.dataset_root`.

## 4. Model Construction

In [None]:
class SmileCNN(nn.Module):
    """Four-stage convolutional network producing a single smile logit.

    Each stage is Conv(3x3, padding 1) -> BatchNorm -> ReLU -> MaxPool(2), with
    channel widths 32, 64, 128, 256. The flattened features feed a two-layer
    classifier with dropout. The output is a raw logit of shape ``(batch, 1)``;
    apply a sigmoid to obtain a probability.
    """

    def __init__(self, image_size: Optional[Tuple[int, int]] = None) -> None:
        """Build the network for a given input resolution.

        Args:
            image_size: ``(height, width)`` of the input images, used to size
                the first linear layer. Defaults to ``config.image_size`` when
                omitted, which keeps ``SmileCNN()`` working as before.
        """
        super().__init__()
        if image_size is None:
            image_size = config.image_size

        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),

            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
        )

        # Probe the flattened feature size with a dummy forward pass. Run it in
        # eval mode and without autograd: a train-mode pass would fold the
        # all-zero dummy batch into every BatchNorm layer's running statistics
        # before any real data has been seen.
        self.features.eval()
        with torch.no_grad():
            dummy_input = torch.zeros(1, 3, *image_size)
            flatten_dim = self.features(dummy_input).flatten(1).shape[1]
        self.features.train()

        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(flatten_dim, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(256, 1),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return one logit per sample for a batch of 3-channel images."""
        x = self.features(x)
        x = x.flatten(1)
        x = self.classifier(x)
        return x


# Binary cross-entropy on raw logits (the network emits no sigmoid itself).
criterion = nn.BCEWithLogitsLoss()

# Build the network on the selected device, then hand its parameters to Adam.
model = SmileCNN().to(device)
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=config.learning_rate,
    weight_decay=config.weight_decay,
)

## 5. Training & Validation Loop

In [None]:
def train_one_epoch(model: nn.Module, loader: DataLoader, optimizer: torch.optim.Optimizer, device: torch.device) -> Tuple[float, float]:
    """Run one optimization pass over ``loader``.

    Uses the module-level ``criterion`` as the loss function.

    Args:
        model: Network to train; switched to train mode.
        loader: Yields ``(images, labels)`` batches.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        ``(mean_loss, accuracy)`` averaged over all samples seen, where a
        sample counts as positive when its sigmoid output is at least 0.5.
    """
    model.train()
    loss_sum, hits, seen = 0.0, 0, 0

    for images, labels in tqdm(loader, desc="Train", leave=False):
        images, labels = images.to(device), labels.float().to(device)
        batch_size = images.size(0)

        optimizer.zero_grad()
        logits = model(images).squeeze(1)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()

        predicted = torch.sigmoid(logits) >= 0.5
        # Weight the batch-mean loss by batch size so the epoch mean is per-sample.
        loss_sum += batch_loss.item() * batch_size
        hits += (predicted == labels.bool()).sum().item()
        seen += batch_size

    return loss_sum / seen, hits / seen


def evaluate(model: nn.Module, loader: DataLoader, device: torch.device) -> Tuple[float, float]:
    """Compute loss and accuracy over ``loader`` without updating the model.

    Uses the module-level ``criterion`` as the loss function.

    Args:
        model: Network to evaluate; switched to eval mode.
        loader: Yields ``(images, labels)`` batches.
        device: Device the batches are moved to.

    Returns:
        ``(mean_loss, accuracy)`` averaged over all samples seen, where a
        sample counts as positive when its sigmoid output is at least 0.5.
    """
    model.eval()
    loss_sum, hits, seen = 0.0, 0, 0

    with torch.no_grad():
        for images, labels in tqdm(loader, desc="Eval", leave=False):
            images, labels = images.to(device), labels.float().to(device)
            batch_size = images.size(0)

            logits = model(images).squeeze(1)
            batch_loss = criterion(logits, labels)

            predicted = torch.sigmoid(logits) >= 0.5
            # Weight the batch-mean loss by batch size so the mean is per-sample.
            loss_sum += batch_loss.item() * batch_size
            hits += (predicted == labels.bool()).sum().item()
            seen += batch_size

    return loss_sum / seen, hits / seen

### Training Loop

In [None]:
# Per-epoch metric history, later used for plotting and stored in the checkpoint.
history: Dict[str, List[float]] = {
    metric_name: [] for metric_name in ("train_loss", "train_acc", "val_loss", "val_acc")
}

best_val_loss = math.inf
best_model_path = os.path.join(config.model_dir, "best_" + config.model_name)

for epoch in range(1, config.num_epochs + 1):
    start_time = time.time()
    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, device)
    val_loss, val_acc = evaluate(model, val_loader, device)

    # The history keys are in the same order as this tuple of epoch metrics.
    epoch_metrics = (train_loss, train_acc, val_loss, val_acc)
    for metric_name, metric_value in zip(history, epoch_metrics):
        history[metric_name].append(metric_value)

    # Checkpoint whenever the validation loss improves on the best seen so far.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), best_model_path)

    elapsed = time.time() - start_time
    print(f"Epoch {epoch:02d}/{config.num_epochs} | Train Loss: {train_loss:.4f} Acc: {train_acc:.4f} | Val Loss: {val_loss:.4f} Acc: {val_acc:.4f} | Time: {elapsed:.1f}s")

print(f"Best validation model saved to: {best_model_path}")

> **Tip:** Use fewer epochs for quick experiments or adjust `batch_size`, `learning_rate`, and augmentations to suit your compute budget.

## 6. Evaluation on the Test Set

In [None]:
# Load the best model before evaluating, so the test metrics describe the
# checkpoint with the lowest validation loss rather than the last epoch.
model.load_state_dict(torch.load(best_model_path, map_location=device))
model.to(device)

model.eval()
all_preds: List[int] = []
all_probs: List[float] = []
all_labels: List[int] = []

with torch.no_grad():
    for images, labels in tqdm(test_loader, desc="Test", leave=False):
        images = images.to(device)
        outputs = model(images).squeeze(1)
        # Convert logits to probabilities, then threshold at 0.5 for hard labels.
        probs = torch.sigmoid(outputs).cpu().numpy()
        preds = (probs >= 0.5).astype(int)

        all_probs.extend(probs.tolist())
        all_preds.extend(preds.tolist())
        # Labels are never moved to the device, so they can be converted directly.
        all_labels.extend(labels.numpy().tolist())

acc = accuracy_score(all_labels, all_preds)
f1 = f1_score(all_labels, all_preds)
cm = confusion_matrix(all_labels, all_preds)

print(f"Test Accuracy: {acc:.4f}")
print(f"Test F1-score: {f1:.4f}")
# The newline must be an escape sequence: a raw line break inside a
# single-quoted string literal is a SyntaxError.
print("Confusion Matrix:\n", cm)

### 6.1 Plot Training Curves

In [None]:
# Draw loss and accuracy side by side, each with a train and a validation curve.
epochs = range(1, len(history["train_loss"]) + 1)
plt.figure(figsize=(12, 5))

panels = (("loss", "Loss"), ("acc", "Accuracy"))
for panel_index, (metric_key, metric_label) in enumerate(panels, start=1):
    plt.subplot(1, 2, panel_index)
    plt.plot(epochs, history[f"train_{metric_key}"], label=f"Train {metric_label}")
    plt.plot(epochs, history[f"val_{metric_key}"], label=f"Val {metric_label}")
    plt.xlabel("Epoch")
    plt.ylabel(metric_label)
    plt.title(f"{metric_label} Curves")
    plt.legend()

plt.tight_layout()
plt.show()

## 7. Save the Final Model

In [None]:
# Bundle the weights currently held by `model` with the configuration values
# and the training history into a single checkpoint file.
final_model_path = os.path.join(config.model_dir, config.model_name)
checkpoint = {
    "model_state_dict": model.state_dict(),
    "config": vars(config),
    "history": history,
}
torch.save(checkpoint, final_model_path)
print(f"Final model saved to: {final_model_path}")

## 8. Inference Helper (Optional)

Use the following helper to run inference on individual images.

In [None]:
def predict_smile(model: nn.Module, image_path: str, transform: Callable, device: torch.device) -> float:
    """Return the predicted smile probability for a single image file.

    The image is read with OpenCV, cropped to the largest detected face when
    ``config.face_detection`` is set and the module-level ``face_detector``
    exists, converted to RGB, transformed, and run through ``model`` in eval
    mode.

    Args:
        model: Network producing one logit per image.
        image_path: Path of the image to classify.
        transform: Callable turning an RGB array into a CHW tensor.
        device: Device used for inference.

    Returns:
        Sigmoid of the model's logit.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    frame = cv2.imread(image_path)
    if frame is None:
        raise FileNotFoundError(image_path)

    crop_enabled = config.face_detection and face_detector is not None
    if crop_enabled:
        frame = detect_and_crop_face(frame, face_detector, scale_factor=config.min_detection_confidence)

    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # Add the batch dimension the model expects before moving to the device.
    batch = transform(rgb).unsqueeze(0).to(device)

    model.eval()
    with torch.no_grad():
        return torch.sigmoid(model(batch)).item()


# Example usage (update the path to an image file):
# probability = predict_smile(model, "example.jpg", val_test_transforms, device)
# print(f"Smile probability: {probability:.2%}")

---
## 9. Next Steps

- Fine-tune hyperparameters, augmentations, or architecture for better accuracy.
- Try more advanced backbones (ResNet, MobileNet) for improved performance.
- Export the model to ONNX or TorchScript for deployment on mobile/edge devices.