### Idea
This notebook tries an approach that uses existing models to classify laughter in videos.

#### Process
1. Fetch Data Set (FER2013, RAVDESS, AffectNet)
2. Use OpenCV to cut down the images
3. Use a CNN to extract features
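4. Fine-tune a binary classifier (smiling vs. not smiling) on top of the CNN and evaluate it on held-out data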

In [None]:
import os
import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, Subset, WeightedRandomSampler
from PIL import Image
import kagglehub
import numpy as np

# Fetch the AffectNet dataset through kagglehub (change the dataset handle if needed).
# The returned value is used below as the root directory of the image folders.
affectnet_path = kagglehub.dataset_download("thienkhonghoc/affectnet")

# Preprocessing applied to every image: resize, convert to a tensor, then
# normalize each RGB channel (values commonly used with ImageNet-pretrained models).
IMAGE_SIZE = (64, 64)
CHANNEL_MEAN = [0.485, 0.456, 0.406]
CHANNEL_STD = [0.229, 0.224, 0.225]

preprocessing_steps = [
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=CHANNEL_MEAN, std=CHANNEL_STD),
]
transform = transforms.Compose(preprocessing_steps)

# Custom dataset for AffectNet
class AffectNetDataset(Dataset):
    """Image dataset whose labels are taken from each image's parent folder name.

    The directory tree under ``root_dir`` is walked recursively. An image is
    kept when its parent folder name starts with an integer, optionally
    followed by ``_`` and arbitrary text (e.g. ``3`` or ``3_happy``); that
    integer becomes the label. Images in folders without such a prefix are
    skipped.

    Args:
        root_dir: Directory that is searched recursively for images.
        transform: Optional callable applied to each loaded RGB ``PIL`` image.

    Attributes are plain lists so callers can inspect or remap them:
        image_paths: Path of every kept image, in deterministic sorted order.
        labels: Integer label for the image at the same position.
    """

    # Compared against the lower-cased file name, so ".JPG" etc. also match.
    IMAGE_EXTENSIONS = (".jpg", ".png", ".jpeg")

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []

        self._find_images(root_dir)

    def _find_images(self, directory):
        """Collect image paths and folder-derived labels below ``directory``."""
        for root, dirs, files in os.walk(directory):
            # Sorting in place fixes the traversal order, which os.walk would
            # otherwise leave to the filesystem; this keeps indices reproducible.
            dirs.sort()

            # The label depends only on the folder, so parse it once per folder.
            try:
                emotion_label = int(os.path.basename(root).split("_")[0])
            except ValueError:
                continue  # Folder without a numeric prefix: skip its files.

            for file in sorted(files):
                if file.lower().endswith(self.IMAGE_EXTENSIONS):
                    self.image_paths.append(os.path.join(root, file))
                    self.labels.append(emotion_label)

    def __len__(self):
        """Return the number of labelled images that were found."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            A dict with ``"pixel_values"`` (the RGB image, transformed if a
            transform was given) and ``"labels"`` (the stored label).
        """
        image_path = self.image_paths[idx]
        label = self.labels[idx]

        # convert() produces an independent image, so the underlying file can
        # be closed right away instead of lingering until garbage collection.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        return {"pixel_values": image, "labels": label}

# Load the AffectNet dataset
affectnet_dataset = AffectNetDataset(root_dir=affectnet_path, transform=transform)
if len(affectnet_dataset) == 0:
    # Fail early with a clear message instead of an obscure error further down.
    raise RuntimeError(f"No labelled images found under {affectnet_path!r}")

# Collapse the emotion labels into a binary target: 1 (smiling) / 0 (not smiling).
# NOTE(review): 3 is treated as "happiness" here. In the original AffectNet
# label scheme 1 is happy and 3 is surprise — confirm against the folder
# names of this particular Kaggle dataset.
HAPPINESS_LABEL = 3
affectnet_dataset.labels[:] = [
    1 if label == HAPPINESS_LABEL else 0 for label in affectnet_dataset.labels
]

# Split dataset into train, validation, and test sets (80 / 10 / remainder).
# The dataset is ordered folder by folder, so contiguous index ranges would
# give the three splits very different class mixes. Shuffle the indices first,
# with a fixed seed so the split is reproducible across runs.
split_generator = torch.Generator().manual_seed(42)
shuffled_indices = torch.randperm(
    len(affectnet_dataset), generator=split_generator
).tolist()

train_size = int(0.8 * len(affectnet_dataset))
val_size = int(0.1 * len(affectnet_dataset))
test_size = len(affectnet_dataset) - train_size - val_size

train_dataset = Subset(affectnet_dataset, shuffled_indices[:train_size])
val_dataset = Subset(affectnet_dataset, shuffled_indices[train_size:train_size + val_size])
test_dataset = Subset(affectnet_dataset, shuffled_indices[train_size + val_size:])

# Calculate class weights for training set (inverse class frequency).
# minlength=2 keeps both classes present in the weight vector even if one is
# missing from the training split; clamp avoids dividing by a zero count.
train_labels = [affectnet_dataset.labels[i] for i in train_dataset.indices]
class_counts = np.bincount(train_labels, minlength=2)
class_weights = 1.0 / torch.tensor(class_counts, dtype=torch.float).clamp(min=1)
sample_weights = class_weights[torch.tensor(train_labels, dtype=torch.long)]
sampler = WeightedRandomSampler(sample_weights, len(sample_weights))

# Create DataLoaders
batch_size = 32
# The sampler rebalances the classes for training; it replaces shuffle=True.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

In [None]:
import torch.nn as nn
import torchvision.models as models

# Example backbone: ResNet18 with pretrained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour
# of `weights=` — confirm against the installed version.
NUM_CLASSES = 2  # 2 classes: laughing/smiling, neutral
model = models.resnet18(pretrained=True)

# Swap the final fully connected layer for a fresh head with NUM_CLASSES outputs.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(in_features=num_ftrs, out_features=NUM_CLASSES)

In [None]:
import torch.optim as optim

# Loss and optimizer for fine-tuning the whole network.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print(device)

for epoch in range(num_epochs):
    # --- Training: one pass over the (class-rebalanced) training loader ---
    model.train()
    for batch in train_dataloader:
        inputs = batch["pixel_values"].to(device)
        labels = batch["labels"].to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # --- Validation ---
    model.eval()
    val_loss = 0.0  # running SUM of per-sample losses
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in val_dataloader:
            inputs = batch["pixel_values"].to(device)
            labels = batch["labels"].to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # criterion returns the batch mean; weight it by the batch size so
            # a shorter final batch does not skew the epoch average.
            n_samples = labels.size(0)
            val_loss += loss.item() * n_samples

            _, predicted = outputs.max(1)
            total += n_samples
            correct += predicted.eq(labels).sum().item()

    # Guard against an empty validation loader instead of dividing by zero.
    mean_val_loss = val_loss / total if total else float("nan")
    val_accuracy = 100 * correct / total if total else float("nan")
    print(f"Epoch {epoch+1}/{num_epochs}, Val Loss: {mean_val_loss}, Val Accuracy: {val_accuracy}%")

In [None]:
# Final evaluation on the held-out test split.
model.eval()
test_loss = 0.0  # running SUM of per-sample losses
correct = 0
total = 0
with torch.no_grad():
    for batch in test_dataloader:
        inputs = batch["pixel_values"].to(device)
        labels = batch["labels"].to(device)

        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # criterion returns the batch mean; weight it by the batch size so a
        # shorter final batch does not skew the overall average.
        n_samples = labels.size(0)
        test_loss += loss.item() * n_samples

        _, predicted = outputs.max(1)
        total += n_samples
        correct += predicted.eq(labels).sum().item()

# Guard against an empty test loader instead of dividing by zero.
mean_test_loss = test_loss / total if total else float("nan")
test_accuracy = 100 * correct / total if total else float("nan")
print(f"Test Loss: {mean_test_loss}, Test Accuracy: {test_accuracy}%")