### Idea
This notebook tries an approach with existing models to classify laughter in videos.

#### Process
1. Fetch Data Set (FER2013, RAVDESS, AffectNet)
2. Use OpenCV to cut down the images
3. Use a CNN to extract features
4. Train and evaluate a binary classifier (laughing/smiling vs. neutral)

In [None]:
# # get dataset

# from datasets import load_dataset

# ds = load_dataset("zrthxn/SmilingOrNot") # this dataset uses binary labels (12k images with 1200 labeled 50/50, 64px)
# # ds = load_dataset("akomand/celeba-smile") # this dataset uses text labels (200k images, 178px)

  from .autonotebook import tqdm as notebook_tqdm
Repo card metadata block was not found. Setting CardData to empty.
Generating train split: 100%|██████████| 13233/13233 [00:02<00:00, 6518.52 examples/s] 


In [None]:
from datasets import load_dataset
from torchvision import transforms
from torch.utils.data import DataLoader
import torch

# Data preparation: load both datasets, attach integer labels, attach the image
# transform, and build every DataLoader that the later cells consume.

SEED = 42            # seeds the train/validation split and the global torch RNG
VAL_FRACTION = 0.1   # share of the combined training pool held out for validation
# Expression id used for "happiness", taken from the original cell.
# NOTE(review): canonical AffectNet numbers Happy as 1 (3 is Surprise), while
# FER2013 uses 3 for Happy - confirm which mapping this Hub dataset follows.
AFFECTNET_HAPPY_ID = 3

torch.manual_seed(SEED)

# Load the SmilingOrNot dataset (the dataset this notebook started with).
# NOTE(review): the captured output earlier in the notebook only shows a
# "train" split being generated - confirm that a "test" split really exists.
ds_smile = load_dataset("zrthxn/SmilingOrNot")

# Load the AffectNet dataset.
# NOTE(review): dataset id and config name are kept from the original cell;
# confirm that they resolve on the Hub.
ds_affectnet = load_dataset("affectnet", name="expression")

# Keep only the AffectNet examples whose expression is "happiness".
ds_affectnet_happy = ds_affectnet.filter(
    lambda example: example["expression"] == AFFECTNET_HAPPY_ID
)


def create_affectnet_labels(examples):
    """Label every example of the (happiness-only) batch as positive.

    Args:
        examples: batched dict of columns; only the length of the
            "expression" column is used.

    Returns:
        Dict with a "labels" list holding 1 (smiling/laughing) per example.
    """
    labels = [1] * len(examples["expression"])
    return {"labels": labels}


def create_smile_labels(examples):
    """Copy the dataset's "label" column into the "labels" column.

    Args:
        examples: batched dict of columns containing "label".

    Returns:
        Dict with "labels" equal to the incoming "label" values.
    """
    return {"labels": examples["label"]}


# Labels are added BEFORE the on-the-fly transform is attached, so the label
# functions operate on the raw columns.
ds_affectnet_happy = ds_affectnet_happy.map(create_affectnet_labels, batched=True)
ds_smile = ds_smile.map(create_smile_labels, batched=True)

# Image preprocessing.
transform = transforms.Compose([
    transforms.Resize((64, 64)),  # match the image size of the SmilingOrNot dataset
    transforms.ToTensor(),
    # normalisation statistics used for pretrained CNNs
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


def transform_images(examples):
    """Convert a batch of PIL images to normalised tensors.

    The dict returned here is exactly what the dataset yields when indexed,
    so "labels" has to be forwarded explicitly; otherwise the DataLoader
    batches would contain "pixel_values" only.

    Args:
        examples: batched dict of columns with an "image" list and,
            optionally, a "labels" list.

    Returns:
        Dict with "pixel_values" (list of tensors) and, when present in the
        input, the unchanged "labels".
    """
    batch = {
        "pixel_values": [transform(image.convert("RGB")) for image in examples["image"]]
    }
    if "labels" in examples:
        batch["labels"] = examples["labels"]
    return batch


ds_smile = ds_smile.with_transform(transform_images)
ds_affectnet_happy = ds_affectnet_happy.with_transform(transform_images)

batch_size = 32

# Per-dataset loaders (names kept from the original cell), built once.
train_dataloader_smile = DataLoader(ds_smile["train"], batch_size=batch_size, shuffle=True)
test_dataloader_smile = DataLoader(ds_smile["test"], batch_size=batch_size)

train_dataloader_affectnet = DataLoader(ds_affectnet_happy["train"], batch_size=batch_size, shuffle=True)
val_dataloader_affectnet = DataLoader(ds_affectnet_happy["validation"], batch_size=batch_size)

# Loaders read by the training and test cells. Training uses both datasets.
# Validation is a seeded hold-out of that pool rather than the AffectNet
# validation split, because the latter contains positives only.
train_pool = torch.utils.data.ConcatDataset(
    [ds_smile["train"], ds_affectnet_happy["train"]]
)
num_val = max(1, int(len(train_pool) * VAL_FRACTION))
train_subset, val_subset = torch.utils.data.random_split(
    train_pool,
    [len(train_pool) - num_val, num_val],
    generator=torch.Generator().manual_seed(SEED),
)

train_dataloader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_subset, batch_size=batch_size)
test_dataloader = test_dataloader_smile

In [None]:
import torch.nn as nn
import torchvision.models as models

# Build an ImageNet-pretrained ResNet18 and replace its final layer with a
# fresh classification head for this task.
NUM_CLASSES = 2  # 2 classes: laughing/smiling, neutral

# torchvision >= 0.13 deprecates the boolean `pretrained` flag in favour of
# explicit weight enums; IMAGENET1K_V1 is the checkpoint `pretrained=True`
# refers to. Older torchvision versions lack the enum, so fall back there.
_resnet18_weights = getattr(models, "ResNet18_Weights", None)
if _resnet18_weights is not None:
    model = models.resnet18(weights=_resnet18_weights.IMAGENET1K_V1)
else:
    model = models.resnet18(pretrained=True)

num_ftrs = model.fc.in_features  # input width of the original classifier layer
model.fc = nn.Linear(num_ftrs, NUM_CLASSES)

In [None]:
import torch.optim as optim

# Fine-tune `model` and report validation metrics after every epoch.
# Expects `model`, `train_dataloader` and `val_dataloader` to have been
# defined by earlier cells; batches are dicts with "pixel_values" and "labels".
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

num_epochs = 10
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(num_epochs):
    # Training pass: one optimizer step per mini-batch.
    model.train()
    for batch in train_dataloader:
        inputs, labels = batch["pixel_values"].to(device), batch["labels"].to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # Validation pass: no gradients, accumulate loss and hit counts.
    model.eval()
    val_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for batch in val_dataloader:
            inputs, labels = batch["pixel_values"].to(device), batch["labels"].to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            # Predicted class = index of the largest logit per row.
            _, predicted = torch.max(outputs, 1)
            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

    # Loss is averaged over batches, accuracy over individual samples.
    print(f"Epoch {epoch+1}/{num_epochs}, Val Loss: {val_loss/len(val_dataloader)}, Val Accuracy: {100*correct/total}%")

In [None]:
# Final evaluation on the test loader.
# Expects `model`, `criterion`, `device` and `test_dataloader` from earlier cells.
model.eval()
test_loss, correct, total = 0.0, 0, 0
with torch.no_grad():
    for batch in test_dataloader:
        inputs, labels = batch["pixel_values"].to(device), batch["labels"].to(device)

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        test_loss += loss.item()

        # Predicted class = index of the largest logit per row.
        _, predicted = torch.max(outputs, 1)
        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

# Loss is averaged over batches, accuracy over individual samples.
print(f"Test Loss: {test_loss/len(test_dataloader)}, Test Accuracy: {100*correct/total}%")