<a href="https://colab.research.google.com/github/wandb/edu/blob/main/lightning/projects/emotion_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<img src="https://i.imgur.com/gb6B4ig.png" width="400" alt="Weights & Biases" />

# Emotion Classifier

In [None]:
%%capture
# The %%capture cell magic above silences this cell's output (pip/curl chatter).
# Install the third-party packages this notebook relies on.
!pip install pytorch_lightning torchviz wandb

# Base URL of the repository and the path of the helper module inside it.
repo_url = "https://raw.githubusercontent.com/wandb/edu/main/"
utils_path = "lightning/utils.py"
# Download a util file of helper methods for this notebook.
# It is written to utils.py in the working directory so the later
# `import utils` can pick it up.
!curl {repo_url + utils_path} --output utils.py

In [None]:
from pathlib import Path

import pandas as pd
import pytorch_lightning as pl
import numpy as np
import subprocess
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import transforms
import torchvision.datasets
import os

import wandb

import utils

## Facial Expression `DataModule` and `DataLoaders`

In [None]:
class FERDataModule(pl.LightningDataModule):
  """DataModule for downloading and preparing the FER2013 dataset.

  The archive at ``tar_url`` is unpacked next to the notebook, the faces are
  read from ``fer2013.csv``, scaled to [0, 1], and split into a training set
  (leading rows) and a validation set (trailing rows).

  Attributes set by ``prepare_data``:
    training_data, validation_data: ``TensorDataset``s of (face, emotion).
    num_total, num_train, num_validation: number of examples in each set.
    num_classes: number of emotion classes (largest label + 1), as an int.
  """
  tar_url = "https://www.dropbox.com/s/opuvvdv3uligypx/fer2013.tar"
  local_path = Path("fer2013")

  def __init__(self, batch_size=64):
    """Store the batch sizes used by the DataLoaders.

    Args:
      batch_size: number of examples per training batch. Validation
        batches are ten times larger.
    """
    super().__init__()  # ⚡: we inherit from LightningDataModule
    self.batch_size = batch_size
    self.val_batch_size = 10 * self.batch_size

  def prepare_data(self, validation_size=0.2, force_reload=False):
    """Download, read, normalize, and split the dataset.

    Args:
      validation_size: fraction of the examples, in [0, 1], held out for
        validation. The held-out examples are taken from the end of the file.
      force_reload: if True, redo the work even if the data was already
        prepared on this instance.

    Raises:
      ValueError: if ``validation_size`` is not in [0, 1].
    """
    # ⚡: how do we set up the data?
    if hasattr(self, "training_data") and not force_reload:
      return  # already prepared; only redo the work when asked to

    if not 0 <= validation_size <= 1:
      raise ValueError(
        f"validation_size must be a fraction in [0, 1], got {validation_size}")

    # download the data from the internet
    self.download_data()

    # read it from a .csv file
    faces, emotions = self.read_data()

    # normalize it: pixels are parsed as 8-bit values, so this maps to [0, 1]
    faces = torch.divide(faces, 255.)

    # split it into training and validation, honoring the requested fraction
    num_total = len(faces)
    num_validation = int(num_total * validation_size)
    # slice with a forward index: a negative slice of size 0 (faces[:-0])
    # would select nothing and leave the training set empty
    num_train = num_total - num_validation

    self.training_data = torch.utils.data.TensorDataset(
      faces[:num_train], emotions[:num_train])
    self.validation_data = torch.utils.data.TensorDataset(
      faces[num_train:], emotions[num_train:])

    # record metadata
    self.num_total = num_total
    # labels start at 0, so the class count is the largest label plus one
    self.num_classes = int(torch.max(emotions)) + 1
    self.num_train = num_train
    self.num_validation = num_validation

  def train_dataloader(self):  # ⚡: how do we go from dataset to dataloader?
    """The DataLoaders returned by a DataModule produce data for a model.

    This DataLoader is used during training."""
    # reshuffle every epoch so batches are not tied to the row order of the csv
    return DataLoader(
      self.training_data, batch_size=self.batch_size, shuffle=True)

  def val_dataloader(self):  # ⚡: what about during validation?
    """The DataLoaders returned by a DataModule produce data for a model.

    This DataLoader is used during validation, at the end of each epoch."""
    return DataLoader(self.validation_data, batch_size=self.val_batch_size)

  def download_data(self):
    """Fetch and unpack the dataset archive unless ``local_path`` exists.

    Raises:
      subprocess.CalledProcessError: if the shell pipeline exits non-zero.
    """
    if not os.path.exists(self.local_path):
      print("Downloading the face emotion dataset...")
      # stream the archive from curl straight into tar; the command string is
      # built only from the class-level url constant
      subprocess.check_output(
          f"curl -SL {self.tar_url} | tar xz", shell=True)
      print("...done")

  def read_data(self):
    """Read the data from a .csv into torch Tensors.

    Returns:
      A ``(faces, emotions)`` pair: ``faces`` holds one 48x48 float32 image
      per row of the csv with the raw (unnormalized) pixel values, and
      ``emotions`` holds the matching entries of the "emotion" column.
    """
    data = pd.read_csv(self.local_path / "fer2013.csv")
    width, height = 48, 48

    # each "pixels" entry is a space-separated string of pixel values
    faces = np.asarray([
      np.asarray(pixel_sequence.split(" "), dtype=np.uint8)
        .reshape(width, height)
        .astype("float32")
      for pixel_sequence in data["pixels"]
    ])
    emotions = data["emotion"].to_numpy()

    return torch.tensor(faces), torch.tensor(emotions)

## Defining the `Model`

In [None]:
class LitEmotionClassifier(utils.LoggedImageClassifierModule):
  """Linear classifier over flattened 48x48 single-channel face images.

  A single fully-connected layer maps each flattened image to 7 scores, one
  per entry of ``labels``; ``forward`` returns their log-softmax. The
  optimizer class, its keyword arguments, and the loss come from ``config``.
  """

  def __init__(self, config, max_images_to_display=32):
    """Build the layer and pull the training settings out of ``config``.

    Args:
      config: mapping with the keys "optimizer" (called with the model
        parameters), "optimizer.params" (keyword arguments for that call),
        and "loss".
      max_images_to_display: forwarded unchanged to the parent constructor.
    """
    super().__init__(max_images_to_display=max_images_to_display)

    # one input feature per pixel: height * width * channels
    pixels_per_face = 48 * 48 * 1
    self.linear = torch.nn.Linear(pixels_per_face, 7)
    self.labels = [
      "Angry", "Disgusted", "Afraid", "Happy", "Sad", "Surprised", "Neutral",
    ]

    self.optimizer, self.optimizer_params = (
      config["optimizer"], config["optimizer.params"])
    self.loss = config["loss"]

  def forward(self, x):
    """Return log-probabilities over the classes for a batch ``x``."""
    # keep the batch dimension, collapse everything after it
    logits = self.linear(torch.flatten(x, start_dim=1))
    return F.log_softmax(logits, dim=1)

  def configure_optimizers(self):
    """Instantiate the configured optimizer over this module's parameters."""
    make_optimizer = self.optimizer
    return make_optimizer(self.parameters(), **self.optimizer_params)

## Building the `Model` and Loading the Data

In [None]:
# Settings for this run: the batch size goes to the data module, and the
# whole dict is handed to the model.
config = dict(
  batch_size=256,
  max_epochs=10,
  activation=torch.nn.ReLU(),
  loss=torch.nn.NLLLoss(),
  optimizer=torch.optim.Adam,
)
# this key contains a dot, so it cannot be passed as a keyword argument above
config["optimizer.params"] = {"lr": 0.001}

# build the data module and the model, then fetch and split the data
dmodule = FERDataModule(batch_size=config["batch_size"])
lec = LitEmotionClassifier(config)
dmodule.prepare_data()

### Debugging Code

In [None]:
# for debugging purposes (checking shapes, etc.), make these available

# use the GPU when one is present; otherwise fall back to the CPU so this
# shape check still runs instead of crashing on a CPU-only runtime
device = "cuda" if torch.cuda.is_available() else "cpu"

dloader = dmodule.train_dataloader()  # set up the Loader

example_batch = next(iter(dloader))  # grab a batch from the Loader
example_x, example_y = example_batch[0].to(device), example_batch[1].to(device)

print(f"Input Shape: {example_x.shape}")
print(f"Target Shape: {example_y.shape}")

# the model must live on the same device as the batch
lec.to(device)
outputs = lec.forward(example_x)
print(f"Output Shape: {outputs.shape}")
print(f"Loss : {lec.loss(outputs, example_y)}")

### Running `.fit`

In [None]:
# 👟 configure Trainer
# `accelerator`/`devices` replace the `gpus` argument, which current
# pytorch_lightning releases no longer accept (the install above is unpinned)
trainer = pl.Trainer(accelerator="gpu", devices=1,  # use one GPU for .forward
                     logger=pl.loggers.WandbLogger(
                       project="lit-fer", entity="wandb", config=config,
                       save_code=True),  # log to Weights & Biases
                     max_epochs=config["max_epochs"], log_every_n_steps=1)

# 🏃‍♀️ run the Trainer on the model
trainer.fit(lec, dmodule)

# 💾 save the model
# this pickles the whole module object (not just its state_dict), then
# uploads the file to the active W&B run
torch.save(lec, "model.pt")
wandb.save("model.pt")

# 🏁 close out the run
wandb.finish()