<img src="https://i.imgur.com/gb6B4ig.png" width="400" alt="Weights & Biases" />

# Emotion Classifier

In [1]:
%%capture
!pip install pytorch_lightning torchviz wandb

repo_url = "https://raw.githubusercontent.com/wandb/edu/main/"
utils_path = "lightning/utils.py"
!curl {repo_url + utils_path} --output utils.py

In [3]:
from pathlib import Path

import math
import pandas as pd
import pytorch_lightning as pl
import numpy as np
import subprocess
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import transforms
import torchvision.datasets
import os

import wandb

import utils

## Facial Expression `DataModule` and `DataLoaders`

In [4]:
class FERDataModule(pl.LightningDataModule):
  """DataModule for downloading and preparing the FER2013 dataset."""
  
  tar_url = "https://www.dropbox.com/s/opuvvdv3uligypx/fer2013.tar"
  local_path = Path("fer2013")

  def __init__(self, batch_size=64):
    super().__init__()  # ⚡: we inherit from LightningDataModule
    self.batch_size = batch_size
    self.val_batch_size = 10 * self.batch_size

  def prepare_data(self, validation_size=0.2, force_reload=False):
    # ⚡: how do we set up the data?
    if hasattr(self, "training_data") and not force_reload:
      return  # only re-run if we haven't been run before

    # download the data from the internet
    self.download_data()

    # read it from a .csv file
    faces, emotions = self.read_data()

    # normalize it
    faces = torch.divide(faces, 255.)

    # split it into training and validation
    validation_size = int(len(faces) * 0.8)

    self.training_data = torch.utils.data.TensorDataset(
      faces[:-validation_size], emotions[:-validation_size])
    self.validation_data = torch.utils.data.TensorDataset(
      faces[-validation_size:], emotions[-validation_size:])
    
    # record metadata
    self.num_total, self.num_classes = emotions.shape[0], torch.max(emotions)
    self.num_train = self.num_total - validation_size
    self.num_validation = validation_size

  def train_dataloader(self):  # ⚡: how do we go from dataset to dataloader?
    """The DataLoaders returned by a DataModule produce data for a model.
    
    This DataLoader is used during training."""
    return DataLoader(self.training_data, batch_size=self.batch_size)

  def val_dataloader(self):  # ⚡: what about during validation?
    """The DataLoaders returned by a DataModule produce data for a model.
    
    This DataLoader is used during validation, at the end of each epoch."""
    return DataLoader(self.validation_data, batch_size=self.val_batch_size)

  def download_data(self):
    if not os.path.exists(self.local_path):
      print("Downloading the face emotion dataset...")
      subprocess.check_output(
          f"curl -SL {self.tar_url} | tar xz", shell=True)
      print("...done")
      
  def read_data(self):
    """Read the data from a .csv into torch Tensors"""
    data = pd.read_csv(self.local_path / "fer2013.csv")
    pixels = data["pixels"].tolist()
    width, height = 48, 48
    faces = []
    for pixel_sequence in pixels:
        face = np.asarray(pixel_sequence.split(
            ' '), dtype=np.uint8).reshape(width, height)
        faces.append(face.astype("float32"))

    faces = np.asarray(faces)
    emotions = data["emotion"].to_numpy()

    return torch.tensor(faces), torch.tensor(emotions)

## Defining the `Model`

**The given model, which was able to predict at around 30% accuracy.**

In [21]:
class LitEmotionClassifier(utils.LoggedImageClassifierModule):

  def __init__(self, config, max_images_to_display=32):
    super().__init__(max_images_to_display=max_images_to_display)
    self.linear = torch.nn.Linear(48 * 48 * 1, 7)
    self.labels = ["Angry", "Disgusted", "Afraid", "Happy",
                   "Sad", "Surprised", "Neutral"]

    self.optimizer = config["optimizer"]
    self.optimizer_params = config["optimizer.params"]
    self.loss = config["loss"]

  def forward(self, x):
    x = torch.flatten(x, start_dim=1)
    x = self.linear(x)
    return F.log_softmax(x, dim=1)

  def configure_optimizers(self):
    return self.optimizer(self.parameters(), **self.optimizer_params)

The model that I created by copying and tweaking a model made by Weights and Biases for MNIST categorization.

In [5]:
class FullyConnected(pl.LightningModule):

  def __init__(self, in_features, out_features, activation=None):
    super().__init__()
    self.linear = torch.nn.Linear(in_features, out_features)
    if activation is None:
      activation = torch.nn.Identity()  # defaults to passing inputs unchanged
    self.activation = activation

  def forward(self, x):
    return self.activation(self.linear(x))


class Convolution(pl.LightningModule):

  def __init__(self, in_channels, out_channels, kernel_size,
               activation=None):
    super().__init__()
    self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size)
    if activation is None:
      activation = torch.nn.Identity()  # defaults to passing inputs unchanged
    self.activation = activation

  def forward(self, x):
    return self.activation(self.conv2d(x))


class LitEmotionClassifier(utils.LoggedImageClassifierModule):
  """A simple CNN Model, with under-the-hood wandb
  and pytorch-lightning channels (logging, metrics, etc.).
  """
  
  def __init__(self, config, max_images_to_display=32, labels = ["Angry", "Disgusted", "Afraid", "Happy",
                   "Sad", "Surprised", "Neutral" ]):  # make the model
    super().__init__(max_images_to_display=max_images_to_display)
    self.labels = labels
   
    # first convolutional component
    self.conv_layers = torch.nn.ModuleList([ 
                                            
      # hidden conv layer
      Convolution(in_channels=1, kernel_size=config["kernel_size"],
                  activation=config["activation"],
                  out_channels=config["conv.channels"][0]),
      # hidden conv layer
      Convolution(in_channels=config["conv.channels"][0], kernel_size=config["kernel_size"],
                  activation=config["activation"],
                  out_channels=config["conv.channels"][1]),
      # pooling often follows 2 convs
      torch.nn.MaxPool2d(config["pool_size"]),
    ])


    # need a fixed-size input for fully-connected component,
    #  so apply a "re-sizing" layer, to size set in config
    self.resize_layer = torch.nn.AdaptiveAvgPool2d(
      (config["final_height"], config["final_width"]))

    final_size = config["final_height"] * config["final_width"] * config["conv.channels"][-1]
    self.fc_layers = torch.nn.ModuleList([ 
      FullyConnected(in_features=final_size, activation=config["activation"],
                     out_features=config["fc1.size"]),
      FullyConnected(in_features=config["fc1.size"], activation=config["activation"],
                     out_features=config["fc2.size"]),
      FullyConnected(in_features=config["fc2.size"],  # "read-out" layer
                     out_features=10),
    ])

    self.loss = config["loss"]
    self.optimizer = config["optimizer"]
    self.optimizer_params = config["optimizer.params"]
    config.update({f"channels_{ii}": channels
                   for ii, channels in enumerate(config["conv.channels"])})

  def forward(self, x):  # produce outputs
    x = torch.unsqueeze(x, 1)  # adding singleton channel dimension
    # first apply convolutional layers
    for layer in self.conv_layers: 
      x = layer(x)

    # then convert to a fixed-size vector
    x = self.resize_layer(x)
    x = torch.flatten(x, start_dim=1)

    # then apply the fully-connected layers
    for layer in self.fc_layers: # snap together the LEGOs
      x = layer(x)

    return F.log_softmax(x, dim=1)  # compute log of softmax, for numerical reasons

  def configure_optimizers(self):  # ⚡: setup for .fit
    return self.optimizer(self.parameters(), **self.optimizer_params)

## Building the `Model` and Loading the Data

In [6]:
config = {
  "batch_size": 256,
  "max_epochs": 10,
  "activation": torch.nn.ReLU(),
  "loss": torch.nn.NLLLoss(),
  "optimizer": torch.optim.Adam,
  "optimizer.params": {"lr": 0.0001},
  "kernel_size": 7,
  "conv.channels": [128, 256],
  "pool_size": 2,
   "final_height": 10,
  "final_width": 10,
  "fc1.size": 1024,
  "fc2.size": 512,
}

dmodule = FERDataModule(batch_size=config["batch_size"])
lec = LitEmotionClassifier(config)
dmodule.prepare_data()

### Debugging Code

In [9]:
# for debugging purposes (checking shapes, etc.), make these available
dloader = dmodule.train_dataloader()  # set up the Loader

example_batch = next(iter(dloader))  # grab a batch from the Loader
example_x, example_y = example_batch[0].to("cuda"), example_batch[1].to("cuda")

print(f"Input Shape: {example_x.shape}")
print(f"Target Shape: {example_y.shape}")

lec.to("cuda")
outputs = lec.forward(example_x)
print(f"Output Shape: {outputs.shape}")
print(f"Loss : {lec.loss(outputs, example_y)}")

Input Shape: torch.Size([256, 48, 48])
Target Shape: torch.Size([256])
Output Shape: torch.Size([256, 10])
Loss : 2.304185152053833


### Running `.fit`

In [11]:
config.update({"team": 2})  # use this to add your runs to a team

In [12]:
# 👟 configure Trainer 
trainer = pl.Trainer(gpus=1,  # use the GPU for .forward
                     logger=pl.loggers.WandbLogger(
                       project="lit-fer", entity="ucla-acm-ai", config=config,
                       save_code=True),  # log to Weights & Biases
                     max_epochs=config["max_epochs"], log_every_n_steps=1)

# 🏃‍♀️ run the Trainer on the model
trainer.fit(lec, dmodule)

# 💾 save the model
torch.save(lec, "model.pt")
wandb.save("model.pt")

# 🏁 close out the run
wandb.finish()

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name         | Type              | Params
---------------------------------------------------
0 | train_acc    | Accuracy          | 0     
1 | valid_acc    | Accuracy          | 0     
2 | conv_layers  | ModuleList        | 1.6 M 
3 | resize_layer | AdaptiveAvgPool2d | 0     
4 | fc_layers    | ModuleList        | 26.7 M
5 | loss         | NLLLoss           | 0     
---------------------------------------------------
28.4 M    Trainable params
0         Non-trainable params
28.4 M    Total params
113.431   Total estimated model params size (MB)


LitEmotionClassifier(
  (train_acc): Accuracy()
  (valid_acc): Accuracy()
  (conv_layers): ModuleList(
    (0): Convolution(
      (conv2d): Conv2d(1, 128, kernel_size=(7, 7), stride=(1, 1))
      (activation): ReLU()
    )
    (1): Convolution(
      (conv2d): Conv2d(128, 256, kernel_size=(7, 7), stride=(1, 1))
      (activation): ReLU()
    )
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (resize_layer): AdaptiveAvgPool2d(output_size=(10, 10))
  (fc_layers): ModuleList(
    (0): FullyConnected(
      (linear): Linear(in_features=25600, out_features=1024, bias=True)
      (activation): ReLU()
    )
    (1): FullyConnected(
      (linear): Linear(in_features=1024, out_features=512, bias=True)
      (activation): ReLU()
    )
    (2): FullyConnected(
      (linear): Linear(in_features=512, out_features=10, bias=True)
      (activation): Identity()
    )
  )
  (loss): NLLLoss()
)
Parameter Count: 28357642


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…



HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…




VBox(children=(Label(value=' 132.13MB of 641.59MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.2059366…

0,1
global_step,261.0
training/loss,0.13363
training/accuracy,1.0
epoch,9.0
trainer/global_step,289.0
_runtime,377.0
_timestamp,1618612115.0
_step,299.0
validation/loss,1.47926
validation/accuracy,0.43906


0,1
global_step,▁▂▃▃▄▅▆▆▇█
training/loss,█▆▆▆▆▇▅▅▅▆▆▅▃▄▄▄▆▄▅▅▅▅▃▄▃▄▅▃▂▂▂▂▄▁▃▃▃▃▁▂
training/accuracy,▁▃▃▂▂▁▃▃▂▂▂▃▄▂▃▅▃▅▃▃▃▂▆▄▄▃▃▅▅▆▄▇▄▆▅▄▄▄█▆
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇████
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇█
_timestamp,▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇█
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
validation/loss,█▆▅▄▄▃▃▂▂▁
validation/accuracy,▁▃▄▄▅▅▆▆▇█
