In [1]:
from google.colab import drive
drive.mount('/content/drive')

# download the data from kaggle and unzip

! pip install -q kaggle

! mkdir ~/.kaggle

!cp /content/drive/MyDrive/cs231n/kaggle_API/kaggle.json ~/.kaggle/kaggle.json

! chmod 600 ~/.kaggle/kaggle.json

! kaggle competitions download -c challenges-in-representation-learning-facial-expression-recognition-challenge

! unzip challenges-in-representation-learning-facial-expression-recognition-challenge.zip

# install wandb if not present
! pip install -q wandb

Mounted at /content/drive
Downloading challenges-in-representation-learning-facial-expression-recognition-challenge.zip to /content
 97% 276M/285M [00:00<00:00, 598MB/s]
100% 285M/285M [00:00<00:00, 658MB/s]
Archive:  challenges-in-representation-learning-facial-expression-recognition-challenge.zip
  inflating: example_submission.csv  
  inflating: fer2013.tar.gz          
  inflating: icml_face_data.csv      
  inflating: test.csv                
  inflating: train.csv               


In [4]:
import os
import pandas as pd
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import wandb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import io
from contextlib import redirect_stdout
from torchsummary import summary

# Start the Weights & Biases run that the later wandb.log / wandb.save calls
# report to.
wandb.init(project="ml_assignment_4", name="cnn-3")

# Hyperparameters
config = {
    "epochs": 10,
    "batch_size": 64,
    "learning_rate": 1e-3,
    "image_size": 48,
    "num_classes": 7,
    "seed": 42,  # RNG seed; stored with the run so it can be repeated
}
wandb.config.update(config)

# Seed the global RNGs before any model weights are created or any shuffling
# DataLoader is iterated; without this, weight initialisation and batch order
# differ on every run and results cannot be reproduced.
# NOTE: some GPU kernels may still be nondeterministic even with fixed seeds.
np.random.seed(config["seed"])
torch.manual_seed(config["seed"])


# Dataset class
class FacialExpressionDataset(Dataset):
    """Dataset of square grayscale images stored as pixel strings in a DataFrame.

    Every row must provide a 'pixels' column holding ``image_size * image_size``
    whitespace-separated integers, which are parsed as uint8. When the frame
    also has an 'emotion' column its value is returned as the integer label;
    otherwise the label is -1.

    Args:
        dataframe: pandas DataFrame with the columns described above.
        transform: optional callable applied to the PIL image of each item.
        image_size: side length, in pixels, of the square images. Defaults to
            48, the value that was previously hard-coded.
    """

    def __init__(self, dataframe, transform=None, image_size=48):
        self.df = dataframe
        self.transform = transform
        self.image_size = image_size

    def __len__(self):
        """Return the number of rows (samples) in the underlying frame."""
        return len(self.df)

    def _decode_pixels(self, pixel_string):
        """Parse one 'pixels' string into an (image_size, image_size) uint8 array.

        Raises:
            ValueError: if the string does not contain exactly
                ``image_size * image_size`` values.
        """
        side = self.image_size
        values = np.array(pixel_string.split(), dtype=np.uint8)
        if values.size != side * side:
            # reshape would raise ValueError as well; this message names the cause.
            raise ValueError(
                f"expected {side * side} pixel values for a {side}x{side} image, "
                f"got {values.size}"
            )
        return values.reshape(side, side)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at integer position ``idx``.

        ``image`` is a PIL image, or whatever ``transform`` returns for it when
        a transform was given. ``label`` is -1 when there is no 'emotion' column.
        """
        # Look the row up once; each .iloc[idx] on a frame builds a new Series.
        row = self.df.iloc[idx]
        image = Image.fromarray(self._decode_pixels(row['pixels']))
        label = int(row['emotion']) if 'emotion' in self.df.columns else -1

        if self.transform:
            image = self.transform(image)

        return image, label

# Preprocessing applied to every image: grayscale conversion, tensor
# conversion, then normalisation with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.Grayscale(),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# Read the labelled rows and hold out 10% of them for validation. The split is
# stratified on the 'emotion' column and uses a fixed random_state.
train_df = pd.read_csv(os.path.expanduser("/content/train.csv"))
train_data, val_data = train_test_split(
    train_df,
    test_size=0.1,
    random_state=42,
    stratify=train_df['emotion'],
)

# Both splits share the same preprocessing.
train_dataset, val_dataset = (
    FacialExpressionDataset(split, transform=transform)
    for split in (train_data, val_data)
)

# Only the training batches are shuffled; validation order stays fixed.
train_loader = DataLoader(
    dataset=train_dataset,
    shuffle=True,
    batch_size=config["batch_size"],
)
val_loader = DataLoader(
    dataset=val_dataset,
    shuffle=False,
    batch_size=config["batch_size"],
)

# Model
class Net(nn.Module):
    """Three-stage convolutional classifier for single-channel images.

    Each stage is two 3x3 convolutions (padding 1) with ReLU, followed by a
    2x2 max-pool. For a 1x48x48 input the feature maps are 32x24x24, then
    64x12x12, then 128x6x6. The flattened features go through a 512-unit
    hidden layer with dropout (p=0.3, added to reduce overfitting) and a final
    linear layer that produces ``num_classes`` logits.
    """

    def __init__(self, num_classes):
        super().__init__()

        # (in_channels, out_channels) of the three convolutional stages.
        stage_channels = ((1, 32), (32, 64), (64, 128))
        feature_layers = []
        for in_channels, out_channels in stage_channels:
            feature_layers.extend(self._conv_stage(in_channels, out_channels))

        classifier_layers = [
            nn.Flatten(),
            nn.Linear(128 * 6 * 6, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, num_classes),
        ]

        # One flat Sequential so the sub-module indices stay 0..19.
        self.model = nn.Sequential(*feature_layers, *classifier_layers)

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Return the layers of one stage: conv-ReLU-conv-ReLU-maxpool."""
        return [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        ]

    def forward(self, x):
        """Run ``x`` through the whole network and return the class logits."""
        return self.model(x)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

model = Net(num_classes=config["num_classes"])
model = model.to(device)

# log the model summary
# summary() prints its table to stdout; capture the text so it can be both
# shown here and uploaded to wandb.
with redirect_stdout(io.StringIO()) as f:
    summary(model, input_size=(1, 48, 48))
model_summary_str = f.getvalue()

print(model_summary_str)

# Log to wandb as formatted HTML (nicely viewable in UI)
wandb.log(
    {
        "model_summary": wandb.Html(f"<pre>{model_summary_str}</pre>"),
    }
)

# Optimizer and loss used by the training loop.
optimizer = torch.optim.Adam(model.parameters(), lr=config["learning_rate"])
criterion = nn.CrossEntropyLoss()

# Training loop
# The final epoch is not necessarily the best one (validation loss can start
# rising while training loss keeps falling), so the weights with the lowest
# validation loss are checkpointed to "best_model.pth" in addition to the
# final weights in "model.pth".
best_val_loss = float("inf")
best_epoch = None  # stays None if no epoch ever improved on +inf (e.g. NaN loss)

for epoch in range(config["epochs"]):
    model.train()
    running_loss, running_acc = 0.0, 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        _, preds = torch.max(outputs, 1)
        # The criterion returns a per-batch value; weighting it by the batch
        # size lets the division below give a per-sample average even when
        # the last batch is smaller.
        running_loss += loss.item() * images.size(0)
        running_acc += (preds == labels).sum().item()

    train_loss = running_loss / len(train_loader.dataset)
    train_acc = running_acc / len(train_loader.dataset)

    # Validation
    model.eval()
    val_loss, val_acc = 0.0, 0.0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            _, preds = torch.max(outputs, 1)
            val_loss += loss.item() * images.size(0)
            val_acc += (preds == labels).sum().item()

    val_loss /= len(val_loader.dataset)
    val_acc /= len(val_loader.dataset)

    # Checkpoint whenever the validation loss reaches a new minimum.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_epoch = epoch + 1
        torch.save(model.state_dict(), "best_model.pth")

    # Log to wandb
    wandb.log({
        "epoch": epoch + 1,
        "train_loss": train_loss,
        "train_accuracy": train_acc,
        "val_loss": val_loss,
        "val_accuracy": val_acc,
    })

    print(f"Epoch {epoch+1}: Train Acc={train_acc:.4f}, Val Acc={val_acc:.4f}")

# Save and log model
# "model.pth" keeps its original meaning: the weights after the last epoch.
torch.save(model.state_dict(), "model.pth")
wandb.save("model.pth")
# Upload the best checkpoint only if this run wrote one, so a stale file left
# over from an earlier run is never attached to this run.
if best_epoch is not None:
    print(f"Best Val Loss={best_val_loss:.4f} at epoch {best_epoch} (saved to best_model.pth)")
    wandb.save("best_model.pth")
wandb.finish()

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 48, 48]             320
              ReLU-2           [-1, 32, 48, 48]               0
            Conv2d-3           [-1, 32, 48, 48]           9,248
              ReLU-4           [-1, 32, 48, 48]               0
         MaxPool2d-5           [-1, 32, 24, 24]               0
            Conv2d-6           [-1, 64, 24, 24]          18,496
              ReLU-7           [-1, 64, 24, 24]               0
            Conv2d-8           [-1, 64, 24, 24]          36,928
              ReLU-9           [-1, 64, 24, 24]               0
        MaxPool2d-10           [-1, 64, 12, 12]               0
           Conv2d-11          [-1, 128, 12, 12]          73,856
             ReLU-12          [-1, 128, 12, 12]               0
           Conv2d-13          [-1, 128, 12, 12]         147,584
             ReLU-14          [-1, 128,

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▅▆▆▇▇█
train_loss,█▆▅▅▄▄▃▂▂▁
val_accuracy,▁▅▆▇▇█████
val_loss,█▄▃▂▁▁▂▂▄▆

0,1
epoch,10.0
train_accuracy,0.80068
train_loss,0.53265
val_accuracy,0.57715
val_loss,1.46182
