<a href="https://colab.research.google.com/github/AleksandreBakhtadze/ML-abakh22-facial-expression-recognition/blob/main/facial_expression_train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q kaggle
!pip install -q wandb

In [2]:
# wandb is already installed by the setup cell above, so the second
# (non-quiet) install was redundant; only authenticate here.
import wandb
wandb.login()



<IPython.core.display.Javascript object>

[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mabakh22[0m ([33mabakh22-free-university-of-tbilisi-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [3]:
# Copy the Kaggle API token into ~/.kaggle, the default location used by the Kaggle CLI.
# Expects kaggle.json to already be present in the current working directory.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Restrict the token file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

In [4]:
!kaggle datasets download -d msambare/fer2013
!unzip -q fer2013.zip

Dataset URL: https://www.kaggle.com/datasets/msambare/fer2013
License(s): DbCL-1.0
Downloading fer2013.zip to /content
 83% 50.0M/60.3M [00:00<00:00, 522MB/s]
100% 60.3M/60.3M [00:00<00:00, 538MB/s]


In [5]:
import os
import torch
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim

In [6]:
# Preprocessing pipeline applied to every image loaded from disk.
preprocessing_steps = [
    transforms.Grayscale(),                         # collapse to a single channel
    transforms.Resize((48, 48)),                    # fixed 48x48 spatial size
    transforms.ToTensor(),                          # PIL image -> float tensor in [0, 1]
    transforms.Normalize(mean=(0.5,), std=(0.5,)),  # (x - 0.5) / 0.5 -> [-1, 1]
]
transform = transforms.Compose(preprocessing_steps)


In [7]:
BATCH_SIZE = 64  # samples per mini-batch, shared by both loaders

# ImageFolder treats each sub-folder of the split directory as one class.
train_dataset, test_dataset = (
    datasets.ImageFolder(root=split_dir, transform=transform)
    for split_dir in ('train', 'test')
)

# Shuffle only the training split; evaluation order stays fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Class names as inferred from the folder names (optional sanity check)
print(train_dataset.classes)


['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']


In [8]:
class EmotionCNN(nn.Module):
    """Small two-block CNN mapping 1x48x48 grayscale images to class logits.

    Args:
        num_classes: Number of output logits. When omitted, falls back to
            ``len(train_dataset.classes)`` from the notebook namespace, which
            is what the class previously always did. Passing it explicitly
            lets the model be built without the dataset being loaded.
    """

    def __init__(self, num_classes=None):
        super().__init__()
        if num_classes is None:
            # Backward-compatible default: size the head from the loaded dataset.
            num_classes = len(train_dataset.classes)

        self.conv = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),  # (1, 48, 48) -> (32, 48, 48)
            nn.ReLU(),
            nn.MaxPool2d(2),                             # -> (32, 24, 24)
            nn.Conv2d(32, 64, kernel_size=3, padding=1), # -> (64, 24, 24)
            nn.ReLU(),
            nn.MaxPool2d(2),                             # -> (64, 12, 12)
        )
        self.fc = nn.Sequential(
            nn.Linear(64 * 12 * 12, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),  # Output: one logit per emotion
        )

    def forward(self, x):
        """Return raw (unnormalised) logits of shape (batch, num_classes)."""
        x = self.conv(x)
        # Flatten everything but the batch dimension; unlike ``view`` this
        # also works if the tensor is not contiguous.
        x = x.flatten(start_dim=1)
        return self.fc(x)

# Instantiate the network; it is moved to the selected device in the training cell.
model = EmotionCNN()

In [10]:
# Start a Weights & Biases run; the config and metrics logged in later cells attach to it.
wandb.init(project="facial-expression-recognition", name="simple-cnn-run1")


In [11]:
# Record the run's hyperparameters on the active wandb run.
config = wandb.config
run_settings = {
    "epochs": 5,
    "batch_size": 64,
    "learning_rate": 0.001,
    "optimizer": "Adam",
    "architecture": "SimpleCNN",
}
for setting_name, setting_value in run_settings.items():
    setattr(config, setting_name, setting_value)

In [12]:
# Use the GPU when available; the model and every batch must be on the same device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

criterion = nn.CrossEntropyLoss()
# Hyperparameters come from the wandb config rather than repeated literals, so
# the settings recorded for the run are the ones actually used for training.
optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)

# Train
for epoch in range(config.epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()  # clear gradients left over from the previous step
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Mean of the per-batch mean losses for this epoch.
    avg_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch+1}, Loss: {avg_loss}")

    # log to wandb
    wandb.log({"epoch": epoch+1, "loss": avg_loss})

Epoch 1, Loss: 1.345321045952013
Epoch 2, Loss: 1.2055726477457314
Epoch 3, Loss: 1.0844882904553999
Epoch 4, Loss: 0.9510287311932026
Epoch 5, Loss: 0.8194674294616172


In [13]:
# Measure top-1 accuracy on the test split.
model.eval()
correct, total = 0, 0
with torch.no_grad():  # inference only, so skip autograd bookkeeping
    for batch_images, batch_labels in test_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)
        logits = model(batch_images)
        predicted = logits.argmax(dim=1)  # index of the largest logit per sample
        total += batch_labels.size(0)
        correct += (predicted == batch_labels).sum().item()

print(f"Test Accuracy: {100 * correct / total:.2f}%")

Test Accuracy: 53.76%


In [14]:
# Log the accuracy computed by the evaluation cell instead of a hand-copied
# literal, so a re-run can never report a stale number. Rounded to two
# decimals to match the value printed above.
wandb.log({"Test Accuracy": round(100 * correct / total, 2)})
wandb.finish()

0,1
Test Accuracy,▁
epoch,▁▃▅▆█
loss,█▆▅▃▁

0,1
Test Accuracy,53.76
epoch,5.0
loss,0.81947
