In [1]:
import torch
print(torch.cuda.is_available())        
print(torch.cuda.get_device_name(0))    

True
NVIDIA GeForce RTX 2060


In [2]:
import os
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import DataLoader, Dataset
from PIL import Image
from sklearn.preprocessing import LabelEncoder

class FER2013Dataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.samples = []

        for emotion_dir in sorted(os.listdir(root_dir)):
            emotion_path = os.path.join(root_dir, emotion_dir)
            if not os.path.isdir(emotion_path):
                continue
            for img_name in os.listdir(emotion_path):
                img_path = os.path.join(emotion_path, img_name)
                self.samples.append((img_path, emotion_dir))

        self.le = LabelEncoder()
        self.labels = self.le.fit_transform([label for _, label in self.samples])

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img_path, label = self.samples[idx]
        image = Image.open(img_path).convert("L")  # Grayscale
        if self.transform:
            image = self.transform(image)
        label = self.le.transform([label])[0]
        return image, label

transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)
])

train_dir = os.path.expanduser("~/.cache/kagglehub/datasets/msambare/fer2013/versions/1/train")
test_dir = os.path.expanduser("~/.cache/kagglehub/datasets/msambare/fer2013/versions/1/test")

train_dataset = FER2013Dataset(train_dir, transform=transform)
test_dataset = FER2013Dataset(test_dir, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = models.resnet50(pretrained=True)
model.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
model.fc = nn.Linear(model.fc.in_features, 7)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0003)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)


def train_model(num_epochs=30):
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        correct, total = 0, 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            del images, labels, outputs, loss
            torch.cuda.empty_cache()

        scheduler.step()
        train_acc = 100 * correct / total
        print(f"Epoch [{epoch+1}/{num_epochs}] - Loss: {total_loss:.4f} - Train Accuracy: {train_acc:.2f}%")

        

def evaluate_model():
    model.eval()
    correct, total = 0, 0

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")
    torch.save(model.state_dict(), "resnet50_fer2013_last.pth")
    print(" Model saved as 'resnet50_fer2013_last.pth'")



if __name__ == "__main__":
    print("Starting training on FER2013 with ResNet50...")
    train_model(num_epochs=30)
    evaluate_model()





Starting training on FER2013 with ResNet50...
Epoch [1/30] - Loss: 537.6481 - Train Accuracy: 54.58%


OutOfMemoryError: CUDA out of memory. Tried to allocate 26.00 MiB. GPU 0 has a total capacity of 5.78 GiB of which 28.94 MiB is free. Including non-PyTorch memory, this process has 5.74 GiB memory in use. Of the allocated memory 5.44 GiB is allocated by PyTorch, and 197.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)