In [26]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

# Set device: use the first CUDA GPU when PyTorch can see one, otherwise fall
# back to the CPU so the notebook still runs on machines without a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the CNN architecture
class CNN(nn.Module):
    """Three-stage convolutional classifier for single-channel images.

    Every stage applies a 3x3 convolution (padding 1, so the spatial size is
    kept), a ReLU, and a 2x2 max-pool that halves height and width. The linear
    head expects a flattened feature map of 128 * 6 * 6 values, which is what
    48x48 inputs produce after three halvings, and returns 7 class logits.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 -> 32 channels
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2: 32 -> 64 channels
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 3: 64 -> 128 channels
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU()
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Classifier head: 7 classes for FER2013 dataset
        self.fc1 = nn.Linear(in_features=128 * 6 * 6, out_features=7)

    def forward(self, x):
        """Return the 7 class logits for a batch of images `x`."""
        stages = (
            (self.conv1, self.relu1, self.maxpool1),
            (self.conv2, self.relu2, self.maxpool2),
            (self.conv3, self.relu3, self.maxpool3),
        )
        for conv, activation, pool in stages:
            x = pool(activation(conv(x)))

        # Collapse (channels, height, width) into one feature axis per sample.
        flat = x.view(x.size(0), -1)
        return self.fc1(flat)

# Training hyperparameters
num_epochs = 10        # full passes over the training set
batch_size = 64        # images per mini-batch
learning_rate = 0.001  # step size handed to Adam

# Preprocessing applied to every image: one grayscale channel, 48x48 pixels,
# converted to a tensor and then shifted/scaled with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.Grayscale(),
        transforms.Resize((48, 48)),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# FER2013 splits, one sub-folder per class inside each split directory
train_dataset = ImageFolder("data/train/", transform=transform)
test_dataset = ImageFolder("data/test/", transform=transform)
val_dataset = ImageFolder("data/val/", transform=transform)

# Only the training loader shuffles; evaluation order stays fixed
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Network, moved to the selected device
model = CNN().to(device)

# Cross-entropy on the raw logits, optimised with Adam
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# History collected during training:
#   losslist - training loss sampled every 100 steps
#   val      - validation accuracy (percent) after each epoch
losslist = []
val = []

# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    # The validation pass at the end of each epoch switches the model to eval
    # mode, so training mode has to be restored before the next epoch. Without
    # this, layers such as dropout or batch norm would stay in inference
    # behaviour from the second epoch onwards.
    model.train()

    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report and record the loss every 100 mini-batches
        if (i + 1) % 100 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{total_step}], Loss: {loss.item():.4f}")
            losslist.append(loss.item())

    # Validation pass: no gradients needed, model in inference mode
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            # Index of the largest logit is the predicted class
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Guard against an empty validation set instead of dividing by zero
    accuracy = 100 * correct / total if total else 0.0
    print(f"Val Accuracy: {accuracy:.2f}%")
    val.append(accuracy)

# Test the model
num_classes = 7  # must match the number of logits the model outputs

model.eval()
correct = 0
total = 0
tp = [0] * num_classes  # true positives per class
fp = [0] * num_classes  # false positives per class
fn = [0] * num_classes  # false negatives per class

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Convert each batch to plain Python ints once, rather than indexing
        # the tensors element by element.
        for label, pred in zip(labels.tolist(), predicted.tolist()):
            if pred == label:
                tp[label] += 1
            else:
                # A miss counts against the predicted class (false positive)
                # and against the true class (false negative).
                fp[pred] += 1
                fn[label] += 1

# Guard against an empty test set instead of dividing by zero
accuracy = 100 * correct / total if total else 0.0
print(f"Test Accuracy: {accuracy:.2f}%")

for i in range(num_classes):
    predicted_count = tp[i] + fp[i]  # samples the model assigned to class i
    actual_count = tp[i] + fn[i]     # samples that truly belong to class i

    # A class that is never predicted (or never present) would otherwise
    # raise ZeroDivisionError; report 0.0 for the undefined metric instead.
    precision = tp[i] / predicted_count if predicted_count else 0.0
    recall = tp[i] / actual_count if actual_count else 0.0
    if precision + recall:
        f1_score = 2 * (precision * recall) / (precision + recall)
    else:
        f1_score = 0.0

    print(f"Class {i} - Precision: {precision:.2f}, Recall: {recall:.2f}, F1-score: {f1_score:.2f}")
print(losslist)
print(val)

Epoch [1/10], Step [100/449], Loss: 1.8058
Epoch [1/10], Step [200/449], Loss: 1.7253
Epoch [1/10], Step [300/449], Loss: 1.4672
Epoch [1/10], Step [400/449], Loss: 1.4077
Val Accuracy: 46.78%
Epoch [2/10], Step [100/449], Loss: 1.2675
Epoch [2/10], Step [200/449], Loss: 1.2678
Epoch [2/10], Step [300/449], Loss: 1.1787
Epoch [2/10], Step [400/449], Loss: 1.3085
Val Accuracy: 50.63%
Epoch [3/10], Step [100/449], Loss: 1.2573
Epoch [3/10], Step [200/449], Loss: 1.1325
Epoch [3/10], Step [300/449], Loss: 1.3048
Epoch [3/10], Step [400/449], Loss: 1.2964
Val Accuracy: 53.83%
Epoch [4/10], Step [100/449], Loss: 1.0010
Epoch [4/10], Step [200/449], Loss: 1.1813
Epoch [4/10], Step [300/449], Loss: 1.0324
Epoch [4/10], Step [400/449], Loss: 0.9988
Val Accuracy: 54.78%
Epoch [5/10], Step [100/449], Loss: 1.1461
Epoch [5/10], Step [200/449], Loss: 1.2781
Epoch [5/10], Step [300/449], Loss: 1.0488
Epoch [5/10], Step [400/449], Loss: 0.9991
Val Accuracy: 56.42%
Epoch [6/10], Step [100/449], Loss:

In [2]:
class SimpleConvNet(nn.Module):
    """Two-stage convolutional classifier for single-channel images.

    Each stage is a 3x3 convolution (padding 1), a ReLU and a 2x2 max-pool
    that halves the spatial size. The linear head expects 32 * 12 * 12
    flattened features, which is what 48x48 inputs produce after two
    halvings, and returns 7 class logits.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 -> 16 channels
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2: 16 -> 32 channels
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Classifier head: 7 classes for FER2013 dataset
        self.fc = nn.Linear(in_features=32 * 12 * 12, out_features=7)

    def forward(self, x):
        """Return the 7 class logits for a batch of images `x`."""
        features = self.pool1(self.relu1(self.conv1(x)))
        features = self.pool2(self.relu2(self.conv2(features)))

        # Collapse (channels, height, width) into one feature axis per sample.
        flat = features.view(features.size(0), -1)
        return self.fc(flat)

In [3]:
class ConvAutoencoderNet(nn.Module):
    """Encoder/decoder stack whose output feeds a 7-way linear classifier.

    The encoder uses two stride-2 convolutions and the decoder two stride-2
    transposed convolutions ending in a sigmoid, so the decoder emits one
    channel. The linear head takes 2304 (= 48 * 48) inputs, i.e. it expects
    the decoder output to be 1x48x48.
    """

    def __init__(self):
        super().__init__()

        # Downsampling path: 1 -> 16 -> 32 channels, stride 2 at each step
        encoder_layers = [
            nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        # Upsampling path: 32 -> 16 -> 1 channels, sigmoid on the output
        decoder_layers = [
            nn.ConvTranspose2d(32, 16, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(16, 1, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.Sigmoid(),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

        # Classifier head: 7 classes for FER2013 dataset
        self.fc = nn.Linear(2304, 7)

    def forward(self, x):
        """Return the 7 class logits for a batch of images `x`."""
        decoded = self.decoder(self.encoder(x))

        # Flatten the decoder output to one feature vector per sample.
        flat = decoded.view(decoded.size(0), -1)
        return self.fc(flat)


In [15]:
import cv2

# ...

def face_detection_transform(image):
    """Crop the most prominent face from a BGR image and return it as 48x48 grayscale.

    A frontal-face Haar cascade is run on the grayscale version of `image`.
    When several faces are detected the one with the largest bounding box is
    used. When no face is detected the whole frame is used instead, so the
    function always returns an array that the next pipeline step can consume
    (returning None here makes a following `transforms.ToTensor()` fail).

    Args:
        image: colour image in OpenCV's BGR channel order, e.g. the result of
            `cv2.imread`.

    Returns:
        A 2-D grayscale array with 48 rows and 48 columns.
    """
    # Building the classifier parses an XML file from disk, so do it once and
    # keep the instance on the function object for subsequent calls.
    face_cascade = getattr(face_detection_transform, "_face_cascade", None)
    if face_cascade is None:
        face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
        face_detection_transform._face_cascade = face_cascade

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)

    if len(faces) > 0:
        # Each detection is (x, y, width, height); prefer the largest box
        # rather than whichever detection happens to be listed first.
        x, y, w, h = max(faces, key=lambda box: box[2] * box[3])
        region = image[y:y+h, x:x+w]
    else:
        # No face found: fall back to the full frame.
        region = image

    # Resize to 48x48 pixels, then convert to grayscale
    resized_face = cv2.resize(region, (48, 48))
    gray_face = cv2.cvtColor(resized_face, cv2.COLOR_BGR2GRAY)

    return gray_face

# Preprocessing for raw camera frames: crop/resize the face to 48x48 grayscale,
# convert to a tensor, then shift/scale with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        face_detection_transform,
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

In [24]:
# Load the picture to classify
image_path = "data/webcam/sup.jpg"
image = cv2.imread(image_path)
if image is None:
    # cv2.imread does not raise on a missing or unreadable file; it returns
    # None, which would otherwise surface later as an opaque OpenCV error.
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Preprocess, add the batch dimension and move to the model's device
processed_image = transform(image).unsqueeze(0).to(device)

# Apply the trained model to estimate the label
model = model.to(device)  # keep model and input on the same device
model.eval()
with torch.no_grad():
    output = model(processed_image)
    # Index of the largest logit is the predicted class
    _, predicted = torch.max(output, 1)

estimated_label = predicted.item()
print(estimated_label)

5
