In [1]:
import torch
import torchvision
from torchvision import transforms
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import os
import cv2
from PIL import Image

In [2]:
# Device
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('DEVICE: ', DEVICE)

DEVICE:  cpu


In [3]:
# SETTINGS
BATCH_SIZE = 32
LEARNING_RATE = 0.1
FEATURE_IDX = 1
RANDOM_SEED = 123

In [4]:
from google.colab import drive
drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google.colab'

In [5]:
# TRANSFORMS
train_transforms = transforms.Compose([
    transforms.CenterCrop((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

test_transforms = transforms.Compose([
    transforms.CenterCrop((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Dataset paths
Train_Dataset_path = '/content/drive/MyDrive/CK+ Dataset/train_data/train'
Train_labels_dataset = '/content/drive/MyDrive/CK+ Dataset/train_data/_classes.csv'
Test_Dataset_path = '/content/drive/MyDrive/CK+ Dataset/test_data/test'
Test_labels_dataset = '/content/drive/MyDrive/CK+ Dataset/test_data/_classes.csv'
Valid_Dataset_path = '/content/drive/MyDrive/CK+ Dataset/valid_data/valid'
Valid_labels_dataset = '/content/drive/MyDrive/CK+ Dataset/valid_data/_classes.csv'

EMOTION_CLASSES = {
    0: "anger",
    1: "contempt",
    2: "disgust",
    3: "fear",
    4: "happiness",
    5: "sadness",
    6: "surprise"
}

# Custom PyTorch dataset
class CKPlusDataset(Dataset):
    def __init__(self, img_dir, label_dir, transform=None):
        self.img_dir = img_dir
        self.transform = transform

        # Read CSV file
        self.data = pd.read_csv(label_dir)

        # Ensure column names are correct
        self.data.columns = ["filename", "anger", "contempt", "disgust", "fear", "happy", "sadness", "surprise"]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_name = self.data.iloc[idx, 0]  # Image filename
        label = self.data.iloc[idx, 1:].values.astype(int)  # Multi-class labels (one-hot)
        label = np.argmax(label)  # Convert one-hot to class index
        img_path = os.path.join(self.img_dir, img_name)

        # Check if image exists
        if not os.path.exists(img_path):
            raise FileNotFoundError(f"Image not found: {img_path}")

        # Read image
        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
        image = cv2.resize(image, (128, 128))  # Resize to (128,128)

        if self.transform:
            image = self.transform(Image.fromarray(image))  # Convert to PIL before transforms

        return image, torch.tensor(label, dtype=torch.long)

Train_Dataset = CKPlusDataset(img_dir=Train_Dataset_path, label_dir=Train_labels_dataset, transform=train_transforms)
Test_Dataset = CKPlusDataset(img_dir=Test_Dataset_path, label_dir=Test_labels_dataset, transform=test_transforms)
Valid_Dataset = CKPlusDataset(img_dir=Valid_Dataset_path, label_dir=Valid_labels_dataset, transform=test_transforms)

print(f"Train dataset size: {len(Train_Dataset)}")
print(f"Test dataset size: {len(Test_Dataset)}")
print(f"Valid dataset size: {len(Valid_Dataset)}")

train_loader = DataLoader(Train_Dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(Test_Dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(Valid_Dataset, batch_size=BATCH_SIZE, shuffle=True)

FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/CK+ Dataset/train_data/_classes.csv'

In [6]:
for images, labels in train_loader:
    print(f"Batch shape: {images.shape}")  # (batch_size, 3, 128, 128)
    print(f"Labels: {labels}")
    break

EXAMPLE_IMG = images[0]
EXAMPLE_LABEL = labels[0]


fig, ax = plt.subplots(figsize=(2, 2))
ax.imshow(EXAMPLE_IMG.permute(1, 2, 0))
plt.show()

torch.manual_seed(RANDOM_SEED)


NameError: name 'train_loader' is not defined

In [7]:
torch.manual_seed(RANDOM_SEED)

<torch._C.Generator at 0x2687f9284b0>

In [8]:
# MODEL

class Reshape(nn.Module):
    def __init__(self, *args):
        super().__init__()
        self.size = args

    def forward(self, x):
        return x.view(self.size)

class Trim(nn.Module):
    def forward(self, x):
        return x[:, :, :128, :128]

class VAE(nn.Module):
    def __init__(self):
        super().__init__()

        self.encoder = nn.Sequential(
            nn.Conv2d(3, 32, stride=2, kernel_size=3, bias=False, padding=1),
            nn.BatchNorm2d(32),
            nn.LeakyReLU(0.1, inplace=True),
            nn.Dropout2d(0.25),
            #
            nn.Conv2d(32, 64, stride=2, kernel_size=3, bias=False, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.1, inplace=True),
            nn.Dropout2d(0.25),
            #
            nn.Conv2d(64, 64, stride=2, kernel_size=3, bias=False, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.1, inplace=True),
            nn.Dropout2d(0.25),
            #
            nn.Conv2d(64, 64, stride=2, kernel_size=3, bias=False, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.1, inplace=True),
            nn.Dropout2d(0.25),
            #
            nn.Flatten(),
        )

        self.z_mean = torch.nn.Linear(4096, 200)
        self.z_log_var = torch.nn.Linear(4096, 200)

        self.decoder = nn.Sequential(
            torch.nn.Linear(200, 4096),
            Reshape(-1, 64, 8, 8),
            #
            nn.ConvTranspose2d(64, 64, stride=2, kernel_size=3),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.1, inplace=True),
            nn.Dropout2d(0.25),
            #
            nn.ConvTranspose2d(64, 64, stride=2, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.1, inplace=True),
            nn.Dropout2d(0.25),
            #
            nn.ConvTranspose2d(64, 32, stride=2, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.LeakyReLU(0.1, inplace=True),
            nn.Dropout2d(0.25),
            #
            nn.ConvTranspose2d(32, 3, stride=2, kernel_size=3, padding=1),
            Trim(),  # 3x129x129 -> 3x128x128
            nn.Sigmoid(),
            Reshape(-1, 3, 128, 128)
        )

    def reparameterize(self, z_mean, z_log_var):
        eps = torch.randn(z_mean.size(0), z_mean.size(1)).to(z_mean.device)
        z = z_mean + torch.exp(z_log_var / 2) * eps
        return z

    def encoding_fn(self, x):
        x = self.encoder(x)
        z_mean = self.z_mean(x)
        z_log_var = self.z_log_var(x)
        encoded = self.reparameterize(z_mean, z_log_var)
        return encoded

    def forward(self, x):
        x = self.encoder(x)
        z_mean, z_log_var = self.z_mean(x), self.z_log_var(x)
        encoded = self.reparameterize(z_mean, z_log_var)
        decoded = self.decoder(encoded)
        return encoded, z_mean, z_log_var, decoded

In [9]:
model = VAE()
model.to(DEVICE)

VAE(
  (encoder): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.1, inplace=True)
    (3): Dropout2d(p=0.25, inplace=False)
    (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): LeakyReLU(negative_slope=0.1, inplace=True)
    (7): Dropout2d(p=0.25, inplace=False)
    (8): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (9): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): LeakyReLU(negative_slope=0.1, inplace=True)
    (11): Dropout2d(p=0.25, inplace=False)
    (12): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (13): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, trac

In [10]:
def compute_avg_face(feature_idx, image_dim, data_loader, device=None, encoding_fn=None):
    avg_img_with_feat = torch.zeros(image_dim, dtype=torch.float32)
    avg_img_without_feat = torch.zeros(image_dim, dtype=torch.float32)

    num_img_with_feat = 0
    num_images_without_feat = 0

    for images, labels in data_loader:
        idx_img_with_feat = labels == feature_idx

        if encoding_fn is None:
            embeddings = images
        else:
            with torch.no_grad():
                if device is not None:
                    images = images.to(device)
                embeddings = encoding_fn(images).to('cpu')

        avg_img_with_feat += torch.sum(embeddings[idx_img_with_feat], axis=0)
        avg_img_without_feat += torch.sum(embeddings[~idx_img_with_feat], axis=0)
        num_img_with_feat += idx_img_with_feat.sum().item()
        num_images_without_feat += (~idx_img_with_feat).sum().item()

    avg_img_with_feat /= num_img_with_feat
    avg_img_without_feat /= num_images_without_feat

    return avg_img_with_feat, avg_img_without_feat


In [11]:
avg_face_with_feature, avg_face_with_out_feature = compute_avg_face(feature_idx=FEATURE_IDX,
                                                                    image_dim=200,
                                                                    data_loader=train_loader,
                                                                    device=DEVICE,
                                                                    encoding_fn=model.encoding_fn)

NameError: name 'train_loader' is not defined

In [12]:
diff = avg_face_with_feature - avg_face_with_out_feature
example_img = EXAMPLE_IMG.unsqueeze(0).to(DEVICE)

with torch.no_grad():
    embeddings = model.encoding_fn(example_img).squeeze(0).to('cpu')

NameError: name 'avg_face_with_feature' is not defined

In [13]:
def plot_modified_faces(original, diff, diff_coefficients=(0., 0.5, 1., 1.5, 2., 2.5, 3.), decoding_fn=None, device=None, figsize=(8, 2.5)):
    fig, axes = plt.subplots(nrows=2, ncols=len(diff_coefficients), sharex=True, sharey=True, figsize=figsize)

    for i, alpha in enumerate(diff_coefficients):
        more = original + alpha * diff
        less = original - alpha * diff

        if decoding_fn is not None:
            with torch.no_grad():
                if device is not None:
                    more = more.to(device).unsqueeze(0)
                    less = less.to(device).unsqueeze(0)

                more = decoding_fn(more).to('cpu').squeeze(0)
                less = decoding_fn(less).to('cpu').squeeze(0)

        
        more = transforms.CenterCrop(128)(more)
        less = transforms.CenterCrop(128)(less)

        if not alpha:
            s = 'original'
        else:
            s = f'$\\alpha={alpha}$'

        axes[0][i].set_title(s)
        axes[0][i].imshow(more.permute(1, 2, 0))
        axes[1][i].imshow(less.permute(1, 2, 0))
        axes[1][i].axison = False
        axes[0][i].axison = False

plot_modified_faces(original=embeddings, diff=diff, decoding_fn=model.decoder, device=DEVICE)
plt.tight_layout()
plt.show()

NameError: name 'embeddings' is not defined