In [None]:
# Mount Google Drive into the Colab filesystem at /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# Unpack the img_align_celeba.zip archive from Drive into the data directory.
!unzip '/content/gdrive/MyDrive/data/img_align_celeba.zip' -d '/content/gdrive/My Drive/data/'

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms

In [None]:
def _make_image_transform():
    """Build the resize -> tensor -> normalize pipeline shared by both transforms."""
    steps = [
        # Bring every image to a fixed 64x64 resolution.
        transforms.Resize((64, 64)),
        # PIL image -> float tensor.
        transforms.ToTensor(),
        # Per-channel normalization with mean 0.5 and std 0.5.
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
    ]
    return transforms.Compose(steps)


# Two independent (but identically configured) pipelines.
transform_input = _make_image_transform()

transform_output = _make_image_transform()


In [None]:
def load_image(root_dir: str, dir_name: str, file_name: str, extension: str) -> Image.Image:
    """Load an image from ``root_dir/dir_name/file_name`` as an RGB PIL image.

    Args:
        root_dir: Base directory of the dataset.
        dir_name: Sub-directory (relative to ``root_dir``) holding the images.
        file_name: Image file name, with or without its extension.
        extension: File extension such as ``'.jpg'``. It is appended only when
            ``file_name`` does not already end with it (case-insensitive), so
            callers that pass a complete file name are unaffected.

    Returns:
        A fully loaded ``PIL.Image.Image`` in RGB mode.

    Raises:
        FileNotFoundError: If the resolved path does not exist.
    """
    if extension and not file_name.lower().endswith(extension.lower()):
        file_name = file_name + extension

    path = os.path.join(root_dir, dir_name, file_name)

    # Image.open is lazy and keeps the file open; the context manager closes it
    # deterministically, and convert() returns a loaded copy that stays valid
    # afterwards while guaranteeing three channels.
    with Image.open(path) as image:
        return image.convert('RGB')


In [None]:
class CelebADataset(Dataset):
    """CelebA images paired with a selected subset of their attribute labels.

    The attribute file ``list_attr_celeba.txt`` is read from ``dir``; its second
    line holds the attribute names and every following non-empty line holds a
    file name followed by one value per attribute. Images are read from the
    ``img_align_celeba`` sub-directory of ``dir``.

    Args:
        dir: Root directory containing the attribute file and the image folder.
        transform_input: Callable applied to each loaded PIL image. When
            ``None``, a plain ``transforms.ToTensor()`` is used instead.
        transform_output: Stored on the instance; not used by this class.
        selected_attrs: Attribute names to return for each image, in this order.

    Raises:
        ValueError: If a name in ``selected_attrs`` is not in the attribute file.
    """

    def __init__(self, dir, transform_input, transform_output, selected_attrs):
        super().__init__()
        self.root = dir
        self.selected_attrs = selected_attrs

        self.transform_input = transform_input
        self.transform_output = transform_output

        # Read the attributes file and map each file name to its selected attributes.
        with open(os.path.join(dir, 'list_attr_celeba.txt'), 'r') as f:
            lines = f.readlines()

        all_attrs = lines[1].split()
        # Resolve the column positions once rather than once per image row.
        selected_idx = [all_attrs.index(attr) for attr in selected_attrs]

        self.attr_dict = {}
        for line in lines[2:]:
            split_line = line.split()
            if not split_line:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            filename = split_line[0]
            # Encode each attribute as +1 (value == 1) or -1 (anything else).
            attrs = (np.array(split_line[1:], dtype=int) == 1) * 2 - 1
            self.attr_dict[filename] = attrs[selected_idx]

        self.list_of_inputs = list(self.attr_dict.keys())

    def __getitem__(self, idx):
        """Return ``(image_tensor, attrs)`` for the ``idx``-th image.

        ``attrs`` is a float tensor holding the selected attributes encoded as
        -1 / +1, in the order given by ``selected_attrs``.
        """
        filename = self.list_of_inputs[idx]

        input_image = load_image(self.root, 'img_align_celeba', filename, '.jpg')

        # Apply the transform supplied by the caller; previously it was stored
        # but ignored, so resizing/normalization never took effect.
        if self.transform_input is not None:
            preprocess = self.transform_input
        else:
            preprocess = transforms.ToTensor()
        input_image = preprocess(input_image)

        target_attrs = torch.tensor(self.attr_dict[filename]).float()

        return input_image, target_attrs

    def __len__(self):
        """Number of images listed in the attribute file."""
        return len(self.list_of_inputs)

In [None]:
# Mini-batch size passed to the training DataLoader.
batch_size_train = 16

In [None]:
# Training set: CelebA images labelled with the eight selected attributes.
train_dataset = CelebADataset(
    "/content/gdrive/My Drive/data/",
    transform_input=transform_input,
    transform_output=transform_output,
    selected_attrs=[
        'Male',
        'Big_Nose',
        'Bags_Under_Eyes',
        '5_o_Clock_Shadow',
        'Wearing_Necktie',
        'Goatee',
        'Sideburns',
        'Bushy_Eyebrows',
    ],
)

# Shuffled mini-batch iterator over the training set.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size_train,
    shuffle=True,
)

In [None]:
# Number of mini-batches the DataLoader yields per pass over the dataset.
print(len(train_dataloader))

In [None]:
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('Device: ', device)

In [None]:
# Unpack the list_attr_celeba.csv.zip archive from Drive into the data directory.
!unzip '/content/gdrive/MyDrive/data/list_attr_celeba.csv.zip' -d '/content/gdrive/My Drive/data/'

In [None]:
import pandas as pd
# Load the CelebA attribute table from the extracted CSV file.
df = pd.read_csv('/content/gdrive/My Drive/data/list_attr_celeba.csv')
# Bare expression: rendered as the cell's output.
df

In [None]:
# Use 'image_id' as the index. The guard keeps this cell re-runnable: once the
# column has been moved into the index, a second set_index call would raise
# KeyError because 'image_id' is no longer a column.
if 'image_id' in df.columns:
    df.set_index('image_id', inplace=True)

# Recode -1 as 0 so that 0 marks the absence of an attribute in the image.
df.replace(to_replace=-1, value=0, inplace=True)

# Show the first rows of the frame after the changes.
df.head(10)

In [None]:
# Inspect the column labels of the attribute table.
df.columns

In [None]:
# (number of rows, number of columns) of the attribute table.
df.shape

In [None]:
from sklearn.decomposition import PCA

# The attribute table is used directly as the data matrix
# (rows are samples, columns are attributes).
X = df

# Fit an 8-component PCA on the attribute matrix; fit() returns the estimator.
pca = PCA(n_components=8).fit(X)

# Project the same data onto the fitted principal components.
X_pca = pca.transform(X)


In [None]:
# The 40 attribute names, written as a whitespace-separated block;
# str.split() turns it into a list of strings.
attribute_names = """
5_o_Clock_Shadow Arched_Eyebrows Attractive Bags_Under_Eyes
Bald Bangs Big_Lips Big_Nose Black_Hair Blond_Hair
Blurry Brown_Hair Bushy_Eyebrows Chubby Double_Chin
Eyeglasses Goatee Gray_Hair Heavy_Makeup High_Cheekbones
Male Mouth_Slightly_Open Mustache Narrow_Eyes No_Beard
Oval_Face Pale_Skin Pointy_Nose Receding_Hairline
Rosy_Cheeks Sideburns Smiling Straight_Hair Wavy_Hair
Wearing_Earrings Wearing_Hat Wearing_Lipstick
Wearing_Necklace Wearing_Necktie Young
""".split()

In [None]:
# Rows of `components_` are the principal axes; each row holds one loading
# per attribute column.
loadings = pca.components_

num_components = 8  # number of principal components to report
num_attributes = 40  # number of attributes in the dataset

# For every component, list the 8 attributes with the largest (signed) loadings.
for i in range(num_components):
    descending = loadings[i].argsort()[::-1]
    indices = descending[:8]
    features = []
    for j in indices:
        features.append(df.columns[j])
    print(f"Principal Component {i+1}: {features}")


In [None]:
# The eight attribute names used as prediction targets in this notebook.
Top_8_attributes = ['Male', 'Big_Nose', 'Bags_Under_Eyes', '5_o_Clock_Shadow', 'Wearing_Necktie', 'Goatee', 'Sideburns', 'Bushy_Eyebrows']

In [None]:
import torch.nn.functional as F
from torchvision.models import vgg16

# Define the model architecture with a VGG16 backbone
class AttributePredictionModel(nn.Module):
    """Multi-label attribute classifier on top of VGG16 convolutional features.

    The convolutional part of a pretrained VGG16 feeds an adaptive average pool
    to a fixed 7x7 map, followed by a three-layer fully connected head that
    emits one raw logit per attribute (no sigmoid is applied here).

    Args:
        num_attributes: Number of output logits. Defaults to 8, the number of
            selected attributes.
    """

    def __init__(self, num_attributes: int = 8):
        super().__init__()
        # NOTE(review): newer torchvision releases deprecate `pretrained=` in
        # favour of `weights=`; kept as-is because the installed version is not
        # visible from this notebook.
        self.features = vgg16(pretrained=True).features
        # Adaptive pooling fixes the spatial size to 7x7, so the head's input
        # width does not depend on the input resolution.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_attributes)  # Output size: number of selected attributes
        )

    def forward(self, x):
        """Return per-attribute logits for a batch of images ``x``."""
        x = self.features(x)
        x = self.avgpool(x)
        # Keep the batch dimension, flatten everything else for the linear head.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

# Model: build it first, then freeze the convolutional backbone so that only
# the classifier head keeps gradients.
model = AttributePredictionModel()
vgg16_model = model.features
for param in vgg16_model.parameters():
    param.requires_grad = False

# Move the whole model to the selected device.
model.to(device)

# Loss: binary cross-entropy computed directly on the raw logits.
criterion = nn.BCEWithLogitsLoss()

# Optimizer: Adam over the model's parameters.
optimizer = optim.Adam(model.parameters(), lr=0.0002)

# Training loop
epochs = 10
attribute_labels = ['Male', 'Big_Nose', 'Bags_Under_Eyes', '5_o_Clock_Shadow',
                    'Wearing_Necktie', 'Goatee', 'Sideburns', 'Bushy_Eyebrows']
num_tasks = len(attribute_labels)

for epoch in range(epochs):
    running_loss = 0.0
    running_corrects = 0
    total_examples = 0
    # Reset per epoch: accumulating across epochs while total_examples is
    # reset would push the per-attribute accuracy above 1.
    running_tasks_corrects = [0] * num_tasks

    for inputs, targets in tqdm(train_dataloader):
        # Move data to device
        inputs = inputs.to(device)
        # The dataset encodes attributes as -1 / +1, but BCEWithLogitsLoss
        # expects targets in [0, 1]; map to 0 / 1 (a no-op for 0 / 1 labels).
        targets = (targets.to(device) > 0).float()

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)

        # Compute loss
        loss = criterion(outputs, targets)

        # Backward pass and optimization step
        loss.backward()
        optimizer.step()

        # Update statistics (loss.item() is the batch mean, so re-weight it)
        batch_examples = inputs.size(0)
        running_loss += loss.item() * batch_examples
        total_examples += batch_examples

        # Compute predictions and compare bool-to-bool so that negative labels
        # can also count as correct; detach keeps metrics off the autograd graph.
        preds = torch.sigmoid(outputs.detach()) > 0.5
        correct = preds == targets.bool()
        running_corrects += correct.sum().item()
        for i, task_hits in enumerate(correct.sum(dim=0).tolist()):
            running_tasks_corrects[i] += task_hits

    # Compute epoch statistics
    epoch_loss = running_loss / total_examples
    # running_corrects counts individual (example, attribute) decisions, so
    # normalise by examples * attributes to get a value in [0, 1].
    epoch_acc = running_corrects / (total_examples * num_tasks)
    epoch_tasks_acc = [hits / total_examples for hits in running_tasks_corrects]

    # Print epoch statistics
    print(f'Epoch {epoch+1}/{epochs} - Loss: {epoch_loss:.4f} - Acc: {epoch_acc:.4f}')
    for i, attr in enumerate(attribute_labels):
        print(f'\t{attr}: {epoch_tasks_acc[i]:.4f}')

# Accuracy of the final epoch over all (example, attribute) decisions.
overall_accuracy = 100 * running_corrects / (total_examples * num_tasks)
print('Overall Accuracy: {:.2f}%'.format(overall_accuracy))