<a href="https://colab.research.google.com/github/AviralTripathim22ma012/Multi-Task-Learning/blob/main/MTL_on_CelebA_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**MULTI-TASK LEARNING WITH 8 ATTRIBUTES**

---



---



In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.datasets import CelebA
from torchvision import transforms, models

'''# Define device'''
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Number of CelebA attributes (tasks) learned jointly: the first 8 attribute columns.
num_tasks = 8

'''# Define transforms'''
transform = transforms.Compose([
    transforms.CenterCrop(178),
    transforms.Resize(128),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

'''# Define dataset and dataloader'''
train_dataset = CelebA(root="./data", split="train", target_type="attr", transform=transform, download=True)
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)

'''# Define model'''
# Pretrained VGG16 whose classifier is replaced by a head with one sigmoid
# output per attribute (multi-label classification).
model = models.vgg16(pretrained=True)
model.classifier = nn.Sequential(
    nn.Linear(25088, 4096),
    nn.ReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(4096, 4096),
    nn.ReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(4096, num_tasks),
    nn.Sigmoid()
)
model.to(device)

'''# Define loss function and optimizer'''
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

'''# Train the model'''
model.train()  # make sure Dropout is active while training
for epoch in range(10):
    running_loss = 0.0
    for inputs, labels in train_dataloader:
        inputs = inputs.to(device)
        # Only use the first `num_tasks` attributes; BCELoss needs float targets.
        labels = labels[:, :num_tasks].to(device).float()
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print("Epoch %d loss: %.3f" % (epoch + 1, running_loss / len(train_dataloader)))

'''# Test the model'''
test_dataset = CelebA(root="./data", split="test", target_type="attr", transform=transform, download=True)
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=False)
task_accs = [0.0] * num_tasks  # per-attribute count of correct predictions
correct = 0                    # correct predictions over all attributes
total = 0                      # number of (sample, attribute) pairs seen
num_samples = 0                # number of test images seen
model.eval()  # disable Dropout so test predictions are deterministic
with torch.no_grad():
    for inputs, labels in test_dataloader:
        inputs = inputs.to(device)
        labels = labels[:, :num_tasks].to(device).float()
        outputs = model(inputs)
        preds = (outputs > 0.5).float()
        matches = (preds == labels).float()
        for i, hits in enumerate(matches.sum(dim=0).tolist()):
            task_accs[i] += hits
        correct += matches.sum().item()
        total += labels.numel()
        num_samples += labels.size(0)
# Each attribute is predicted once per image, so per-task accuracy is
# normalized by the number of images, not by images * attributes.
task_accs = [task_acc / num_samples for task_acc in task_accs]
overall_acc = correct / total

'''# Report task-wise accuracy and overall accuracy'''
print("Task-wise accuracy:")
for i in range(num_tasks):
    print("Attribute %d: %.3f" % (i + 1, task_accs[i]))
print("Overall accuracy: %.3f" % overall_acc)


1443490838it [00:25, 56236993.95it/s]
26721026it [00:00, 63401324.43it/s]
3424458it [00:00, 200999424.67it/s]
6082035it [00:00, 21752765.99it/s]
12156055it [00:00, 35414380.68it/s]
2836386it [00:00, 201167864.07it/s]
Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:04<00:00, 128MB/s]


Epoch 1 loss: 0.349
Epoch 2 loss: 0.300
Epoch 3 loss: 0.291
Epoch 4 loss: 0.286
Epoch 5 loss: 0.282
Epoch 6 loss: 0.279
Epoch 7 loss: 0.278
Epoch 8 loss: 0.275
Epoch 9 loss: 0.274
Epoch 10 loss: 0.273
Files already downloaded and verified
Task-wise accuracy:
Attribute 1: 0.117
Attribute 2: 0.103
Attribute 3: 0.102
Attribute 4: 0.105
Attribute 5: 0.123
Attribute 6: 0.119
Attribute 7: 0.088
Attribute 8: 0.105
Overall accuracy: 0.863


**DROP RATE (Bonus)**

---



---



I have chosen the **2nd metric (based on the number of labels per attribute)** to calculate the drop rate. Here are the steps I followed:


1.   **STEP-1:** Get the names of the first 8 attributes from the CelebA dataset, along with the corresponding number of labels for each  
2.   **STEP-2:** Find the maximum among these label counts, divide every attribute's count by this maximum value, and print the result
3.   **STEP-3:** Find the average of these ratios and print only those attributes whose ratio is greater than this average
4.   **STEP-4:** Get the corresponding indices of the attributes obtained in the previous step and train the model only for those attributes  









**PRINTING FIRST 8 ATTRIBUTES**

In [None]:
from torchvision.datasets import CelebA

'''# Load the CelebA dataset'''
# Training split with attribute targets; no image transform is needed here
# because only the attribute metadata is inspected.
celeba_dataset = CelebA(
    root='./data',
    split='train',
    target_type='attr',
    download=True,
)

'''# Get the attribute names and print the first 8'''
attr_names = celeba_dataset.attr_names[0:8]
print(attr_names)


Files already downloaded and verified
['5_o_Clock_Shadow', 'Arched_Eyebrows', 'Attractive', 'Bags_Under_Eyes', 'Bald', 'Bangs', 'Big_Lips', 'Big_Nose']


**NUMBER OF LABELS PER ATTRIBUTE**

In [None]:
attr_names = celeba_dataset.attr_names
# Column position of each of the first 8 attributes inside the attribute table.
attr_indices = [
    attr_names.index(wanted)
    for wanted in [
        '5_o_Clock_Shadow', 'Arched_Eyebrows', 'Attractive', 'Bags_Under_Eyes',
        'Bald', 'Bangs', 'Big_Lips', 'Big_Nose',
    ]
]

'''# Get the corresponding number of labels for each attribute'''
# Sum of each attribute column over the training split.
# NOTE(review): this is a count of positive labels only if `attr` is 0/1-encoded - confirm.
num_labels = [celeba_dataset.attr[:, column].sum().item() for column in attr_indices]

'''# Print the results'''
# Walk the attribute names in dataset order and report only the tracked ones.
for position, name in enumerate(attr_names):
    if position not in attr_indices:
        continue
    print(f"{name}: {num_labels[attr_indices.index(position)]}")


5_o_Clock_Shadow: 18177
Arched_Eyebrows: 43278
Attractive: 83603
Bags_Under_Eyes: 33280
Bald: 3713
Bangs: 24685
Big_Lips: 39213
Big_Nose: 38341


**DIVIDING BY MAXIMUM NUMBER OF LABELS**

In [None]:
'''# Find the maximum value among the number of labels'''
max_num_labels = max(num_labels)

'''# Divide each number of labels with the maximum value'''
# Express every count relative to the largest one, which therefore maps to 1.0.
normalized_labels = [count / max_num_labels for count in num_labels]

'''# Print the results'''
# Walk the attribute names in dataset order and report only the tracked ones.
for position, name in enumerate(attr_names):
    if position not in attr_indices:
        continue
    print(f"{name}: {normalized_labels[attr_indices.index(position)]}")


5_o_Clock_Shadow: 0.21742042749662094
Arched_Eyebrows: 0.517660849491047
Attractive: 1.0
Bags_Under_Eyes: 0.3980718395272897
Bald: 0.04441228185591426
Bangs: 0.2952645240003349
Big_Lips: 0.4690381924093633
Big_Nose: 0.45860794469098


**SELECTING THOSE WHICH ARE GREATER THAN THE AVERAGE (AND DROPPING THE REST)**

In [None]:
'''# Find the average of the normalized values'''
avg_normalized_labels = sum(normalized_labels) / len(normalized_labels)

'''# Print the normalized values that are higher than the average'''
# Attributes at or below the average ratio are the ones to be dropped, so only
# the strictly-above-average ones are listed.
for position, name in enumerate(attr_names):
    if position not in attr_indices:
        continue
    ratio = normalized_labels[attr_indices.index(position)]
    if ratio > avg_normalized_labels:
        print(f"{name}: {ratio}")


Arched_Eyebrows: 0.517660849491047
Attractive: 1.0
Big_Lips: 0.4690381924093633
Big_Nose: 0.45860794469098


In [None]:
'''# Get the attribute names and indices'''
attr_names = celeba_dataset.attr_names
# The attributes whose normalized label count is above the average are
# Arched_Eyebrows, Attractive, Big_Lips and Big_Nose. Bangs (ratio ~0.295) is
# below the average and must not be selected; Big_Lips (~0.469) must be.
selected_attr_names = ['Arched_Eyebrows', 'Attractive', 'Big_Lips', 'Big_Nose']
attr_indices = [attr_names.index(attr_name) for attr_name in selected_attr_names]

'''# Print the indices'''
print(attr_indices)

[1, 2, 5, 7]


**REPEATING THE TRAINING WITH ONLY 4 TASKS**

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.datasets import CelebA
from torchvision import transforms, models

'''# Define device'''
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

'''# Define transforms'''
transform = transforms.Compose([
    transforms.CenterCrop(178),
    transforms.Resize(128),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

'''# Define dataset and dataloader'''
train_dataset = CelebA(root="./data", split="train", target_type="attr", transform=transform, download=True)
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Tasks kept after applying the drop rate: the attributes whose normalized
# label count is above the average (Arched_Eyebrows, Attractive, Big_Lips,
# Big_Nose). Looking the columns up by name avoids hard-coding indices; the
# previously hard-coded [1, 2, 5, 7] picked Bangs, which is below the average,
# instead of Big_Lips.
selected_attr_names = ['Arched_Eyebrows', 'Attractive', 'Big_Lips', 'Big_Nose']
selected_attr_indices = [train_dataset.attr_names.index(name) for name in selected_attr_names]
num_tasks = len(selected_attr_indices)

'''# Define model'''
# Pretrained VGG16 whose classifier is replaced by a head with one sigmoid
# output per retained attribute (multi-label classification).
model = models.vgg16(pretrained=True)
model.classifier = nn.Sequential(
    nn.Linear(25088, 4096),
    nn.ReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(4096, 4096),
    nn.ReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(4096, num_tasks),
    nn.Sigmoid()
)
model.to(device)

'''# Define loss function and optimizer'''
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

'''# Train the model'''
model.train()  # make sure Dropout is active while training
for epoch in range(10):
    running_loss = 0.0
    for inputs, labels in train_dataloader:
        inputs = inputs.to(device)
        # Only use the retained attributes; BCELoss needs float targets.
        labels = labels[:, selected_attr_indices].to(device).float()
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print("Epoch %d loss: %.3f" % (epoch + 1, running_loss / len(train_dataloader)))

'''# Test the model'''
test_dataset = CelebA(root="./data", split="test", target_type="attr", transform=transform, download=True)
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=False)
task_accs = [0.0] * num_tasks  # per-attribute count of correct predictions
correct = 0                    # correct predictions over all attributes
total = 0                      # number of (sample, attribute) pairs seen
num_samples = 0                # number of test images seen
model.eval()  # disable Dropout so test predictions are deterministic
with torch.no_grad():
    for inputs, labels in test_dataloader:
        inputs = inputs.to(device)
        labels = labels[:, selected_attr_indices].to(device).float()
        outputs = model(inputs)
        preds = (outputs > 0.5).float()
        matches = (preds == labels).float()
        for i, hits in enumerate(matches.sum(dim=0).tolist()):
            task_accs[i] += hits
        correct += matches.sum().item()
        total += labels.numel()
        num_samples += labels.size(0)
# Each attribute is predicted once per image, so per-task accuracy is
# normalized by the number of images, not by images * attributes.
task_accs = [task_acc / num_samples for task_acc in task_accs]
overall_acc = correct / total

'''# Report task-wise accuracy and overall accuracy'''
print("Task-wise accuracy:")
for i in range(num_tasks):
    print("Attribute %d: %.3f" % (i + 1, task_accs[i]))
print("Overall accuracy: %.3f" % overall_acc)


1443490838it [00:21, 67038929.71it/s] 
26721026it [00:00, 149507568.66it/s]
3424458it [00:00, 223225442.73it/s]
6082035it [00:00, 68389879.36it/s]
12156055it [00:00, 104893668.90it/s]
2836386it [00:00, 199568294.06it/s]
Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:08<00:00, 68.3MB/s]


Epoch 1 loss: 0.574
Epoch 2 loss: 0.562
Epoch 3 loss: 0.562
Epoch 4 loss: 0.562
Epoch 5 loss: 0.562
Epoch 6 loss: 0.561
Epoch 7 loss: 0.561
Epoch 8 loss: 0.561
Epoch 9 loss: 0.561
Epoch 10 loss: 0.561
Files already downloaded and verified
Task-wise accuracy:
Attribute 1: 0.179
Attribute 2: 0.124
Attribute 3: 0.211
Attribute 4: 0.197
Overall accuracy: 0.711


**MULTI-TASKING WITH THESE 4 AS WELL AS THE OTHERS**

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, datasets, transforms

'''# Define the multi-task model with task dropout'''
class MultiTaskModel(nn.Module):
    """VGG16-based multi-task network with one linear head per task.

    A shared pretrained VGG16 trunk produces a 4096-dimensional embedding;
    every task owns a ``Linear(4096, size)`` head on top of it. Heads whose
    index is listed in ``dropped_tasks`` are skipped in ``forward``.

    Args:
        num_tasks: Number of task heads iterated over in ``forward``.
        task_sizes: Output width of each head; one entry per task.
        dropped_tasks: 0-based head indices that ``forward`` skips. The
            default keeps the previously hard-coded list (tasks 1, 2, 5 and 7
            when counting from 1).
    """

    def __init__(self, num_tasks, task_sizes, dropped_tasks=(0, 1, 4, 6)):
        super().__init__()
        self.num_tasks = num_tasks
        self.task_sizes = task_sizes
        self.dropped_tasks = frozenset(dropped_tasks)

        backbone = models.vgg16(pretrained=True)
        # Convolutional stack without its final max-pooling layer.
        self.features = nn.Sequential(*list(backbone.features.children())[:-1])
        # The conv output has 512 channels and an input-dependent spatial size
        # (8x8 for 128x128 images), so it cannot be fed to a 4096-wide head
        # directly. Pool to the fixed 7x7 grid VGG expects and reuse the
        # pretrained classifier (minus its last layer) to get 4096 features.
        self.avgpool = backbone.avgpool
        self.shared_fc = nn.Sequential(*list(backbone.classifier.children())[:-1])

        self.layers = nn.ModuleList()
        for size in task_sizes:
            self.layers.append(nn.Linear(4096, size))

    def forward(self, x):
        """Return a tuple with the raw output of every non-dropped head, in head order."""
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.shared_fc(x)
        return tuple(
            self.layers[i](x)
            for i in range(self.num_tasks)
            if i not in self.dropped_tasks
        )


transform = transforms.Compose([
    transforms.CenterCrop(178),
    transforms.Resize(128),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])


'''# Load the data for each task'''
train_data = datasets.CelebA('./data', split='train', target_type='attr', transform=transform, download=True)
# Evaluate on held-out images rather than on the training split.
test_data = datasets.CelebA('./data', split='test', target_type='attr', transform=transform, download=True)

# The model owns one head per attribute among the first 8 CelebA attributes.
# MultiTaskModel.forward skips heads 0, 1, 4 and 6 (tasks 1, 2, 5 and 7 when
# counting from 1), so only the remaining attributes are predicted.
# NOTE(review): `dropped_tasks` must mirror the list used inside
# MultiTaskModel.forward; confirm that these are the tasks meant to be dropped.
num_tasks = 8
dropped_tasks = [0, 1, 4, 6]
attr_indices = [task for task in range(num_tasks) if task not in dropped_tasks]
# Attribute columns (not rows) of the predicted tasks for the whole training split.
targets = train_data.attr[:, attr_indices]

'''# Create data loaders'''
batch_size = 64
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)

test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=False)

'''# Create an instance of the multi-task model'''
task_sizes = [2] * num_tasks  # every task is a binary (2-class) decision
model = MultiTaskModel(num_tasks, task_sizes)

'''# Define the loss functions for each task'''
# Each head emits 2 raw logits, so the matching loss is cross-entropy over
# class indices {0, 1}; BCELoss would require one probability per sample.
criterion = nn.CrossEntropyLoss()

'''# Define the optimizer'''
optimizer = optim.Adam(model.parameters(), lr=0.001)

'''# Train the model with task dropout'''
num_epochs = 10
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
for epoch in range(num_epochs):

    model.train()
    train_loss = 0.0

    for images, labels in train_loader:
        # Move the data to the device; the loader already yields the attribute
        # labels of exactly the images in this (shuffled) batch.
        images = images.to(device)
        targets_batch = labels[:, attr_indices].long().to(device)

        # Forward pass: one output per non-dropped head, in the order of `attr_indices`
        optimizer.zero_grad()  # gradients would otherwise accumulate across batches
        outputs = model(images)
        loss = sum(criterion(output, targets_batch[:, k]) for k, output in enumerate(outputs))
        loss.backward()
        optimizer.step()

        # Accumulate the loss as a Python float so no autograd graph is kept alive
        train_loss += loss.item()

    # Calculate the average train loss
    train_loss /= len(train_loader)
    print("Epoch %d loss: %.3f" % (epoch + 1, train_loss))

'''# Evaluate the model on the validation set'''
model.eval()
test_loss = 0.0
task_accs = [0.0] * len(attr_indices)  # per-task count of correct predictions
correct = 0                            # correct predictions over all tasks
total = 0                              # number of (sample, task) pairs seen
num_samples = 0                        # number of evaluated images
with torch.no_grad():
    for images, labels in test_loader:
        # Move the data to the device
        images = images.to(device)
        targets_batch = labels[:, attr_indices].long().to(device)

        # Forward pass
        outputs = model(images)
        loss = sum(criterion(output, targets_batch[:, k]) for k, output in enumerate(outputs))

        # The predicted class of each task is the index of its larger logit.
        preds = torch.stack([output.argmax(dim=1) for output in outputs], dim=1)
        matches = (preds == targets_batch).float()
        for k, hits in enumerate(matches.sum(dim=0).tolist()):
            task_accs[k] += hits
        correct += matches.sum().item()
        total += targets_batch.numel()
        num_samples += targets_batch.size(0)

        # Accumulate the loss
        test_loss += loss.item()

    # Calculate the average validation loss
    test_loss /= len(test_loader)

# Per-task accuracy is normalized by the number of images, not images * tasks.
task_accs = [task_acc / num_samples for task_acc in task_accs]

overall_acc = correct / total
print("Overall accuracy: %.3f" % overall_acc)

epoch 1 loss: 0.256
epoch 2 loss: 0.245
epoch 3 loss: 0.233
epoch 4 loss: 0.222
epoch 5 loss: 0.21
epoch 6 loss: 0.197
epoch 7 loss: 0.195
epoch 8 loss: 0.194
epoch 9 loss: 0.192
epoch 10 loss: 0.192
overall accuracy: 0.914 
