# Domain adaptation on classification task with AlexNet and PACS dataset

In [1]:
!pip install torchmetrics

Collecting torchmetrics
  Downloading torchmetrics-1.6.1-py3-none-any.whl.metadata (21 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.11.9-py3-none-any.whl.metadata (5.2 kB)
Downloading torchmetrics-1.6.1-py3-none-any.whl (927 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m927.3/927.3 kB[0m [31m18.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning_utilities-0.11.9-py3-none-any.whl (28 kB)
Installing collected packages: lightning-utilities, torchmetrics
Successfully installed lightning-utilities-0.11.9 torchmetrics-1.6.1


In [2]:
!git clone https://github.com/SimoneBorella/domain-adaptation.git

!cp -r ./domain-adaptation/models .
!cp -r ./domain-adaptation/utils .

!rm -r ./domain-adaptation
!rm -r sample_data

Cloning into 'domain-adaptation'...
remote: Enumerating objects: 44, done.[K
remote: Counting objects: 100% (44/44), done.[K
remote: Compressing objects: 100% (35/35), done.[K
remote: Total 44 (delta 10), reused 32 (delta 5), pack-reused 0 (from 0)[K
Receiving objects: 100% (44/44), 26.39 KiB | 2.03 MiB/s, done.
Resolving deltas: 100% (10/10), done.


In [3]:
# Download PACS Dataset Images
!git clone https://github.com/MachineLearning2020/Homework3-PACS/
!mkdir -p data
!mv Homework3-PACS/PACS data/
!rm -r Homework3-PACS/

# Download PACS Dataset Labels
!git clone https://github.com/silvia1993/DANN_Template/
!mv DANN_Template/txt_lists/art_painting.txt data/PACS/
!mv DANN_Template/txt_lists/cartoon.txt data/PACS/
!mv DANN_Template/txt_lists/photo.txt data/PACS/
!mv DANN_Template/txt_lists/sketch.txt data/PACS/
!rm -r DANN_Template/

Cloning into 'Homework3-PACS'...
remote: Enumerating objects: 10032, done.[K
remote: Total 10032 (delta 0), reused 0 (delta 0), pack-reused 10032 (from 1)[K
Receiving objects: 100% (10032/10032), 174.13 MiB | 24.63 MiB/s, done.
Resolving deltas: 100% (1/1), done.
Updating files: 100% (9993/9993), done.
Cloning into 'DANN_Template'...
remote: Enumerating objects: 23, done.[K
remote: Total 23 (delta 0), reused 0 (delta 0), pack-reused 23 (from 1)[K
Receiving objects: 100% (23/23), 33.86 KiB | 533.00 KiB/s, done.
Resolving deltas: 100% (5/5), done.


In [34]:
# Google colab
from google.colab import drive
drive.mount('/content/drive')
! mkdir -p /content/drive/MyDrive/da
res_dir = "/content/drive/MyDrive/da"


# Local
# res_dir = "."

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [24]:
import os
import re
import time

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
from PIL import Image

from torch.utils.data import DataLoader
from torch.backends import cudnn
import torchvision.transforms as T
from torchvision.models import AlexNet_Weights
import torch.nn.functional as F
from torchmetrics import Accuracy

import matplotlib.pyplot as plt
import numpy as np

from models.alexnet import AlexNet
from utils.monitor import Monitor

In [6]:
def get_device():
    if torch.cuda.is_available():
        print("CUDA available")
        print(f"Number of devices: {torch.cuda.device_count()}")
        for dev in range(torch.cuda.device_count()):
            print(f"Device {dev}:")
            print(f"\tName: {torch.cuda.get_device_name(dev)}")
    else:
        print("CUDA not available")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Device: {device}")

    return device

In [7]:
device = get_device()

CUDA available
Number of devices: 1
Device 0:
	Name: Tesla T4
Device: cuda


## Parameters

In [39]:
SEED = 17

VERSION = 0

NUM_CLASSES = 7
BATCH_SIZE = 256
LR = 0.1            # The initial Learning Rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization, you can keep this at the default
EPOCHS = 80          # Total number of training epochs (iterations over dataset)
STEP_SIZE = 35       # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1          # Multiplicative factor for learning rate step-down

LAMBDA = 0.5

torch.manual_seed(SEED)

<torch._C.Generator at 0x7d0de9de1530>

## Dataset

In [9]:
# Define the Dataset class
class PACSDataset(Dataset):
    def __init__(self, domain, transform, root_dir):
        assert domain in ['photo', 'art_painting', 'cartoon', 'sketch']
        self.examples = [] # (img_path, class_label)
        self.T = transform

        with open(f'{root_dir}/PACS/{domain}.txt', 'r') as f:
            lines = f.readlines()

        for line in lines:
            line = line.strip().split()
            img_path = f"{root_dir}/PACS/{line[0]}"
            class_label = int(line[1])
            self.examples.append((img_path, class_label))

    def __len__(self):
        return len(self.examples)

    def __getitem__(self, index):
        img_path, class_label = self.examples[index]
        img = Image.open(img_path).convert('RGB')
        img = self.T(img)
        return img, class_label

## Dataset preprocessing

In [10]:
def dataset_preprocessing():
    dataset_transform = T.Compose([
        T.Resize(256),
        T.CenterCrop(224),
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Define the Dataset object for training & testing
    traindataset = PACSDataset(domain='cartoon', transform=dataset_transform, root_dir='./data')
    testdataset = PACSDataset(domain='sketch', transform=dataset_transform, root_dir='./data')

    # Define the DataLoaders
    trainloader = DataLoader(traindataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
    testloader = DataLoader(testdataset, batch_size=BATCH_SIZE, num_workers=4)

    return trainloader, testloader

In [11]:
trainloader, testloader = dataset_preprocessing()



## Model definition

In [12]:
def get_model(device):
    model = AlexNet()
    model.load_state_dict(AlexNet_Weights.IMAGENET1K_V1.get_state_dict(progress=True), strict=False)
    model.classifier[-1] = nn.Linear(4096, NUM_CLASSES)
    model = model.to(device)
    return model

def save_model(model, file_name):
    torch.save(model.state_dict(), file_name)

def load_model(model, file_name, device="cuda"):
    model.load_state_dict(torch.load(file_name, map_location=torch.device(device)))
    return model

## Loss function definition

In [13]:
def get_loss_function():
    return nn.CrossEntropyLoss()

## Optimizer definition

In [14]:
def get_optimizer(model):
    return torch.optim.SGD(model.parameters(), lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)

## Scheduler definition

In [15]:
def get_scheduler(optimizer):
    return optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

## Plot

In [16]:
def plot_training_metrics(
    train_losses, learning_rates, base_dir
):
    fig = plt.figure()
    plt.title("Loss")
    plt.ylabel("Loss")
    plt.xlabel("Epoch")
    plt.plot(train_losses, label="Train Loss")
    plt.legend()
    plt.savefig(f"{base_dir}/plots/loss.pdf")
    plt.close(fig)

    fig = plt.figure()
    plt.title("Learning rate")
    plt.ylabel("learning rate")
    plt.xlabel("Epoch")
    plt.plot(learning_rates, label="Learning Rate")
    plt.legend()
    plt.savefig(f"{base_dir}/plots/learning_rate.pdf")
    plt.close(fig)

## Training

In [17]:
def train_baseline(model, trainloader, loss_function, optimizer, scheduler, device, monitor, base_dir):
    cudnn.benchmark = True

    train_losses = []
    learning_rates = []

    for e in range(EPOCHS):
        monitor.start(desc=f"Epoch {e + 1}/{EPOCHS}", max_progress=len(trainloader))

        learning_rate = scheduler.get_last_lr()[0]
        learning_rates.append(learning_rate)

        train_loss = 0.0
        cumulative_loss = 0.0
        count_loss = 0

        model.train()
        for i, (inputs, labels) in enumerate(trainloader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            cls, _ = model(inputs)
            loss = loss_function(cls, labels)

            cumulative_loss += loss.item()
            count_loss += 1

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

            train_loss = cumulative_loss / count_loss

            monitor.update(
                i + 1,
                learning_rate=f"{learning_rate:.5f}",
                train_loss=f"{train_loss:.4f}",
            )

        train_losses.append(train_loss)
        monitor.stop()

        scheduler.step()

        save_model(model, f"{base_dir}/weights/last.pt")

        plot_training_metrics(
            train_losses,
            learning_rates,
            base_dir,
        )

    monitor.print_stats()

In [18]:
def train_dann(model, trainloader, loss_function, optimizer, scheduler, device, monitor, base_dir):
    cudnn.benchmark = True

    train_losses = []
    learning_rates = []

    for e in range(EPOCHS):
        monitor.start(desc=f"Epoch {e + 1}/{EPOCHS}", max_progress=len(trainloader))

        learning_rate = scheduler.get_last_lr()[0]
        learning_rates.append(learning_rate)

        train_loss = 0.0
        cumulative_loss = 0.0
        count_loss = 0

        model.train()

        for i, ((src_inputs, src_labels), (trg_inputs, _)) in enumerate(zip(trainloader, testloader)):
            src_inputs, src_labels = src_inputs.to(device), src_labels.to(device)
            trg_inputs = trg_inputs.to(device)

            optimizer.zero_grad()
            src_classes, src_domains = model(src_inputs)
            _, trg_domains = model(trg_inputs)

            if i % 2 == 0:
                # Classification Loss
                loss = loss_function(src_classes, src_labels)

            else:
                # Classification Loss
                class_loss = loss_function(src_classes, src_labels)

                # Source Domain Adversarial Loss --> src_domains_label = 0
                src_domains_label = torch.zeros(src_domains.size(0)).long().to(device)
                src_domains_loss = loss_function(src_domains, src_domains_label)

                # Target Domain Adversarial Loss --> trg_domains_label = 1
                trg_domains_label = torch.ones(trg_domains.size(0)).long().to(device)
                trg_domains_loss = loss_function(trg_domains, trg_domains_label)

                # Final Loss
                loss = class_loss - LAMBDA * (src_domains_loss + trg_domains_loss)


            cumulative_loss += loss.item()
            count_loss += 1

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

            train_loss = cumulative_loss / count_loss

            monitor.update(
                i + 1,
                learning_rate=f"{learning_rate:.5f}",
                train_loss=f"{train_loss:.4f}",
            )

        train_losses.append(train_loss)
        monitor.stop()

        scheduler.step()

        save_model(model, f"{base_dir}/weights/last.pt")

        plot_training_metrics(
            train_losses,
            learning_rates,
            base_dir,
        )

    monitor.print_stats()

## Testing

In [19]:
def test(model, testloader, device, monitor):
    monitor.start(desc=f"Testing", max_progress=len(testloader))

    meter = Accuracy(task='multiclass', num_classes=NUM_CLASSES).to(device)

    inference_times = []

    model.eval()
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(testloader):
            inputs, labels = inputs.to(device), labels.to(device)
            start_time = time.perf_counter()
            classes, _ = model(inputs)
            end_time = time.perf_counter()

            batch_inference_time = (end_time - start_time) / inputs.size(0)
            inference_times.append(batch_inference_time)

            meter.update(classes, labels)
            accuracy = meter.compute()
            monitor.update(
                i + 1,
                accuracy=f"{accuracy:.4f}",
            )

    monitor.stop()

    accuracy = meter.compute()
    mean_inference_time = np.mean(inference_times)
    std_inference_time = np.std(inference_times)

    monitor.log(f"Accuracy on test images: {100 * accuracy:.3f} %")
    monitor.log(f"Mean inference time: {mean_inference_time * 1000:.3f} ms")
    monitor.log(f"Standard deviation of inference time: {std_inference_time * 1000:.3f} ms")


## Baseline

In [35]:
model = get_model(device)

os.makedirs(f"{res_dir}/res", exist_ok=True)

dir_name = f"{model.__class__.__name__}_{VERSION}"
for file in os.listdir(f"{res_dir}/res"):
    if file == dir_name:
        raise Exception(f"Directory {dir_name} already exists")

base_dir = f"{res_dir}/res/{dir_name}"
sub_dirs = [base_dir, f"{base_dir}/weights", f"{base_dir}/plots"]
for sub_dir in sub_dirs:
    os.makedirs(sub_dir, exist_ok=True)


# Training
train_monitor = Monitor(file_name=f"{base_dir}/training_log.txt")

loss_function = get_loss_function()
optimizer = get_optimizer(model)
scheduler = get_scheduler(optimizer)

train_monitor.log(f"Model:\n{model}\n")
train_monitor.log(f"Loss function:\n{loss_function}\n")
train_monitor.log(f"Optimizer:\n{optimizer}\n")
train_monitor.log(f"Scheduler:\n{scheduler.__class__.__name__}")
for attr in dir(scheduler):
    if not attr.startswith("_") and not callable(getattr(scheduler, attr)):
        train_monitor.log(f"{attr}: {getattr(scheduler, attr)}")
train_monitor.log("\n")

train_baseline(
    model=model,
    trainloader=trainloader,
    loss_function=loss_function,
    optimizer=optimizer,
    scheduler=scheduler,
    device=device,
    monitor=train_monitor,
    base_dir=base_dir
)


Model:
AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace=True)
    (3): Dropout(





Epoch 3/60:






Epoch 4/60:






Epoch 5/60:






Epoch 6/60:






Epoch 7/60:






Epoch 8/60:






Epoch 9/60:






Epoch 10/60:






Epoch 11/60:






Epoch 12/60:






Epoch 13/60:






Epoch 14/60:






Epoch 15/60:






Epoch 16/60:






Epoch 17/60:






Epoch 18/60:






Epoch 19/60:






Epoch 20/60:






Epoch 21/60:






Epoch 22/60:






Epoch 23/60:






Epoch 24/60:






Epoch 25/60:






Epoch 26/60:






Epoch 27/60:






Epoch 28/60:






Epoch 29/60:






Epoch 30/60:






Epoch 31/60:






Epoch 32/60:






Epoch 33/60:






Epoch 34/60:






Epoch 35/60:






Epoch 36/60:






Epoch 37/60:






Epoch 38/60:






Epoch 39/60:






Epoch 40/60:






Epoch 41/60:






Epoch 42/60:






Epoch 43/60:






Epoch 44/60:






Epoch 45/60:






Epoch 46/60:






Epoch 47/60:






Epoch 48/60:






Epoch 49/60:






Epoch 50/60:






Epoch 51/60:






Epoch 52/60:






Epoch 53/60:






Epoch 54/60:






Epoch 55/60:






Epoch 56/60:






Epoch 57/60:






Epoch 58/60:






Epoch 59/60:






Epoch 60/60:






Total elapsed time: 00:08:35 s


In [37]:
MODEL_NAME = "last.pt"

model = get_model(device)

dir_name = f"{model.__class__.__name__}_{VERSION}"
base_dir = f"{res_dir}/res/{dir_name}"

# Testing
test_monitor = Monitor(file_name=f"{base_dir}/testing_log.txt")
model = load_model(model, f"{base_dir}/weights/{MODEL_NAME}")

test_monitor.log(f"Testing model: {MODEL_NAME}")

test_monitor.log(f"Testing dataset: traindataset (cartoon)")

test(
    model,
    trainloader,
    device,
    test_monitor
)

test_monitor.log(f"\n")
test_monitor.log(f"Testing dataset: testdataset (sketch)")

test(
    model,
    testloader,
    device,
    test_monitor
)

  model.load_state_dict(torch.load(file_name, map_location=torch.device(device)))


Testing model: last.pt
Testing dataset: traindataset (cartoon)
Testing:


Accuracy on test images: 100.000 %
Mean inference time: 0.016 ms
Standard deviation of inference time: 0.015 ms


Testing dataset: testdataset (sketch)
Testing:


Accuracy on test images: 49.555 %
Mean inference time: 0.020 ms
Standard deviation of inference time: 0.018 ms


## DANN (Domain Adversarial Neural Network)

In [None]:
model = get_model(device)

os.makedirs(f"{res_dir}/res", exist_ok=True)

dir_name = f"{model.__class__.__name__}_DANN_{VERSION}"
for file in os.listdir(f"{res_dir}/res"):
    if file == dir_name:
        raise Exception(f"Directory {dir_name} already exists")

base_dir = f"{res_dir}/res/{dir_name}"
sub_dirs = [base_dir, f"{base_dir}/weights", f"{base_dir}/plots"]
for sub_dir in sub_dirs:
    os.makedirs(sub_dir, exist_ok=True)


# Training
train_monitor = Monitor(file_name=f"{base_dir}/training_log.txt")

loss_function = get_loss_function()
optimizer = get_optimizer(model)
scheduler = get_scheduler(optimizer)

train_monitor.log(f"Model:\n{model}\n")
train_monitor.log(f"Loss function:\n{loss_function}\n")
train_monitor.log(f"Optimizer:\n{optimizer}\n")
train_monitor.log(f"Scheduler:\n{scheduler.__class__.__name__}")
for attr in dir(scheduler):
    if not attr.startswith("_") and not callable(getattr(scheduler, attr)):
        train_monitor.log(f"{attr}: {getattr(scheduler, attr)}")
train_monitor.log("\n")

train_dann(
    model=model,
    trainloader=trainloader,
    loss_function=loss_function,
    optimizer=optimizer,
    scheduler=scheduler,
    device=device,
    monitor=train_monitor,
    base_dir=base_dir
)


Model:
AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace=True)
    (3): Dropout(





Epoch 3/80:






Epoch 4/80:






Epoch 5/80:






Epoch 6/80:






Epoch 7/80:






Epoch 8/80:






Epoch 9/80:






Epoch 10/80:






Epoch 11/80:






Epoch 12/80:






Epoch 13/80:






Epoch 14/80:






Epoch 15/80:






Epoch 16/80:






Epoch 17/80:






Epoch 18/80:






Epoch 19/80:






Epoch 20/80:






Epoch 21/80:






Epoch 22/80:






Epoch 23/80:






Epoch 24/80:






Epoch 25/80:






Epoch 26/80:






Epoch 27/80:






Epoch 28/80:






Epoch 29/80:






Epoch 30/80:






Epoch 31/80:






Epoch 32/80:






Epoch 33/80:






Epoch 34/80:






Epoch 35/80:






Epoch 36/80:






Epoch 37/80:






Epoch 38/80:






Epoch 39/80:






Epoch 40/80:






Epoch 41/80:






Epoch 42/80:






Epoch 43/80:






Epoch 44/80:






Epoch 45/80:






Epoch 46/80:






Epoch 47/80:






Epoch 48/80:






Epoch 49/80:






Epoch 50/80:






Epoch 51/80:






Epoch 52/80:






Epoch 53/80:






Epoch 54/80:






Epoch 55/80:






In [None]:
MODEL_NAME = "last.pt"

model = get_model(device)

dir_name = f"{model.__class__.__name__}_DANN_{VERSION}"
base_dir = f"{res_dir}/res/{dir_name}"

# Testing
test_monitor = Monitor(file_name=f"{base_dir}/testing_log.txt")
model = load_model(model, f"{base_dir}/weights/{MODEL_NAME}")

test_monitor.log(f"Testing model: {MODEL_NAME}")

test_monitor.log(f"Testing dataset: traindataset (cartoon)")

test(
    model,
    trainloader,
    device,
    test_monitor
)

test_monitor.log(f"\n")
test_monitor.log(f"Testing dataset: testdataset (sketch)")

test(
    model,
    testloader,
    device,
    test_monitor
)