# Domain adaptation for image classification with AlexNet on the PACS dataset

In [None]:
# %pip (not !pip) installs into the environment of the running kernel
%pip install torchmetrics

In [None]:
# Fetch the project repository, keep only its `models` and `utils` directories
# in the working directory (they are imported further down), then delete the clone.
!git clone https://github.com/SimoneBorella/domain-adaptation.git

!cp -r ./domain-adaptation/models .
!cp -r ./domain-adaptation/utils .

!rm -r ./domain-adaptation
# Removes a `sample_data` directory from the working directory
# (presumably the folder Colab creates by default — confirm).
!rm -r sample_data

In [None]:
# Download PACS Dataset Images: clone the repository, move its PACS folder to
# data/PACS, then delete the rest of the clone.
!git clone https://github.com/MachineLearning2020/Homework3-PACS/
!mkdir -p data
!mv Homework3-PACS/PACS data/
!rm -r Homework3-PACS/

# Download PACS Dataset Labels: move the four per-domain list files
# (one .txt per domain) next to the images in data/PACS, then delete the clone.
!git clone https://github.com/silvia1993/DANN_Template/
!mv DANN_Template/txt_lists/art_painting.txt data/PACS/
!mv DANN_Template/txt_lists/cartoon.txt data/PACS/
!mv DANN_Template/txt_lists/photo.txt data/PACS/
!mv DANN_Template/txt_lists/sketch.txt data/PACS/
!rm -r DANN_Template/

In [2]:
# `res_dir` is the root directory under which later cells create `res/<run name>/`.
#
# Google Colab: uncomment the four lines below to mount Drive and store results there.
# from google.colab import drive
# drive.mount('/content/drive')
# ! mkdir -p /content/drive/MyDrive/da
# res_dir = "/content/drive/MyDrive/da"


# Local: store results relative to the current working directory.
res_dir = "."

In [3]:
import os
import re
import time

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
from PIL import Image

from torch.utils.data import DataLoader
from torch.backends import cudnn
import torchvision.transforms as T
from torch.hub import load_state_dict_from_url
import torch.nn.functional as F
from torchmetrics import Accuracy

import matplotlib.pyplot as plt
import numpy as np

from models.alexnet_dann_rev import AlexNetDANNRev
from utils.monitor import Monitor

In [4]:
def get_device():
    """Report the available compute hardware and return the device to use.

    Prints whether CUDA is available and, when it is, the number of visible
    GPUs followed by the index and name of each one. The selected device is
    printed last.

    Returns:
        torch.device: ``cuda`` when CUDA is available, otherwise ``cpu``.
    """
    has_cuda = torch.cuda.is_available()

    if not has_cuda:
        print("CUDA not available")
    else:
        gpu_count = torch.cuda.device_count()
        print("CUDA available")
        print(f"Number of devices: {gpu_count}")
        for idx in range(gpu_count):
            print(f"Device {idx}:")
            print(f"\tName: {torch.cuda.get_device_name(idx)}")

    selected = torch.device("cuda") if has_cuda else torch.device("cpu")
    print(f"Device: {selected}")

    return selected

In [5]:
# Select the compute device once; later cells pass it to the model, training and test helpers.
device = get_device()

CUDA not available
Device: cpu


## Parameters

In [21]:
SEED = 17            # Seed passed to torch.manual_seed below (only torch is seeded here)

VERSION = 1          # Run identifier appended to the results directory name

NUM_CLASSES = 7      # Output size of the final classifier layer created in get_model
BATCH_SIZE = 256     # Batch size used by both DataLoaders
LR = 0.005           # The initial Learning Rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization, you can keep this at the default
EPOCHS = 30          # Total number of training epochs (iterations over dataset)
STEP_SIZE = 20       # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1          # Multiplicative factor for learning rate step-down

# Passed as `alpha=` to the model on the domain forward passes during training
# (presumably the gradient-reversal strength — confirm in AlexNetDANNRev).
ALPHA = 0.1

torch.manual_seed(SEED)

<torch._C.Generator at 0x7d8d6410f470>

## Dataset

In [7]:
# Define the Dataset class
class PACSDataset(Dataset):
    """Map-style dataset over one PACS domain.

    The list file ``{root_dir}/PACS/{domain}.txt`` is read once at construction.
    Every non-blank line holds whitespace-separated fields: the first is an
    image path relative to ``{root_dir}/PACS`` and the second is the integer
    class label.

    Args:
        domain: One of ``'photo'``, ``'art_painting'``, ``'cartoon'``, ``'sketch'``.
        transform: Callable applied to the RGB ``PIL.Image`` in ``__getitem__``.
        root_dir: Directory that contains the ``PACS`` folder.

    Raises:
        AssertionError: If ``domain`` is not one of the four known domains.
    """

    def __init__(self, domain, transform, root_dir):
        assert domain in ['photo', 'art_painting', 'cartoon', 'sketch']
        self.examples = []  # (img_path, class_label)
        self.T = transform

        with open(f'{root_dir}/PACS/{domain}.txt', 'r') as f:
            for line in f:
                fields = line.split()
                # Blank lines (e.g. an extra newline at EOF) carry no example;
                # skip them instead of failing with IndexError.
                if not fields:
                    continue
                img_path = f"{root_dir}/PACS/{fields[0]}"
                class_label = int(fields[1])
                self.examples.append((img_path, class_label))

    def __len__(self):
        """Return the number of (image path, label) pairs read from the list file."""
        return len(self.examples)

    def __getitem__(self, index):
        """Load the image at ``index`` as RGB, apply the transform and return ``(img, class_label)``."""
        img_path, class_label = self.examples[index]
        # The context manager releases the file handle deterministically;
        # convert() returns an independent, fully loaded image.
        with Image.open(img_path) as raw:
            img = raw.convert('RGB')
        img = self.T(img)
        return img, class_label

## Dataset preprocessing

In [8]:
def dataset_preprocessing(source_domain='cartoon', target_domain='sketch', root_dir='./data', num_workers=4):
    """Build the source (train) and target (test) DataLoaders for two PACS domains.

    Both datasets share one transform: resize to 256, center-crop to 224,
    convert to tensor, then normalize per channel with the mean/std given below.

    Args:
        source_domain: PACS domain used for the training loader.
        target_domain: PACS domain used for the test loader.
        root_dir: Directory that contains the ``PACS`` folder.
        num_workers: Worker processes used by each DataLoader.

    Returns:
        tuple: ``(trainloader, testloader)``. The train loader is shuffled and
        drops the last incomplete batch; the test loader keeps dataset order
        and keeps the last (possibly smaller) batch.
    """
    dataset_transform = T.Compose([
        T.Resize(256),
        T.CenterCrop(224),
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Define the Dataset object for training & testing
    traindataset = PACSDataset(domain=source_domain, transform=dataset_transform, root_dir=root_dir)
    testdataset = PACSDataset(domain=target_domain, transform=dataset_transform, root_dir=root_dir)

    # Define the DataLoaders
    trainloader = DataLoader(traindataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, drop_last=True)
    testloader = DataLoader(testdataset, batch_size=BATCH_SIZE, num_workers=num_workers)

    return trainloader, testloader

In [9]:
# Build both loaders once; `testloader` is also read as a global inside train_dann_rev.
trainloader, testloader = dataset_preprocessing()

## Model definition

In [10]:
def get_model(device):
    """Create an AlexNetDANNRev initialised from the pretrained AlexNet checkpoint.

    The checkpoint is loaded with ``strict=False``, so keys that do not match
    between checkpoint and model do not raise. The last layer of
    ``model.classifier`` is then replaced by a fresh ``Linear(4096, NUM_CLASSES)``.

    Args:
        device: Device the finished model is moved to.

    Returns:
        The model, on ``device``.
    """
    alexnet_url = 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth'

    net = AlexNetDANNRev()
    pretrained_state = load_state_dict_from_url(alexnet_url, progress=True)
    net.load_state_dict(pretrained_state, strict=False)

    # Fresh output layer sized for this task's classes
    net.classifier[-1] = nn.Linear(4096, NUM_CLASSES)

    return net.to(device)

def save_model(model, file_name):
    """Serialize ``model.state_dict()`` to ``file_name`` with ``torch.save``."""
    weights = model.state_dict()
    torch.save(weights, file_name)

def load_model(model, file_name, device="cuda"):
    """Load a saved ``state_dict`` from ``file_name`` into ``model``.

    Args:
        model: Module whose parameters are overwritten in place.
        file_name: Path of a file written with ``torch.save(model.state_dict(), ...)``.
        device: Device (string or ``torch.device``) the stored tensors are mapped to.

    Returns:
        The same ``model`` object, for call chaining.
    """
    # weights_only=True restricts unpickling to tensors and plain containers, so a
    # tampered checkpoint cannot execute code; it also avoids torch.load's FutureWarning.
    state_dict = torch.load(file_name, map_location=torch.device(device), weights_only=True)
    model.load_state_dict(state_dict)
    return model

## Loss function definition

In [11]:
def get_loss_function():
    """Return a new ``nn.CrossEntropyLoss`` built with its default arguments."""
    criterion = nn.CrossEntropyLoss()
    return criterion

## Optimizer definition

In [12]:
def get_optimizer(model):
    """Return an SGD optimizer over all parameters of ``model``.

    Learning rate, momentum and weight decay come from the notebook constants
    ``LR``, ``MOMENTUM`` and ``WEIGHT_DECAY``.
    """
    sgd_settings = dict(lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
    return torch.optim.SGD(model.parameters(), **sgd_settings)

## Scheduler definition

In [13]:
def get_scheduler(optimizer):
    """Return a ``StepLR`` scheduler for ``optimizer``.

    Step size and decay factor come from the notebook constants ``STEP_SIZE``
    and ``GAMMA``.
    """
    step_lr = optim.lr_scheduler.StepLR(
        optimizer,
        step_size=STEP_SIZE,
        gamma=GAMMA,
    )
    return step_lr

## Plot

In [14]:
def plot_training_metrics(train_class_losses, train_src_domain_losses, train_trg_domain_losses, learning_rates, base_dir):
    """Save the loss curves and the learning-rate curve as PDFs under ``{base_dir}/plots``.

    Two files are written: ``loss.pdf`` (the three loss series on one axes) and
    ``learning_rate.pdf``. Each sequence is plotted against its index, labelled
    "Epoch" on the x axis. The ``plots`` directory is not created here.

    Args:
        train_class_losses: Per-epoch classification loss values.
        train_src_domain_losses: Per-epoch source-domain loss values.
        train_trg_domain_losses: Per-epoch target-domain loss values.
        learning_rates: Per-epoch learning rate values.
        base_dir: Run directory that contains the ``plots`` folder.
    """
    def _save_line_chart(title, y_label, series, out_name):
        # series: (values, legend label) pairs drawn in order on a single axes
        fig, ax = plt.subplots()
        ax.set_title(title)
        ax.set_ylabel(y_label)
        ax.set_xlabel("Epoch")
        for values, legend_label in series:
            ax.plot(values, label=legend_label)
        ax.legend()
        fig.savefig(f"{base_dir}/plots/{out_name}")
        # Close explicitly: this runs every epoch and open figures would pile up
        plt.close(fig)

    _save_line_chart(
        "Loss",
        "Loss",
        [
            (train_class_losses, "Train Class Loss"),
            (train_src_domain_losses, "Train Src Domain Loss"),
            (train_trg_domain_losses, "Train Trg Domain Loss"),
        ],
        "loss.pdf",
    )

    _save_line_chart(
        "Learning rate",
        "learning rate",
        [(learning_rates, "Learning Rate")],
        "learning_rate.pdf",
    )

## Training

In [15]:
def train_dann_rev(model, trainloader, loss_function, optimizer, scheduler, device, monitor, base_dir, targetloader=None):
    """Train ``model`` for ``EPOCHS`` epochs on paired source/target batches.

    Each step draws one labelled source batch and one target batch (target
    labels are ignored) and back-propagates three losses before a single
    optimizer step:

    * classification loss on ``model(src_inputs)`` against the source labels;
    * domain loss on ``model(src_inputs, alpha=ALPHA)`` against label 0;
    * domain loss on ``model(trg_inputs, alpha=ALPHA)`` against label 1.

    After every epoch the scheduler is stepped, the weights are saved to
    ``{base_dir}/weights/last.pt`` and the metric plots are redrawn.

    Args:
        model: Network accepting ``model(x)`` and ``model(x, alpha=...)``; the
            outputs are treated as class logits and domain logits respectively.
        trainloader: Loader yielding ``(inputs, labels)`` source batches.
        loss_function: Criterion applied to all three loss terms.
        optimizer: Optimizer over the model parameters.
        scheduler: Learning-rate scheduler, stepped once per epoch.
        device: Device that inputs and labels are moved to.
        monitor: Progress/log helper providing ``start``, ``update``, ``stop``
            and ``print_stats``.
        base_dir: Run directory containing ``weights`` and ``plots`` folders.
        targetloader: Loader yielding target-domain batches. Defaults to the
            notebook-level ``testloader`` when omitted.
    """
    cudnn.benchmark = True

    # Keep the original behaviour for existing callers: use the notebook's global loader
    if targetloader is None:
        targetloader = testloader

    # zip() stops at the shorter loader, so that is the real number of steps per epoch
    steps_per_epoch = min(len(trainloader), len(targetloader))

    train_class_losses = []
    train_src_domain_losses = []
    train_trg_domain_losses = []
    learning_rates = []

    for e in range(EPOCHS):
        monitor.start(desc=f"Epoch {e + 1}/{EPOCHS}", max_progress=steps_per_epoch)

        learning_rate = scheduler.get_last_lr()[0]
        learning_rates.append(learning_rate)

        train_class_loss = 0.0
        train_src_domain_loss = 0.0
        train_trg_domain_loss = 0.0

        cumulative_class_loss = 0.0
        cumulative_src_domain_loss = 0.0
        cumulative_trg_domain_loss = 0.0

        count_loss = 0

        model.train()

        for i, ((src_inputs, src_labels), (trg_inputs, _)) in enumerate(zip(trainloader, targetloader)):
            src_inputs, src_labels = src_inputs.to(device), src_labels.to(device)
            trg_inputs = trg_inputs.to(device)

            optimizer.zero_grad()

            src_classes = model(src_inputs)
            src_domains = model(src_inputs, alpha=ALPHA)
            trg_domains = model(trg_inputs, alpha=ALPHA)

            # Classification Loss
            class_loss = loss_function(src_classes, src_labels)
            cumulative_class_loss += class_loss.item()
            class_loss.backward()

            # Source Domain Adversarial Loss --> src_domains_label = 0
            # Labels are sized from the actual output so a partial batch cannot cause a shape mismatch
            src_domains_label = torch.zeros(src_domains.size(0), dtype=torch.int64, device=device)
            src_domains_loss = loss_function(src_domains, src_domains_label)
            cumulative_src_domain_loss += src_domains_loss.item()
            src_domains_loss.backward()

            # Target Domain Adversarial Loss --> trg_domains_label = 1
            trg_domains_label = torch.ones(trg_domains.size(0), dtype=torch.int64, device=device)
            trg_domains_loss = loss_function(trg_domains, trg_domains_label)
            cumulative_trg_domain_loss += trg_domains_loss.item()
            trg_domains_loss.backward()

            count_loss += 1

            # Gradients from the three backward passes have accumulated; clip their joint norm
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

            # Running means over the steps seen so far in this epoch
            train_class_loss = cumulative_class_loss / count_loss
            train_src_domain_loss = cumulative_src_domain_loss / count_loss
            train_trg_domain_loss = cumulative_trg_domain_loss / count_loss

            monitor.update(
                i + 1,
                learning_rate=f"{learning_rate:.5f}",
                train_class_loss=f"{train_class_loss:.4f}",
                train_src_domain_loss=f"{train_src_domain_loss:.4f}",
                train_trg_domain_loss=f"{train_trg_domain_loss:.4f}",
            )

        train_class_losses.append(train_class_loss)
        train_src_domain_losses.append(train_src_domain_loss)
        train_trg_domain_losses.append(train_trg_domain_loss)

        monitor.stop()

        scheduler.step()

        save_model(model, f"{base_dir}/weights/last.pt")

        plot_training_metrics(
            train_class_losses,
            train_src_domain_losses,
            train_trg_domain_losses,
            learning_rates,
            base_dir,
        )

    monitor.print_stats()

## Testing

In [19]:
def test(model, testloader, device, monitor):
    monitor.start(desc=f"Testing", max_progress=len(testloader))

    test_accuracy = 0.0
    correct_predictions = 0
    count_predictions = 0

    inference_times = []

    model.eval()
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(testloader):
            inputs, labels = inputs.to(device), labels.to(device)
            start_time = time.perf_counter()
            logits = model(inputs)
            end_time = time.perf_counter()

            batch_inference_time = (end_time - start_time) / inputs.size(0)
            inference_times.append(batch_inference_time)

            predicted_labels = torch.argmax(logits, dim=1)
            count_predictions += labels.size(0)
            correct_predictions += (predicted_labels == labels).sum().item()
            test_accuracy = correct_predictions / count_predictions

            monitor.update(
                i + 1,
                test_accuracy=f"{test_accuracy:.4f}",
            )

    monitor.stop()

    mean_inference_time = np.mean(inference_times)
    std_inference_time = np.std(inference_times)

    monitor.log(f"Accuracy on test images: {100 * test_accuracy:.3f} %")
    monitor.log(f"Mean inference time: {mean_inference_time * 1000:.3f} ms")
    monitor.log(f"Standard deviation of inference time: {std_inference_time * 1000:.3f} ms")


## DANN (Domain Adversarial Neural Network)

In [17]:
model = get_model(device)

os.makedirs(f"{res_dir}/res", exist_ok=True)

# Refuse to reuse the directory of an earlier run with the same model class and VERSION
dir_name = f"{model.__class__.__name__}_{VERSION}"
if dir_name in os.listdir(f"{res_dir}/res"):
    raise Exception(f"Directory {dir_name} already exists")

# Run directory layout: <base_dir>/, <base_dir>/weights/, <base_dir>/plots/
base_dir = f"{res_dir}/res/{dir_name}"
sub_dirs = [base_dir, f"{base_dir}/weights", f"{base_dir}/plots"]
for sub_dir in sub_dirs:
    os.makedirs(sub_dir, exist_ok=True)


# Training
train_monitor = Monitor(file_name=f"{base_dir}/training_log.txt")

loss_function = get_loss_function()
optimizer = get_optimizer(model)
scheduler = get_scheduler(optimizer)

# Record the full training configuration at the top of the log
train_monitor.log(f"Model:\n{model}\n")
train_monitor.log(f"Loss function:\n{loss_function}\n")
train_monitor.log(f"Optimizer:\n{optimizer}\n")
train_monitor.log(f"Scheduler:\n{scheduler.__class__.__name__}")
# Dump every public, non-callable scheduler attribute as "name: value"
for attr in dir(scheduler):
    if attr.startswith("_"):
        continue
    attr_value = getattr(scheduler, attr)
    if callable(attr_value):
        continue
    train_monitor.log(f"{attr}: {attr_value}")
train_monitor.log("\n")

train_dann_rev(
    model=model, trainloader=trainloader,
    loss_function=loss_function, optimizer=optimizer, scheduler=scheduler,
    device=device, monitor=train_monitor, base_dir=base_dir,
)


Model:
AlexNetDANNRev(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace=True)
    (3): D

In [20]:
MODEL_NAME = "last.pt"

# Rebuild the architecture, then overwrite its weights with the saved checkpoint
model = get_model(device)

dir_name = f"{model.__class__.__name__}_{VERSION}"
base_dir = f"{res_dir}/res/{dir_name}"

# Testing
test_monitor = Monitor(file_name=f"{base_dir}/testing_log.txt")
model = load_model(model, f"{base_dir}/weights/{MODEL_NAME}", device)

test_monitor.log(f"Testing model: {MODEL_NAME}")

# Evaluate on the source loader first, then on the target loader,
# with a blank log entry between the two reports
evaluations = [
    ("Testing dataset: traindataset (cartoon)", trainloader),
    ("Testing dataset: testdataset (sketch)", testloader),
]
for position, (banner, loader) in enumerate(evaluations):
    if position > 0:
        test_monitor.log("\n")
    test_monitor.log(banner)
    test(model, loader, device, test_monitor)

  model.load_state_dict(torch.load(file_name, map_location=torch.device(device)))


Testing model: last.pt
Testing dataset: traindataset (cartoon)
Testing:
Progress: |██████████|  100% 	Time: 00:00:13 s 	Test accuracy: 0.9987 

Accuracy on test images: 99.870 %
Mean inference time: 5.245 ms
Standard deviation of inference time: 0.934 ms


Testing dataset: testdataset (sketch)
Testing:
Progress: |██████████|  100% 	Time: 00:00:24 s 	Test accuracy: 0.5961 

Accuracy on test images: 59.608 %
Mean inference time: 5.908 ms
Standard deviation of inference time: 0.845 ms
