## Import DATA PACS and gradient_reversal_example.py

In [1]:
!git clone https://github.com/MachineLearning2020/Homework3-PACS

Cloning into 'Homework3-PACS'...
remote: Enumerating objects: 10032, done.[K
remote: Total 10032 (delta 0), reused 0 (delta 0), pack-reused 10032[K
Receiving objects: 100% (10032/10032), 174.13 MiB | 41.92 MiB/s, done.
Resolving deltas: 100% (1/1), done.
Checking out files: 100% (9993/9993), done.


## ALEX NET


In [2]:
import torch
import torch.nn as nn
import torchvision
from torch.autograd import Function
from torchvision import transforms
from typing import Any

try:
    from torch.hub import load_state_dict_from_url
except ImportError:
    from torch.utils.model_zoo import load_url as load_state_dict_from_url

__all__ = ['AlexNet', 'alexnet']


model_urls = {
    'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth',
}

class ReverseLayerF(Function):
    # Forwards identity
    # Sends backward reversed gradients
    @staticmethod
    def forward(ctx, x, alpha):
        ctx.alpha = alpha

        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        output = grad_output.neg() * ctx.alpha

        return output, None


class AlexNet(nn.Module):

    def __init__(self) -> None:
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, 1000),
        )
        self.domain_classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, 1000),
        )

    def forward(self, x: torch.Tensor, alpha) -> torch.Tensor:
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        if alpha is not None:
          reverse_feature_x = ReverseLayerF.apply(x, alpha)
          x = self.domain_classifier(reverse_feature_x)

        else:
          x = self.classifier(x)

        return x


def alexnet(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> AlexNet:
    r"""AlexNet model architecture from the
    `"One weird trick..." <https://arxiv.org/abs/1404.5997>`_ paper.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    model = AlexNet(**kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls['alexnet'],
                                              progress=progress)
        model.load_state_dict(state_dict, strict=False)
        model.domain_classifier.load_state_dict(state_dict, strict=False)
    return model


In [3]:
## Transforms
train_transform = transforms.Compose([transforms.Resize(256),      # Resizes short size of the PIL image to 256
                                      transforms.CenterCrop(224),  # Crops a central square patch of the image
                                                                   # 224 because torchvision's AlexNet needs a 224x224 input!
                                                                   # Remember this when applying different transformations, otherwise you get an error
                                      transforms.ToTensor(), # Turn PIL Image to torch.Tensor
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) # Normalizes tensor with mean and standard deviation
])

eval_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))                                    
])

## Dataloader
train_pacs_source_data = torchvision.datasets.ImageFolder('/content/Homework3-PACS/PACS/photo', train_transform)
train_pacs_source_data_loader = torch.utils.data.DataLoader(train_pacs_source_data, batch_size=256, shuffle=True)

test_pacs_data = torchvision.datasets.ImageFolder('/content/Homework3-PACS/PACS/art_painting', eval_transform)
test_pacs_data_loader = torch.utils.data.DataLoader(test_pacs_data, batch_size=256, shuffle=True)


In [12]:
# class_prediction = net(images)
# domain_prediction = net(images, alpha)
LR = 0.005            # The initial Learning Rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization, you can keep this at the default

NUM_EPOCHS = 40      # Total number of training epochs (iterations over dataset)
STEP_SIZE = 20       # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1          # Multiplicative factor for learning rate step-down
DEVICE = 'cuda' # 'cuda' or 'cpu'
LOG_FREQUENCY = 10


In [13]:
import torch.optim as optim
from torch.backends import cudnn
## Model definition
net = alexnet(pretrained=True)
net.classifier[6] = nn.Linear(4096, 7)
net.domain_classifier[6] = nn.Linear(4096, 2)
# Define loss function
criterion = nn.CrossEntropyLoss() # for classification, we use Cross Entropy

# Choose parameters to optimize
# To access a different set of parameters, you have to access submodules of AlexNet
# (nn.Module objects, like AlexNet, implement the Composite Pattern)
# e.g.: parameters of the fully connected layers: net.classifier.parameters()
# e.g.: parameters of the convolutional layers: look at alexnet's source code ;) 
parameters_to_optimize = net.parameters() # In this case we optimize over all the parameters of AlexNet

# Define optimizer
# An optimizer updates the weights based on loss
# We use SGD with momentum
optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)

# Define scheduler
# A scheduler dynamically changes learning rate
# The most common schedule is the step(-down), which multiplies learning rate by gamma every STEP_SIZE epochs
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

In [14]:
## Train
# By default, everything is loaded to cpu
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda

cudnn.benchmark # Calling this optimizes runtime

current_step = 0

# Start iterating over the epochs
for epoch in range(NUM_EPOCHS):
  print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, scheduler.get_lr()))

  target_data_loader_iterator = iter(test_pacs_data_loader)
  # Iterate over the dataset
  for images, labels in train_pacs_source_data_loader:
    # Bring data over the device of choice
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)
    labels_da_s = torch.zeros(images.size()[0], dtype=torch.long).to(DEVICE) 

    #target
    try:
      images_t, labels_t = next(target_data_loader_iterator)
      images_t = images_t.to(DEVICE)
      labels_da_t = torch.ones(images_t.size()[0], dtype=torch.long).to(DEVICE) 

    except StopIteration:
      target_data_loader_iterator = iter(test_pacs_data_loader)
      images_t, labels_t = next(target_data_loader_iterator)
      images_t = images_t.to(DEVICE)
      labels_da_t = torch.ones(images_t.size()[0], dtype=torch.long).to(DEVICE) 

    net.train() # Sets module in training mode

    # PyTorch, by default, accumulates gradients after each backward pass
    # We need to manually set the gradients to zero before starting a new iteration
    optimizer.zero_grad() # Zero-ing the gradients

    #i
    outputs = net(images, None)
    loss = criterion(outputs, labels)
    loss.backward()

    #ii
    outputs_da = net(images, 0.05)
    loss_da = criterion(outputs_da, labels_da_s )
    loss_da.backward()
    #iii
    outputs_da_t = net(images_t, 0.05)
    loss_da_t = criterion(outputs_da_t, labels_da_t)
    loss_da_t.backward()

    # Log loss
    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Loss {}, Loss_da {}, Loss_da_t {}'.format(current_step, loss.item(), loss_da.item(), loss_da_t.item()))

    # Compute gradients for each layer and update weights
      # backward pass: computes gradients
   
    
    optimizer.step() # update weights based on accumulated gradients

    current_step += 1

  # Step the scheduler
  scheduler.step()



Starting epoch 1/40, LR = [0.005]
Step 0, Loss 2.4364380836486816, Loss_da 0.8158575296401978, Loss_da_t 0.5731236338615417
Starting epoch 2/40, LR = [0.005]
Step 10, Loss 0.3586142659187317, Loss_da 0.41266006231307983, Loss_da_t 0.2559729516506195
Starting epoch 3/40, LR = [0.005]
Step 20, Loss 0.11831696331501007, Loss_da 0.23419296741485596, Loss_da_t 0.1717045158147812
Starting epoch 4/40, LR = [0.005]
Starting epoch 5/40, LR = [0.005]
Step 30, Loss 0.05489834398031235, Loss_da 0.2712863087654114, Loss_da_t 0.09909719228744507
Starting epoch 6/40, LR = [0.005]
Step 40, Loss 0.048794426023960114, Loss_da 0.11181071400642395, Loss_da_t 0.20286798477172852
Starting epoch 7/40, LR = [0.005]
Starting epoch 8/40, LR = [0.005]
Step 50, Loss 0.029765071347355843, Loss_da 0.10395465791225433, Loss_da_t 0.19983872771263123
Starting epoch 9/40, LR = [0.005]
Step 60, Loss 0.015390575863420963, Loss_da 0.06421293318271637, Loss_da_t 0.17988614737987518
Starting epoch 10/40, LR = [0.005]
Starti

In [11]:
from tqdm import tqdm
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.train(False) # Set Network to evaluation mode

running_corrects = 0
for images, labels in tqdm(test_pacs_data_loader):
  images = images.to(DEVICE)
  #labels =  labels_da_t = torch.ones(images_t.size()[0], dtype=torch.long).to(DEVICE) 
  labels = labels.to(DEVICE)

  # Forward Pass
  outputs = net(images, None)

  # Get predictions
  _, preds = torch.max(outputs.data, 1)

  # Update Corrects
  running_corrects += torch.sum(preds == labels.data).data.item()

# Calculate Accuracy
accuracy = running_corrects / float(len(test_pacs_data))

print('Validation Accuracy: {}'.format(accuracy))

100%|██████████| 8/8 [00:06<00:00,  1.15it/s]

Validation Accuracy: 0.5498046875



