<a href="https://colab.research.google.com/github/Zinni98/DL-Project/blob/main/project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from os import makedirs
from os import listdir
from os.path import join, isfile
from tqdm import tqdm
from google.colab import drive
from shutil import copytree

In [2]:
drive.mount('/content/gdrive/')

Mounted at /content/gdrive/


In [3]:
import torch
from torch.utils.data import Subset
import torchvision.transforms as T
import torch.nn.functional as F
from torchvision.datasets import ImageFolder
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import torchvision
import torchvision.transforms.functional as F1


def get_data(batch_size, root_dir):
  """

  Params:
  ------
  root_dir: str
    Directory of adaptiope_small (e.g. "something/something_else/adaptiope_small")
  """

  # Transforms for resnet found there https://pytorch.org/hub/pytorch_vision_resnet/
  transform_img = list()
  transform_img.append(T.Resize(256))
  transform_img.append(T.CenterCrop(224))
  transform_img.append(T.ToTensor())
  transform_img.append(T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]))
  transform_img = T.Compose(transform_img)

  # load data
  product_images_dataset = ImageFolder(root = f"{root_dir}/product_images/", transform = transform_img)
  rw_images_dataset = ImageFolder(root = f"{root_dir}/real_life/", transform = transform_img)

  product_train_indexes, product_test_indexes = train_test_split(list(range(len(product_images_dataset.targets))),
                                                test_size = 0.2, stratify = product_images_dataset.targets, random_state = 42)
  
  rw_train_indexes, rw_test_indexes = train_test_split(list(range(len(rw_images_dataset.targets))),
                                                test_size = 0.2, stratify = rw_images_dataset.targets, random_state = 42)
  

  product_train_data = Subset(product_images_dataset, product_train_indexes)
  product_test_data = Subset(product_images_dataset, product_test_indexes)

  rw_train_data = Subset(rw_images_dataset, rw_train_indexes)
  rw_test_data = Subset(rw_images_dataset, rw_test_indexes)

  product_train_loader = torch.utils.data.DataLoader(product_train_data, batch_size, shuffle = False)
  product_test_loader = torch.utils.data.DataLoader(product_test_data, batch_size, shuffle = False)

  rw_train_loader = torch.utils.data.DataLoader(rw_train_data, batch_size, shuffle = False)
  rw_test_loader = torch.utils.data.DataLoader(rw_test_data, batch_size, shuffle = False)

  return product_train_loader, product_test_loader, rw_train_loader, rw_test_loader




In [4]:
import torch.nn as nn


def initialize_resnet34(num_classes, pretrained = True):

  model = torchvision.models.resnet34(pretrained=pretrained)

  in_features = model.fc.in_features

  ##model.fc = nn.Sequential(nn.Linear(512, num_classes))#, nn.LogSoftmax(dim = 1))
  model.fc = nn.Linear(512, num_classes)
  for param in model.fc.parameters():
    param.requires_grad = True

  return model

In [5]:
# for labelled data
def get_ce_cost_function():
  cost_function = torch.nn.CrossEntropyLoss()
  return cost_function

In [6]:
import torch.optim as optim

def get_optimizer_SGD(model, lr, wd, momentum):
  
  final_layer_weights = []
  rest_of_the_net_weights = []
  
  for name, param in model.named_parameters():
    if name.startswith('fc'): # ensure to work with parameters related to linear layer (7th)
      final_layer_weights.append(param)
    else:
      rest_of_the_net_weights.append(param)

  optimizer = torch.optim.SGD([
      {'params': rest_of_the_net_weights},
      {'params': final_layer_weights, 'lr': lr}
  ], lr=lr / 10, weight_decay=wd, momentum=momentum)
  
  return optimizer

def get_optimizer_ADAM(net, lr = 0.0001, weight_decay = 0.000001):
  opt = optim.Adam(params=net.parameters(), lr=lr, amsgrad=True, weight_decay = weight_decay, betas=(0.8, 0.9))
  return opt

In [7]:
def training_step_baseline(net, data_loader, optimizer, cost_function, device = 'cuda'):
  samples = 0.
  cumulative_loss = 0.
  cumulative_accuracy = 0.
  
  net.train() 
 
  # iterate over the training set
  for batch_idx, (inputs, targets) in enumerate(data_loader):
    # load data into GPU
    inputs = inputs.to(device)
    targets = targets.to(device)
      
    # forward pass
    outputs = net(inputs)

    # loss computation
    loss = cost_function(outputs,targets)

    # backward pass
    loss.backward()
    
    # parameters update
    optimizer.step()

    # gradients reset
    optimizer.zero_grad()

    # fetch prediction and loss value
    samples += inputs.shape[0]
    cumulative_loss += loss.item()
    _, predicted = outputs.max(dim=1) # max() returns (maximum_value, index_of_maximum_value)

    # compute training accuracy
    cumulative_accuracy += predicted.eq(targets).sum().item()

  return cumulative_loss/samples, (cumulative_accuracy/samples)*100



def test_step_baseline(net, data_loader, cost_function, device='cuda'):

  samples = 0.
  cumulative_loss = 0.
  cumulative_accuracy = 0.

  # set the network to evaluation mode
  net.eval() 

  # disable gradient computation (we are only testing, we do not want our model to be modified in this step!)
  with torch.no_grad():

    # iterate over the test set
    for batch_idx, (inputs, targets) in enumerate(data_loader):
      
      # load data into GPU
      inputs = inputs.to(device)
      targets = targets.to(device)
        
      # forward pass
      outputs = net(inputs)

      # loss computation
      loss = cost_function(outputs, targets)

      # fetch prediction and loss value
      samples+=inputs.shape[0]
      cumulative_loss += loss.item() # Note: the .item() is needed to extract scalars from tensors
      _, predicted = outputs.max(1)

      # compute accuracy
      cumulative_accuracy += predicted.eq(targets).sum().item()

  return cumulative_loss/samples, cumulative_accuracy/samples*100

In [8]:
from torch.utils.tensorboard import SummaryWriter

def main(batch_size=128, 
         device='cuda', 
         learning_rate=0.0001, 
         weight_decay=0.000001, 
         momentum=0.9, 
         epochs=50,
         entropy_loss_weight=0.1,
         nr_classes = 20, 
         img_root="gdrive/My Drive/Colab Notebooks/data/adaptiope_small"
         ):

  writer = SummaryWriter(log_dir="gdrive/My Drive/Colab Notebooks/runs/exp2")

  ## DataLoader split the size of the given dataset into #of elements in the dataset/batch size
  source_train_loader, source_test_loader, target_train_loader, target_test_loader = get_data(batch_size, img_root)
  print('DataLoaders Done')
  net = initialize_resnet34(nr_classes).to(device)
  print('Network Init Done')
  #optimizer = get_optimizer_SGD(net, learning_rate, wd = weight_decay, momentum = momentum)
  optimizer = get_optimizer_ADAM(net, learning_rate, weight_decay)
  print('Got Optimizer')
  cost_function = get_ce_cost_function()
  print('Got Cost Function')
  print('Time to train!\n==========================BASELINE========================')

  for e in range(epochs):
    ##BASELINE


    # def training_step_baseline(net, data_loader, optimizer, cost_function, scheduler, device='cuda'):
    train_loss, train_accuracy = training_step_baseline(net, source_train_loader, optimizer, cost_function, device)
    #def test_step_baseline(net, data_loader, cost_function, device='cuda'):
    test_loss, test_accuracy = test_step_baseline(net, target_test_loader, cost_function, device)

    print('Epoch: {:d}'.format(e+1))
    print('\t Training loss {:.5f}, Training accuracy {:.2f}'.format(train_loss, train_accuracy))
    print('\t Test loss {:.5f}, Test accuracy {:.2f}'.format(test_loss, test_accuracy))
    print('-----------------------------------------------------')
    
    # add values to logger
    writer.add_scalar('Loss/train_loss', train_loss, e + 1)
    writer.add_scalar('Loss/test_loss', test_loss, e + 1)
    writer.add_scalar('Accuracy/train_accuracy', train_accuracy, e + 1)
    writer.add_scalar('Accuracy/test_accuracy', test_accuracy, e + 1)
  

  # perform final test step and print the final metrics
  print('After training:')
  train_loss, train_accuracy = test_step_baseline(net, source_train_loader, cost_function, device)
  test_loss, test_accuracy = test_step_baseline(net, target_test_loader, cost_function, device)

  print('\t Training loss {:.5f}, Training accuracy {:.2f}'.format(train_loss, train_accuracy))
  print('\t Test loss {:.5f}, Test accuracy {:.2f}'.format(test_loss, test_accuracy))
  print('-----------------------------------------------------')


  
  # close the logger
  writer.close()

In [9]:
class SymNet(nn.Module):
  def __init__(self, n_classes: int = 20):
    super(SymNet, self).__init__()
    # Taking the feature extractor of resnet34
    # Reference: https://stackoverflow.com/questions/55083642/extract-features-from-last-hidden-layer-pytorch-resnet18
    resnet = initialize_resnet34(20, True)
    self.feature_extractor = torch.nn.Sequential(*list(resnet.children())[:-1])
    # print(self.feature_extractor)
    self.source_classifier = nn.Linear(in_features=512, out_features=n_classes)
    self.target_classifier = nn.Linear(in_features=512, out_features=n_classes)
  

  def forward(self, x):
    features = self.feature_extractor(x)
    features = features.squeeze()
    source_output = self.source_classifier(features)
    # source_output = nn.Softmax(source_output)

    target_output = self.target_classifier(features)
    # target_output = nn.Softmax(target_output)

    source_target_classifier = torch.cat((source_output, target_output), dim=1)
    
    return source_output , target_output, source_target_classifier
  
  def parameters(self):
    fe = list(self.feature_extractor.parameters())
    sc = list(self.source_classifier.parameters())
    tc = list(self.target_classifier.parameters())
    tot = fe + sc + tc
    for param in tot:
      yield param


In [10]:
def get_optimizer_SGD_uda(model, lr, wd, momentum, e, nr_epochs, classifier = True):
  #if not e==0:
  eta0 = 0.01
  alpha = 10
  beta = 0.75
  etap = eta0 / ((1 + alpha * e / nr_epochs ) ** beta)
  lr = etap * lr
  if classifier: # because Cst has a lr 10 times bigger than G
    lr = lr * 10
    params = list(model.source_classifier.parameters()) + list(model.target_classifier.parameters())
    optimizer = torch.optim.SGD(params, lr=lr, weight_decay=wd, momentum=momentum)
  else:
    optimizer = torch.optim.SGD(model.feature_extractor.parameters(), lr=lr, weight_decay=wd, momentum=momentum)
  return optimizer

def get_optimizer_ADAM_uda(model, e, nr_epochs, lr=0.0001, wd = .000001):
  eta0 = 0.01
  alpha = 10
  beta = 0.75
  etap = eta0 / ((1 + alpha * e / nr_epochs ) ** beta)
  lr = etap * lr
  optimizer = optim.Adam(model.parameters(), lr=lr, amsgrad=True, weight_decay = wd, betas=(0.8, 0.9))
  """if classifier: # because Cst has a lr 10 times bigger than G
    lr = lr * 10
    params = list(model.source_classifier.parameters()) + list(model.target_classifier.parameters())
    optimizer = optim.Adam(params, lr=lr, amsgrad=True, weight_decay = wd, betas=(0.8, 0.9))
  else:
    optimizer = optim.Adam(params=model.feature_extractor.parameters(), lr=lr, amsgrad=True, weight_decay = wd, betas=(0.8, 0.9))"""
  return optimizer
  

In [11]:
def source_loss(output, label):
  """
  Returns
  -------
  Cross entropy loss
  """
  loss_fun = nn.CrossEntropyLoss()
  loss = loss_fun(output, label)
  return loss

def target_loss(output, label):
  return source_loss(output, label)

def source_target_loss(output, st = True):
  """
  st = True if train sample belongs to source, False otherwise
  """
  n_classes = int(output.size(1)/2)
  soft = nn.Softmax(dim=1)
  prob_out = soft(output)
  if st:
    loss = -(prob_out[:,:n_classes].sum(1).log().mean())
  else:
    loss = -(prob_out[:,n_classes:].sum(1).log().mean())
  return loss

def feature_category_loss(output_st, label):
  n_classes = int(output_st.size(1)/2)

  loss_fun_1 = nn.CrossEntropyLoss()
  loss_fun_2 = nn.CrossEntropyLoss()

  loss_1 = loss_fun_1(output_st[:, :n_classes], label)/2
  loss_2 = loss_fun_2(output_st[:,n_classes:], label)/2
  return loss_1 + loss_2

def feature_domain_loss(output_st):
  n_classes = int(output_st.size(1)/2)

  soft = nn.Softmax(dim=1)
  prob_out = soft(output_st)

  loss_1 = -(prob_out[:,:n_classes]).sum(1).log().mean()/2
  loss_2 = -(prob_out[:,n_classes:]).sum(1).log().mean()/2

  return loss_1 + loss_2



def entropyMinimizationPrinciple(output_st):
    nr_classes = int(output_st.size(1)/2)
    soft = nn.Softmax(dim=1)
    prob_out = soft(output_st)

    p_st_source = prob_out[:, :nr_classes]
    p_st_target = prob_out[:, nr_classes:]
    qst = p_st_source + p_st_target

    emp = -qst.log().mul(qst).sum(1).mean()

    return emp

In [12]:
def training_step_uda(net, src_data_loader, target_data_loader, optimizer_1, lam, e,device = 'cuda'):
  source_samples = 0.
  target_samples = 0.
  cumulative_classifier_loss = 0.
  cumulative_feature_loss = 0.
  cumulative_accuracy = 0.

  target_iter = iter(target_data_loader)

  net.train()

  # iterate over the training set
  for batch_idx, (inputs_source, labels) in enumerate(src_data_loader):
    try:
      inputs_target, _ = next(target_iter)
      inputs_target = inputs_target.to(device)
    except:
      target_iter = iter(target_data_loader)
      inputs_target, _ = next(target_iter)
      inputs_target = inputs_target.to(device)
    
    # load data into GPU
    inputs_source = inputs_source.to(device)
    labels = labels.to(device)
    
    print(f"Batch number: {batch_idx}")

    length_source_input = inputs_source.shape[0]

    ## concatenation along batch dimension.
    inputs = torch.cat((inputs_source, inputs_target), dim=0)

    # forward pass
    c_s, c_t, c_st = net(inputs)

    c_s_source = c_s[:length_source_input,:]
    c_s_target = c_s[length_source_input:,:]

    c_t_source = c_t[:length_source_input,:]
    c_t_target = c_t[length_source_input:,:]

    c_st_source = c_st[:length_source_input,:]
    c_st_target = c_st[length_source_input:,:]


    # Equation 5 of the paper
    error_source_task = source_loss(c_s_source, labels)

    # Equation 6 of the paper
    error_target_task = target_loss(c_t_source, labels)

    # Equation 7 of the paper
    domain_loss_source = source_target_loss(c_st_source)
    domain_loss_target = source_target_loss(c_st_target, st = False)
    error_domain = domain_loss_source + domain_loss_target

    classifier_total_loss = error_source_task + error_target_task + error_domain

    classifier_total_loss.backward(retain_graph = True)

    for param in net.feature_extractor.parameters():
      param.grad.data.zero_()
    
    class_params = []
    for param in net.source_classifier.parameters():
      class_params.append(param.grad.data.clone())
      param.grad.data.zero_()
    for param in net.target_classifier.parameters():
      class_params.append(param.grad.data.clone())
      param.grad.data.zero_()

    # Equation 8 of the paper
    error_feature_category = feature_category_loss(c_st_source, labels)

    # Equation 9 of the paper
    error_feature_domain = feature_domain_loss(c_st_target)

    min_entropy = entropyMinimizationPrinciple(c_st_target)

    # Equations 11 of the paper
    feature_total_loss = error_feature_category + lam * (error_feature_domain + min_entropy)

    feature_total_loss.backward()

    idx = 0
    for param in net.source_classifier.parameters():
      param.grad.data = class_params[idx]
      idx += 1
    for param in net.target_classifier.parameters():
      param.grad.data = class_params[idx]
      idx += 1

    
    optimizer_1.step()
    optimizer_1.zero_grad()

    ## optimizer classifier losses composed loss
    ## order is important here!
    


    # print statistics
    source_samples+=inputs_source.shape[0]
    target_samples+=inputs_target.shape[0]
    
    cumulative_classifier_loss += classifier_total_loss.item()
    cumulative_feature_loss += feature_total_loss.item()
    _, predicted = c_s_source.max(dim = 1) ## to get the maximum probability
    cumulative_accuracy += predicted.eq(labels).sum().item()

  return cumulative_classifier_loss/source_samples, cumulative_feature_loss/target_samples, cumulative_accuracy/source_samples*100


In [13]:
def test_step(net, data_target_test_loader, device='cuda:0'):

    '''
    Params
    ------

    net : model 
    data_loader : DataLoader obj of the domain to test on
    cost_function : cost function used to address accuracies (not necessary) -> TargetClassifierLoss
    device : GPU or CPU device

    '''

    samples = 0.
    cumulative_loss = 0.
    cumulative_accuracy = 0.

    net.eval()

    with torch.no_grad():

        for batch_idx, (inputs, labels) in enumerate(data_target_test_loader):

            # load data into GPU
            inputs = inputs.to(device)
            targets = labels.to(device)
        
            # forward pass
            _, c_t, _ = net(inputs)

            # apply the loss
            loss = target_loss(c_t, targets)

            # print statistics
            samples+=inputs.shape[0]
            cumulative_loss += loss.item() # Note: the .item() is needed to extract scalars from tensors
            _, predicted = c_t.max(1)
            cumulative_accuracy += predicted.eq(targets).sum().item()

    return cumulative_loss/samples, cumulative_accuracy/samples*100

In [14]:
classes = ['backpack', 'bookcase', 'car jack', 'comb', 'crown', 'file cabinet', 'flat iron', 'game controller', 'glasses', 'helicopter', 'ice skates', 'letter tray', 'monitor', 'mug', 'network switch', 'over-ear headphones', 'pen', 'purse', 'stand mixer', 'stroller']

cuda = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 128
num_classes = len(classes)
rootdir_matteo = '/content/gdrive/MyDrive/Colab Notebooks/Deep Learning labs/DA Project/adaptiope_small'
rootdir_alessandro = 'gdrive/My Drive/Colab Notebooks/data/adaptiope_small'
rootdir_alessandro_uni = 'gdrive/My Drive/project/data/adaptiope_small'

In [22]:
from torch.utils.tensorboard import SummaryWriter
import math

def main_uda(batch_size=128,
         device=cuda, 
         lr = 0.01,
         weight_decay=0.000001, 
         momentum=0.9, 
         epochs=10,
         entropy_loss_weight=0.1,
         nr_classes = num_classes, 
         img_root=rootdir_alessandro_uni
         ):
    
  # writer = SummaryWriter(log_dir="gdrive/My Drive/Colab Notebooks/runs/exp2")

  ## DataLoader split the size of the given dataset into #of elements in the dataset/batch size
  source_train_loader, source_test_loader, target_train_loader, target_test_loader = get_data(batch_size, img_root)
  print('DataLoaders Done')
  net = SymNet().to(device)
  print('Network Init Done')
  optimizer_1 = get_optimizer_ADAM_uda(model=net, e=0, nr_epochs = epochs,lr=lr, wd=weight_decay)
  # optimizer_2 = get_optimizer_ADAM_uda(model=net, lr=lr, wd=weight_decay, e=0, nr_epochs=epochs, classifier=False)
  print('Got optimizers')

  for e in range(epochs):
    lam = 2 / (1 + math.exp(-1 * 10 * e / epochs)) - 1
    #def training_step_uda(net, src_data_loader, target_data_loader, optimizer_1, optimizer_2, lam, device = 'cuda')
    train_ce_loss, train_en_loss, train_accuracy = training_step_uda(net=net, src_data_loader=source_train_loader, 
                                                        target_data_loader=target_train_loader, 
                                                        optimizer_1=optimizer_1, lam=lam, e=e, device=device)
    torch.cuda.empty_cache()
    
    test_loss, test_accuracy = test_step(net, target_test_loader, device)

    print('Epoch: {:d}'.format(e+1))
    print('\t Train: CE loss {:.5f}, Entropy loss {:.5f}, Accuracy {:.2f}'.format(train_ce_loss, train_en_loss, train_accuracy))
    print('\t Test: CE loss {:.5f}, Accuracy {:.2f}'.format(test_loss, test_accuracy))
    print('-----------------------------------------------------')  

In [23]:
main_uda()

DataLoaders Done
Network Init Done
Got optimizers
Batch number: 0
Batch number: 1
Batch number: 2
Batch number: 3
Batch number: 4
Batch number: 5
Batch number: 6
Batch number: 7
Batch number: 8
Batch number: 9
Batch number: 10
Batch number: 11
Batch number: 12
Epoch: 1
	 Train: CE loss 0.03990, Entropy loss 0.01437, Accuracy 55.38
	 Test: CE loss 0.01664, Accuracy 68.25
-----------------------------------------------------
Batch number: 0
Batch number: 1
Batch number: 2
Batch number: 3
Batch number: 4
Batch number: 5
Batch number: 6
Batch number: 7
Batch number: 8
Batch number: 9
Batch number: 10
Batch number: 11
Batch number: 12
Epoch: 2
	 Train: CE loss 0.01472, Entropy loss 0.01405, Accuracy 97.69
	 Test: CE loss 0.01076, Accuracy 77.25
-----------------------------------------------------
Batch number: 0
Batch number: 1
Batch number: 2
Batch number: 3
Batch number: 4
Batch number: 5
Batch number: 6
Batch number: 7
Batch number: 8
Batch number: 9
Batch number: 10
Batch number: 11
Ba