In [1]:
#the dataset we will use is MNIST and USPS

#In this notebook we will implement the domain adversarial training of neural network

#the code is based on the paper "Domain-Adversarial Training of Neural Networks"
#also the architecture is same as the paper "Unsupervised Domain Adaptation by Backpropagation"

In [2]:
#we will use resnet50 as the base network
# we will use gradient reversal layer to implement the domain adversarial training
#we will implement gradient reversal layer at three different stages of the base
# classifier.

In [3]:
#first we will try to implement the gradient reversal layer at the end of the base classifier i.e. just before the fully connected layer and after the last feature extractor layer

In [4]:
experiment_name = 'mnist_dann'
version = 'v1'

#concat experiment name and version to get experiment id
experiment_id = experiment_name + '_' + version

model_path = 'saved_models/DANN'

In [5]:
#GPU name
#
GPU_NAME = 'cuda:1'

In [6]:
#neceassary imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable, Function
# from torchvision import datasets, transforms
from torch.utils.data import DataLoader, SubsetRandomSampler
import torch.utils.data as data
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torchvision.models as models
import torch.backends.cudnn as cudnn

import numpy as np

#import utils
import os
import itertools
import time
import copy
import random
import math


In [7]:
#imports for visualizations
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

In [8]:
from torchvision.models import resnet50, ResNet50_Weights
from torchvision.io import read_image
from torchsummary import summary
#import tenserboard
from torch.utils.tensorboard import SummaryWriter

In [9]:
#enable cudnn
cudnn.benchmark = True
# #cuda cache clear
# torch.cuda.empty_cache()

#set random seed
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)



In [10]:
#device
device = torch.device(GPU_NAME if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=1)

In [11]:
#defining the hyperparameters
BATCH = 64

EPOCHS = 10
IMAGE_SIZE = 224
CHANNELS_IMG = 1
NUM_CLASSES = 10


C = 1   #weight of the domain loss
LAMBDA = 1 #scaling factor for the gradient reversal layer
GAMMA = 10

#parameters of ADAM optimizer
LEARNING_RATE = 0.001
BETA_1 = 0.9
BETA_2 = 0.999

#parameters of SGD optimizer with momentum
MOMENTUM = 0.9


# Utility functions

In [12]:
#define function to get one hot encoding of labels
def one_hot_encoding(label, total_classes=NUM_CLASSES):
    vect = torch.eye(total_classes)
    return vect[label]

In [13]:
#we will define optimizer scheduler

## Model

In [14]:
## we will build 3 different models for implementing the domain adversarial training
#first be the feature extractor drived from resnet50
#second be a classifier after the feature extractor layer of resnet50
#third be the domain classifier with input as the feature vector from the feature extractor and output as the domain label

#### Base resnet50

In [15]:
#let us first build the feature extractor
#we input a resent50 model

#creating the model
weights = ResNet50_Weights.DEFAULT
#send weight sto gpu
# weights = weights.to(device)
#sending the model to GPU

base_resnet = resnet50(weights=weights).to(device)

In [16]:
#print model
base_resnet

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [17]:
#we will change the first convolution layer to accept single channel image
#conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
# #if CHANNELS_IMG == 1:
# if CHANNELS_IMG == 1:
#     base_resnet.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)

# #change the last fully connected layer to output classes in NUM_CLASSES
# base_resnet.fc = nn.Linear(2048, NUM_CLASSES,  bias=True)

#write a function to changethe model based on number of channels and number of classes
def change_model(model, num_channels = CHANNELS_IMG, num_classes = NUM_CLASSES):
    if num_channels == 1:
        model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
    
    model.fc = nn.Linear(2048, num_classes,  bias=True)
    return model


base_resnet = change_model(base_resnet, num_channels = CHANNELS_IMG, num_classes = NUM_CLASSES)
base_resnet

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [18]:
#now we will write class for the feature extractor network
#we will pass the resnet50 model as the input to the class, and will use : nn.Sequential(*list(original_model.children())[:-2]) to get the feature extractor part of the model

class FeatureExtractor(nn.Module):
    def __init__(self, base_model):
        super(FeatureExtractor, self).__init__()
        # self.base_model = base_model
        self.feature_extractor = nn.Sequential(*list(base_model.children())[:-2])

    def forward(self, x):
        x = self.feature_extractor(x)
        return x

In [19]:
#now create an instance of the feature extractor and print the model
feature_extractor = FeatureExtractor(base_resnet).to(device)
feature_extractor

FeatureExtractor(
  (feature_extractor): Sequential(
    (0): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
       

In [20]:
#print summary of the model
#generate random pytorch tensor
# x = torch.randn(BATCH, CHANNELS_IMG, IMAGE_SIZE, IMAGE_SIZE).to(device)
#create x as (1, 1, 224, 224) random tensor
# x = torch.rand(1, 1, 224, 224).to(device)
#create numpy array of size (1, 1, 224, 224)
# x = np.random.rand(1, 1, IMAGE_SIZE, IMAGE_SIZE)
#convert numpy array to tensor
# x = torch.from_numpy(x).to(device)
#create x as (1, 1, 224, 224) random tensor
x = torch.rand( 224, 224).to(device)
#add batch dimension
x = x.unsqueeze(0)
#add channel dimension
x = x.unsqueeze(0)
#print x shape
print(x.shape)
#pass it through the model
y = feature_extractor(x)
#print y shape
print(y.shape)


# summary(feature_extractor, (CHANNELS_IMG, IMAGE_SIZE, IMAGE_SIZE))

torch.Size([1, 1, 224, 224])
torch.Size([1, 2048, 7, 7])


In [21]:
# #create a random tensor and pass it through the feature extractor
# x = torch.randn(1, 1, 224, 224).to(device)
# out_feature_extractor = feature_extractor(x)
# out_feature_extractor.shape

In [22]:
# #now we will create the classifier, it will be same as the part of the resnet50 model after the feature extractor i.e. the last two layers of the resnet50 model
# #also the number of classes will be 10 as we have 10 classes in the MNIST dataset, the number of classes is stored in NUM_CLASSES

# class ClassClassifier(nn.Module):
#     def __init__(self, base_model, num_classes=NUM_CLASSES):
#         super(ClassClassifier, self).__init__()
#         self.num_classes = num_classes
#         self.classifier = nn.Sequential(*list(base_model.children())[-2:])

#     def forward(self, x):
#         #print the shape of the input
#         print("inside classifier: input shape",x.shape)
#         x = self.classifier(x)
#         # x = x.view(-1, self.num_classes)
#         return F.softmax(x)
        

In [23]:
#now we will create the classifier, it will be same as the part of the resnet50 model after the feature extractor i.e. the last two layers of the resnet50 model
#also the number of classes will be 10 as we have 10 classes in the MNIST dataset, the number of classes is stored in NUM_CLASSES

class ClassClassifier(nn.Module):
    def __init__(self, base_model):
        super(ClassClassifier, self).__init__()
        self.avgpool= nn.Sequential(*list(base_model.children())[-2:-1])
        self.flatten = nn.Flatten()
        self.fc = nn.Sequential(*list(base_model.children())[-1:])
        #define a flatten layer
        

    def forward(self, x):
        #print the shape of the input
        # print("inside classifier: input shape",x.shape)
        x = self.avgpool(x)

        x = self.flatten(x)

        x = self.fc(x)
        return F.softmax(x)

In [24]:
#create an instance of the classifier and print the model
class_classifier = ClassClassifier(base_resnet).to(device)
class_classifier

ClassClassifier(
  (avgpool): Sequential(
    (0): AdaptiveAvgPool2d(output_size=(1, 1))
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc): Sequential(
    (0): Linear(in_features=2048, out_features=10, bias=True)
  )
)

In [25]:
# print summary of the model
# summary(class_classifier, (2048, 1, 1))

In [26]:
# #pass the out_feature_extractor through the classifier
# out_classifier = classifier(out_feature_extractor)
# #print the shape of the output
# print("output shape", out_classifier.shape)
# # out_classifier.shape
# #print the output of the classifier
# print("output", out_classifier)

In [27]:
# # summary of the classifier
# #create 
# summary(classifier, (2048, 1, 1))

In [28]:
#let us write gradient reversal layer

class GradientReversalLayer(Function):
    @staticmethod
    def forward(ctx, x, lambda_):
        ctx.lambda_ = lambda_
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        output = grad_output.neg() * ctx.lambda_
        return output, None
    
    def grad_reverse(x, lambda_):
        return GradientReversalLayer.apply(x, lambda_)

In [29]:
#now we will create the domain classifier: named as DomainClassifier
#it will have the rest of the resnet model after the feature extractor and the classifier and will have an additional layer at the end to output the domain label: 0 for source and 1 for target
#it will have gradient reversal layer in between the feature extractor and the classifier, i.e the first layer of the domain classifier 
#it will be exactly same as ClassClassifier except for the last layer, which is not number of classes but 2 for domain labels and sigmoid activation function instead of softmax
#it will also do same [-2:] to get the classifier part of the model, first layer be gradient reversal layer

class DomainClassifier(nn.Module):
    def __init__(self, base_model):
        super(DomainClassifier, self).__init__()
        #first layer of the domain classifier be the gradient reversal layer

        self.avgpool = nn.Sequential(*list(base_model.children())[-2:-1])
        self.flatten = nn.Flatten()
        self.fc1 = nn.Sequential(nn.Linear(2048, 512, bias=True))
        self.fc2 = nn.Sequential(nn.Linear(512, 62, bias=True))
        self.fc3 = nn.Sequential(nn.Linear(62, 2, bias=True))
        
        #now add the last output layer
        # self.domain_classifier.add_module('domain_classifier_output', nn.Linear(2048, 2))
        #change the last layer to output 2 classes
        # self.fc = nn.Linear(2048, 2 , bias=True)

        #forward
    def forward(self, x, lambda_ = LAMBDA):
        x = GradientReversalLayer.grad_reverse(x, lambda_)
        x = self.avgpool(x)
        x = self.flatten(x)
        #output the domain label
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        # x = x.view(-1, 2)
        return F.softmax(x)

        

In [30]:
#create an instance of the domain classifier and print the model
domain_classifier = DomainClassifier(base_resnet).to(device)
domain_classifier

DomainClassifier(
  (avgpool): Sequential(
    (0): AdaptiveAvgPool2d(output_size=(1, 1))
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Sequential(
    (0): Linear(in_features=2048, out_features=512, bias=True)
  )
  (fc2): Sequential(
    (0): Linear(in_features=512, out_features=62, bias=True)
  )
  (fc3): Sequential(
    (0): Linear(in_features=62, out_features=2, bias=True)
  )
)

In [31]:
#print summary of the model
# summary(domain_classifier, (2048, 1, 1))

In [32]:
# #create an instance of the domain classifier and print the model
# domain_classifier = DomainClassifier(base_resnet).to(device)
# #pass the output of feature extractor through the domain classifier
# out_domain_classifier = domain_classifier(out_feature_extractor, 1)
# #print the shape of the output
# print("output shape", out_domain_classifier.shape)
# #print the output of the domain classifier
# print("output", out_domain_classifier)

## Data-Processing

In [33]:
preprocess = weights.transforms()

In [34]:
#define the transform for the dataset
transform_mnist_resnet = transforms.Compose(
    [
  
    # if torch tensor then leave as it is, else convert to tensor
    transforms.Lambda(lambda x: x if isinstance(x, torch.Tensor) else transforms.functional.to_tensor(x)),
    #

    #resize to 224x224
    transforms.Resize(IMAGE_SIZE),

    #check if channels are 1, then convert to 3 channels
    transforms.Lambda(lambda x: x.repeat(3, 1, 1) if x.shape[0] == 1 else x),

    transforms.Lambda(lambda x: preprocess(x)),

    #if channels are 3, then make them 1
    transforms.Lambda(lambda x: x[0].unsqueeze(0) if x.shape[0] == 3 else x),
    
    # normalize
    transforms.Normalize(
            [0.5 for _ in range(CHANNELS_IMG)], [0.5 for _ in range(CHANNELS_IMG)]
        ),
    ]
)

#### Dataset

In [35]:
# for training we will use MNIST dataset in pytorch library
#for testing we will use USPS dataset

#### train data - MNIST
#### test data - USPS

In [36]:
#load train data
train_data = datasets.MNIST(root='./data/', download=True, transform=transform_mnist_resnet) 
#load train data
train_loader = torch.utils.data.DataLoader(train_data, batch_size=BATCH, shuffle=True, num_workers=4)

In [37]:

#load test data
#USPS dataset
test_data = datasets.USPS(root='./data/', download=True, transform=transform_mnist_resnet)
#load test data
test_loader = torch.utils.data.DataLoader(test_data, batch_size=BATCH, shuffle=True, num_workers=4)

In [38]:
#print the length of train and test data
print(len(train_data))
#print the shape of train data
print(train_data[0][0].shape)


60000
torch.Size([1, 224, 224])


In [39]:
#print length of test data
print(len(test_data))
#print shape of test data
print(test_data[0][0].shape)


7291
torch.Size([1, 224, 224])


In [40]:
#print number of batches in train and test data
print(len(train_loader))
print(len(test_loader))

938
114


In [41]:
# #we will test the model on the first batch of the train data, to test the dimensions of the output of the model and the loss function
# #we define a function to test the model
# def test_model(feature_extractor, class_classifier, loss_function):
#     #make the model sequential to pass the input through the feature extractor and then the output of feature extractor to the class classifier
#     # model = nn.Sequential(feature_extractor, class_classifier, loss_function)

#     #set model to eval mode
#     feature_extractor.eval()
#     class_classifier.eval()

#     #initialize the loss and number of correct predictions
#     loss = 0
#     correct = 0

#     #get the first batch of the train data
#     #wee use for and then break to get the first batch
#     for data, label in train_loader:

#         #print data shape
#         print("data shape", data.shape)
#         #print label shape
#         print("label shape", label.shape)
#         #print data type
#         print("data type", data.dtype)
#         #print label type
#         print("label type", label.dtype)
#         #print first label
#         print("first label", label[0])
#         #print all label
#         print("all label", label)


#         #move data and label to device
#         data, label = data.to(device), label.to(device)

#         #we measure the accuracy here
#         #get output from the model
#         # output = model(data)
#         #get the output from the feature extractor
#         feature = feature_extractor(data)
#         #print the shape of the feature
#         print("feature shape: ", feature.shape)
#         #print type of feature
#         print("feature type: ", type(feature))
#         #get the output from the class classifier
#         output = class_classifier(feature)
#         #print the shape of the output
#         print("output shape: ", output.shape)
#         #print type of output

#         #print output shape
#         print("output shape: ", output.shape)
#         #print output type
#         print("output type: ", type(output))

#         #get the loss
#         loss = loss_function(output, label)

#         #print loss
#         print("loss: ", loss)

#         #get the prediction
#         pred = output.argmax(dim=1, keepdim=True)
#         #print pred
#         print("pred: ", pred)
#         #update the correct predictions
#         correct = pred.eq(label.view_as(pred)).sum().item()

#         #print correct
#         print("correct: ", correct)
#         #accuracy
#         accuracy = correct / len(data)
#         #print accuracy
#         print("accuracy: ", accuracy)





#         #break
#         break

# #now we test the above function and build model and loss function
# #build the model
# #the classifier is composed of two models - feature extractor and classifier in sequence: input first passes through feature extractor and then the output of feature extractor is passed to classifier
# # and the output of classifier is the final output
# classifier = ClassClassifier(base_resnet, NUM_CLASSES).to(device)
# #build the feature extractor model from the class FeatureExtractor
# feature_extractor = FeatureExtractor(base_resnet).to(device)
# #now we make end to end model by combining the feature extractor and classifier
# # model = nn.Sequential(feature_extractor, classifier).to(device)
# #build the loss function
# loss_function = nn.CrossEntropyLoss().to(device)
# #test the model
# test_model(feature_extractor, classifier, loss_function)




In [42]:
#initialize tensorboard writer
#create writer for tensorboard
writer = SummaryWriter(f'runs/'+experiment_id)

2022-11-08 18:04:03.371119: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [43]:
#write a save model function
#the arguments are the models, the optimizer, the epoch number, the model name
def save_model(feature_extractor, class_classifier, optimizer, epoch):
    #check if path exists, if not then create it
    #recursively check if each root directory exists, if not then recursively create each
    if not os.path.exists(model_path):
        os.makedirs(model_path)



    #we will save the model by the name of the experiment id and the epoch number
    torch.save(feature_extractor.state_dict(), f'{model_path}/{experiment_id}_feature_extractor_{epoch}.pth')
    torch.save(class_classifier.state_dict(), f'{model_path}/{experiment_id}_class_classifier_{epoch}.pth')
    torch.save(domain_classifier.state_dict(), f'{model_path}/{experiment_id}_domain_classifier_{epoch}.pth')

## DANN Training

#### Pretraining Class Classifier and Domain classifers

In [44]:
#we will write a routine to make the class classifier to have optimal performance on the source domain
#this we call the pretrained model : as the class classifier is pretrained on the source domain
# this kind of initialization and
#better initialization would help the training to converge to a better solution

#funtion to train the model: feature extractor and class classifier on the source domain
#we make feature extractor non trainable
#only parameters of class classifier are trainable

# def 

#### Test function

#### Source Domain Label : 0
#### Target Domain Label : 1

In [45]:
#write a function to test the model, i.e. the class classifier on the test data
#the function takes as argument : the feature extractor, class classifier, domain classifier, test data loader 
def DANN_test(feature_extractor, class_classifier, domain_classifier, test_data_loader, dataset='TARGET'):
    #set the model to evaluation mode
    feature_extractor.eval()
    class_classifier.eval()
    domain_classifier.eval()

    #set the total and correct for class classifier to 0
    total_class_classifier = 0
    correct_class_classifier = 0
    #set the total and correct for domain classifier to 0
    total_domain_classifier = 0
    correct_domain_classifier = 0
    
    #set the writer
    # writer.add_scalar
 
    #loop through the test data loader
    with torch.no_grad():
        #loop through the test data loader
        for data, labels in test_data_loader:
            #get the data and labels
            data, labels = data.to(device), labels.to(device)
            #if dataset is target, then we will add the domain labels as 1
            if dataset == 'TARGET':
                domain_labels = torch.ones(data.size(0)).long().to(device)
            #if dataset is source, then we will add the domain labels as 0
            elif dataset == 'SOURCE':
                domain_labels = torch.zeros(data.size(0)).long().to(device)
                #if dataset is not source or target, then we will raise an error
            else:
                raise ValueError('Dataset must be source or target')
            #get the feature vector
            feature_vector = feature_extractor(data)
            #get the class classifier prediction
            class_classifier_pred = class_classifier(feature_vector)
            #get the domain classifier prediction
            domain_classifier_pred = domain_classifier(feature_vector)
   
            #get the predicted class
            _, predicted_class = torch.max(class_classifier_pred.data, 1)
            #get the total and correct for class classifier
            total_class_classifier += labels.size(0)
            correct_class_classifier += (predicted_class == labels).sum().item()
            #get the total and correct for domain classifier
            _, predicted_domain = torch.max(domain_classifier_pred.data, 1)
            total_domain_classifier += domain_labels.size(0)
            correct_domain_classifier += (predicted_domain == domain_labels).sum().item()
    #print the loss and accuracy
    print(dataset+'_'+'Test Accuracy Class Classifier: {}/{} ({:.0f}%)'.format(correct_class_classifier, total_class_classifier,
        100. * correct_class_classifier / total_class_classifier))
    print(dataset+'_'+'Test Accuracy Domain Classifier: {}/{} ({:.0f}%)'.format(correct_domain_classifier, total_domain_classifier,
        100. * correct_domain_classifier / total_domain_classifier))
    
    #write the loss and accuracy to the tensorboard
    writer.add_scalar(dataset+'Test_Class_Accuracy', 100. * correct_class_classifier / total_class_classifier, global_step=0)
    writer.add_scalar(dataset+'Test_Domain_Accuracy', 100. * correct_domain_classifier / total_domain_classifier, global_step=0)
    #close the writer
    writer.close()

        

In [46]:
#let us first see the models performance on the test data before training: so we can compare the performance before and after training
#test the model on the test data
DANN_test(feature_extractor, class_classifier, domain_classifier, test_loader, dataset='TARGET')

  return F.softmax(x)
  return F.softmax(x)


TARGET_Test Accuracy Class Classifier: 653/7291 (9%)
TARGET_Test Accuracy Domain Classifier: 1339/7291 (18%)


In [47]:
#we will also test the model on the source data
DANN_test(feature_extractor, class_classifier, domain_classifier, train_loader, dataset='SOURCE')

  return F.softmax(x)
  return F.softmax(x)


SOURCE_Test Accuracy Class Classifier: 5606/60000 (9%)
SOURCE_Test Accuracy Domain Classifier: 31012/60000 (52%)


In [48]:
# we will write a function for training the models as per DANN paper
#the arguments are: feature extractor, Class Classifier, Domain Classifier, source data loader, target data loader, number of epochs

#we will simultaneously track the loss and accuracy of the model on source and target data
#so we will store total corrects and total predictions for both source and target data


def DANN_training(feature_extractor, class_classifier, domain_classifier, source_data_loader, target_data_loader, num_epochs=EPOCHS, device = device):
    #for optimizer we will club all the parameters of all the models
    #we use Adam optimizer
    #the parameters of ADAM  are the standard ones for ADAM optimizer
    optimizer = optim.Adam(list(feature_extractor.parameters()) + list(class_classifier.parameters()) + list(domain_classifier.parameters()), lr=LEARNING_RATE, betas=(BETA_1, BETA_2))
    #we define the loss function for the domain classifier as cross entropy loss
    loss_fn_domain = nn.CrossEntropyLoss().to(device)
    #we define the loss function for the class classifier as cross entropy loss
    loss_fn_class = nn.CrossEntropyLoss().to(device)

    #we will iterate through datasets in a unique way, as we need one batch of source data and one batch of target data at a time to process through the models and then update the parameters
    #but we do not have same number of batches in source and target data
    #so we will first assume that source data has more batches than target data
    #write an assert statement to check if this is true
    # assert len(source_data_loader) >= len(target_data_loader), "Source data should have more batches than target data"
    #now we have source data to have more batches than target data, thus we will have 2 loops, one for source data and one for target data
    #the outer would be for source data and inner for target data
    #and we get the batch number of the source data from the outer loop and in the inner loop through the target data we will select the corresponding batch number of the target data 
    #, if the batch number is less than or equal to the minimum of the number of batches in source and target data,
    # else when we have exhausted the target data, we will start again from the first batch of the target data
    #we will do this untill we have exhausted the source data

    #we will keep track of the batch number of the source data in the outer loop
    batch_num_source = 0
    #we will keep track of the batch number of the target data in the inner loop
    batch_num_target = 0
    #we will keep track of the minimum of the number of batches in source and target data
    min_batches = min(len(source_data_loader), len(target_data_loader))
    #we will keep track of the number of batches in source data
    num_batches_source = len(source_data_loader)
    #we will keep track of the number of batches in target data
    num_batches_target = len(target_data_loader)

    #set all the models to train mode
    feature_extractor.train()
    class_classifier.train()
    domain_classifier.train()

    #variable to store the statistics of model training, like : loss, accuracy, etc
    epoch_tracker = 0   #it tracks the loop number
    batch_tracker = 0   #it tracks the batch number * epoch number
    #loss
    total_loss = 0
    total_class_loss = 0
    total_domain_loss = 0
    #accuracy
    total_class_classifier = 0
    correct_class_classifier = 0
    total_domain_classifier = 0
    correct_domain_classifier = 0

    best_epoch_accuracy = 0     #this will store the best epoch accuracy
    best_epoch = 0              #this will store the best epoch number

    #loop through the epochs
    for epoch in range(num_epochs):

        #if epoch is not 0, then we have values stored in epoch_total_class_classifier, epoch_total_class_correct, epoch_total_domain_classifier, epoch_total_domain_correct
        #so we will calculate the accuracy and loss for the previous epoch
        if epoch != 0:
            #calculate the accuracy
            epoch_accuracy_class_classifier = 100 * correct_class_classifier / total_class_classifier
            epoch_accuracy_domain_classifier = 100 * correct_domain_classifier / total_domain_classifier
            #calculate the loss, we divide total loss by batch_size, as we have added the loss for all the elements in the batch
            epoch_loss_class_classifier = total_class_loss /  batch_min_size
            epoch_loss_domain_classifier = total_domain_loss / batch_min_size
            #write the loss and accuracy to tensorboard
            writer.add_scalar('EPOCH_Train_Loss_class_classifier', epoch_loss_class_classifier, epoch_tracker)
            writer.add_scalar('EPOCH_Train_Loss_domain_classifier', epoch_loss_domain_classifier, epoch_tracker)
            writer.add_scalar('EPOCH_Train_Accuracy_class_classifier', epoch_accuracy_class_classifier, epoch_tracker)
            writer.add_scalar('EPOCH_Train_Accuracy_domain_classifier', epoch_accuracy_domain_classifier, epoch_tracker)

            #if the epoch_accuracy_domain_classifier is greater than best_epoch_accuracy, then we will update the best_epoch_accuracy and best_epoch
            if epoch_accuracy_domain_classifier > best_epoch_accuracy:
                best_epoch_accuracy = epoch_accuracy_domain_classifier
                best_epoch = epoch
                #we will save the model by the name of the experiment id and the epoch number
                #call the save_model function
                save_model(feature_extractor, class_classifier, domain_classifier, epoch)



            #increment the epoch tracker
            epoch_tracker += 1
            
          

        #epoch variable to track statistics in an epoch
        #loss
        epoch_total_loss = 0
        epoch_total_class_loss = 0
        epoch_total_domain_loss = 0
        #accuracy
        epoch_total_class_classifier = 0
        epoch_correct_class_classifier = 0
        epoch_total_domain_classifier = 0
        epoch_correct_domain_classifier = 0
        

        #start outer loop for source data
        for batch_idx_source, (source_,target_) in enumerate(zip(source_data_loader, target_data_loader)):
            #if the batch number of source data is less than the minimum of the number of batches in source and target data, then we will select the corresponding batch number of the target data
            # #we name that as batch_of_target
            # if batch_idx_source < min_batches:
            #     batch_of_target = batch_idx_source

            # #check if the batch_idx_source is equal to or greater than the minimum of the number of batches in source and target data
            # else:
            #     #if yes, then we will start again from the first batch of the target data
            #     batch_of_target = batch_idx_source % min_batches

        #start inner loop for target data
        # for batch_idx_target, (target_data, target_labels) in enumerate(target_data_loader):
            temp_total_class_classifier = 0
            temp_correct_class_classifier = 0
            temp_total_domain_classifier = 0
            temp_correct_domain_classifier = 0
            #now we will run the inner loop only when batch_idx_target is equal to the batch_of_target
            #in all else cases we will continue
            # if batch_idx_target != batch_of_target:
            #     continue

            #now we have one batch of source data and one batch of target data
            #now we run as if we are training a normal model with source data and target data in a single batch
            #send source data and target data to device
            source_data, source_labels = source_
            target_data, target_labels = target_
            source_data, source_labels = source_data.to(device), source_labels.to(device)
            target_data, target_labels = target_data.to(device), target_labels.to(device)

            #now we will calculate the lambda for gradient reversal layer: for current loop
            p = float(batch_idx_source + epoch *num_batches_source ) / num_epochs * num_batches_source
            lambda_ = 2. / (1. + np.exp(- GAMMA * p)) - 1

            #now we check that the number of images in source data and target data are same: i.e. batch size is same: this is particularly useful in case of last batch
            batch_min_size = min(source_data.shape[0], target_data.shape[0])
            #now we will select the same number of images from source and target data
            source_data = source_data[:batch_min_size]
            source_labels = source_labels[:batch_min_size]
            target_data = target_data[:batch_min_size]
            target_labels = target_labels[:batch_min_size]


            #zero grdient optimizer
            optimizer.zero_grad()

            #now we will create the domain labels for source and target data
            #this is required for the domain classifier
            #source domain label is 0
            source_domain_labels = torch.zeros(source_data.shape[0]).long().to(device)
            #target domain label is 1
            target_domain_labels = torch.ones(target_data.shape[0]).long().to(device)

            #now we will pass the source data and target data through the feature extractor
            source_features = feature_extractor(source_data)
            target_features = feature_extractor(target_data)

            #now we will calculate class prediction of source data
            source_class_pred = class_classifier(source_features)
            #now we calculate the loss for the class classifier
            class_classifier_loss = loss_fn_class(source_class_pred, source_labels)

            #now we will calculate the domain prediction of source data and target data
            source_domain_pred = domain_classifier(source_features, lambda_)
            target_domain_pred = domain_classifier(target_features, lambda_)
            #now we will calculate the loss for the domain classifier
            domain_classifier_loss = loss_fn_domain(source_domain_pred, source_domain_labels) + loss_fn_domain(target_domain_pred, target_domain_labels)

            #now we will calculate the total loss using the parameter: C
            total_loss = class_classifier_loss + C * domain_classifier_loss

            #now we will calculate the gradients
            total_loss.backward()
            #now we will update the parameters
            optimizer.step()

            #now we will calculate the accuracy of the class classifier
            _, predicted = torch.max(source_class_pred.data, 1)
            temp_total_class_classifier += source_labels.size(0)
            temp_correct_class_classifier += (predicted == source_labels).sum().item()
            #add the total and correct to total, correct and epoch_total, epoch_correct
            total_class_classifier += temp_total_class_classifier
            correct_class_classifier += temp_correct_class_classifier
            epoch_total_class_classifier += temp_total_class_classifier
            epoch_correct_class_classifier += temp_correct_class_classifier

            #now we will calculate the accuracy of the domain classifier
            _, predicted = torch.max(source_domain_pred.data, 1)
            temp_total_domain_classifier += source_domain_labels.size(0)
            temp_correct_domain_classifier += (predicted == source_domain_labels).sum().item()
            _, predicted = torch.max(target_domain_pred.data, 1)
            temp_total_domain_classifier += target_domain_labels.size(0)
            temp_correct_domain_classifier += (predicted == target_domain_labels).sum().item()
            #add the total and correct to total, correct and epoch_total, epoch_correct
            total_domain_classifier += temp_total_domain_classifier
            correct_domain_classifier += temp_correct_domain_classifier
            epoch_total_domain_classifier += temp_total_domain_classifier
            epoch_correct_domain_classifier += temp_correct_domain_classifier

            #now we will print the loss and accuracy based on the temp values
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tClass Loss: {:.6f}\tDomain Loss: {:.6f}\tClass Accuracy: {}/{} ({:.0f}%)\tDomain Accuracy: {}/{} ({:.0f}%)'.format(
                epoch, batch_idx_source * len(source_data), len(source_data_loader.dataset),
                100. * batch_idx_source / len(source_data_loader), class_classifier_loss.item(), domain_classifier_loss.item(),
                temp_correct_class_classifier, temp_total_class_classifier,
                100. * temp_correct_class_classifier / temp_total_class_classifier, temp_correct_domain_classifier, temp_total_domain_classifier,
                100. * temp_correct_domain_classifier / temp_total_domain_classifier))
            
            #we will also write the loss and accuracy to the tensorboard based on temp values
            writer.add_scalar('BATCH_Train_Class_Loss', class_classifier_loss.item(), global_step=batch_tracker)
            writer.add_scalar('BATCH_Train_Domain_Loss', domain_classifier_loss.item(), global_step=batch_tracker)
            writer.add_scalar('BATCH_Train_Class_Accuracy', 100. * temp_correct_class_classifier / temp_total_class_classifier, global_step=batch_tracker)
            writer.add_scalar('BATCH_Train_Domain_Accuracy', 100. * temp_correct_domain_classifier / temp_total_domain_classifier, global_step=batch_tracker)




            batch_tracker += 1

        #if epoch is 0 , then we will save the models and store best accuracy and epoch
        if epoch == 0:
            #save the model
            #call the save model function
            save_model(feature_extractor, class_classifier, domain_classifier, epoch)
            #store the best accuracy and epoch
            best_accuracy = 100. * epoch_correct_class_classifier / epoch_total_class_classifier
            best_epoch = epoch
    #now we will print the total accuracy for the training data
    #now we will print the total accuracy for the training data
    total_class_classifier_accuracy = 100. * correct_class_classifier / total_class_classifier
    total_domain_classifier_accuracy = 100. * correct_domain_classifier / total_domain_classifier
    print('Train Accuracy Class Classifier: {}/{} ({:.0f}%)'.format(correct_class_classifier, total_class_classifier, total_class_classifier_accuracy))
    print('Train Accuracy Domain Classifier: {}/{} ({:.0f}%)'.format(correct_domain_classifier, total_domain_classifier, total_domain_classifier_accuracy))
    #we will also write the loss and accuracy to the tensorboard based on total values
    writer.add_scalar('TOTAL_Train_Class_Accuracy', total_class_classifier_accuracy, global_step=0)
    writer.add_scalar('TOTAL_Train_Domain_Accuracy', total_domain_classifier_accuracy, global_step=0)
    #save the model
    #call the save model function
    save_model(feature_extractor, class_classifier, domain_classifier, epoch= EPOCHS)
    #print the best accuracy and epoch
    print('Best Accuracy: {:.0f}% at Epoch: {}'.format(best_epoch_accuracy, best_epoch))


    #close the writer
    writer.close()

    #return models
    return feature_extractor, class_classifier, domain_classifier

    
        



                

                
                    




In [49]:
#
#test the function on the models and data
feature_extractor, class_classifier, domain_classifier = DANN_training(feature_extractor, class_classifier, domain_classifier,train_loader,test_loader)

  return F.softmax(x)
  return F.softmax(x)


Train Accuracy Class Classifier: 68605/72910 (94%)
Train Accuracy Domain Classifier: 72625/145820 (50%)
Best Accuracy: 50% at Epoch: 9


## Testing

In [50]:
#test DANN on the test data
DANN_test(feature_extractor, class_classifier, domain_classifier, test_loader, dataset='TARGET')

  return F.softmax(x)
  return F.softmax(x)


TARGET_Test Accuracy Class Classifier: 6696/7291 (92%)
TARGET_Test Accuracy Domain Classifier: 7291/7291 (100%)


In [51]:
#test  DANN predictions on train data
DANN_test(feature_extractor, class_classifier, domain_classifier, train_loader, dataset='SOURCE')

  return F.softmax(x)
  return F.softmax(x)


SOURCE_Test Accuracy Class Classifier: 58527/60000 (98%)
SOURCE_Test Accuracy Domain Classifier: 0/60000 (0%)
