In [None]:
# Show the GPU attached to this runtime (the training cells below call .cuda()).
! /opt/bin/nvidia-smi

In [None]:
# Mount Google Drive at /content/drive; later cells save and load model
# checkpoints under /content/drive/MyDrive/Hw2/2-3/.
from google.colab import drive
drive.mount('/content/drive')

### Dataset Download

In [None]:
# Clone the homework repository and run its dataset download script.
# NOTE(review): `<token>` is a placeholder for a GitHub access token. Supply it at
# run time (e.g. from an environment variable) and never commit a real token here.
! git clone https://<token>@github.com/DLCV-Fall-2021/hw2-SonicBenz0408.git
! bash ./hw2-SonicBenz0408/get_dataset.sh

## Random seed

In [None]:
import random

import torch
import numpy as np


def same_seeds(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU, plus all CUDA devices
    when CUDA is available) with ``seed`` and switches cuDNN to its
    deterministic, non-benchmarking mode.
    """
    # cuDNN: no autotuned kernel selection, deterministic kernels only.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    # CPU-side generators.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    # GPU-side generators.
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)


same_seeds(7414)

## Import Packages

In [None]:
# Training progress bar
!pip install -q qqdm

import os
import csv
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch import optim
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils import spectral_norm
import matplotlib.pyplot as plt
from qqdm.notebook import qqdm
from PIL import Image
import math

## Dataset
1. Load every image as RGB at its original resolution (the transforms below do not resize).
1. Linearly map the values from [0, 1] to [-1, 1].

Please refer to [PyTorch official website](https://pytorch.org/vision/stable/transforms.html) for details about different transforms.


In [None]:
class ImgDataset(Dataset):
    """Image-folder dataset pairing the i-th file (sorted by name) with the i-th label.

    Args:
        path: directory that contains the image files.
        labels: sequence of labels (anything ``int()`` accepts), ordered to match
            the alphabetically sorted file names found in ``path``.
        transform: callable applied to the RGB ``PIL.Image`` before it is returned.

    Raises:
        ValueError: if the number of labels differs from the number of files, since
            files and labels are matched purely by position.
    """

    def __init__(self, path, labels, transform):
        self.path = path
        self.fnames = sorted(os.listdir(self.path))
        self.transform = transform
        self.num_samples = len(self.fnames)
        self.labels = labels
        # Fail early: a length mismatch would otherwise mis-pair images and labels
        # or raise IndexError in the middle of an epoch.
        if len(self.labels) != self.num_samples:
            raise ValueError(
                "ImgDataset: {} files in {!r} but {} labels were given".format(
                    self.num_samples, self.path, len(self.labels)
                )
            )

    def __getitem__(self, idx):
        """Return ``(transformed_image, int_label)`` for position ``idx``."""
        label = int(self.labels[idx])
        fname = os.path.join(self.path, self.fnames[idx])
        # The context manager releases the file handle once the pixels are converted.
        with Image.open(fname) as raw:
            img = raw.convert("RGB")
        img = self.transform(img)
        return img, label

    def __len__(self):
        """Number of image files found in ``path``."""
        return self.num_samples


### Show some images
Note that the values are in the range of [-1, 1], we should shift them to the valid range, [0, 1], to display correctly.

In [None]:
# Per-channel statistics that map ToTensor()'s [0, 1] output onto [-1, 1].
_CHANNEL_MEAN = (0.5, 0.5, 0.5)
_CHANNEL_STD = (0.5, 0.5, 0.5)

# Training pipeline: colour augmentation, then tensor conversion and normalisation.
train_tfm = transforms.Compose([
    transforms.ColorJitter(brightness=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
])

# Test pipeline: the same conversion and normalisation, without augmentation.
test_tfm = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
])
def _read_labels(csv_path):
    """Return the last column of every data row in ``csv_path`` as a list of strings.

    The first row is treated as a header and skipped.
    """
    # newline="" is what the csv module expects for files handed to csv.reader.
    with open(csv_path, "r", newline="") as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header row
        return [row[-1] for row in reader]


# ---------------------------------------------------------------- training sets
# SVHN
s_train_label_path = "/content/hw2_data/digits/svhn/train.csv"
s_train_label = _read_labels(s_train_label_path)
s_train_path = "/content/hw2_data/digits/svhn/train/"
s_train_set = ImgDataset(s_train_path, s_train_label, train_tfm)

# MNIST-M
m_train_label_path = "/content/hw2_data/digits/mnistm/train.csv"
m_train_label = _read_labels(m_train_label_path)
m_train_path = "/content/hw2_data/digits/mnistm/train/"
m_train_set = ImgDataset(m_train_path, m_train_label, train_tfm)

# USPS
u_train_label_path = "/content/hw2_data/digits/usps/train.csv"
u_train_label = _read_labels(u_train_label_path)
u_train_path = "/content/hw2_data/digits/usps/train/"
u_train_set = ImgDataset(u_train_path, u_train_label, train_tfm)

# -------------------------------------------------------------------- test sets
# SVHN
s_test_label_path = "/content/hw2_data/digits/svhn/test.csv"
s_test_label = _read_labels(s_test_label_path)
s_test_path = "/content/hw2_data/digits/svhn/test/"
s_test_set = ImgDataset(s_test_path, s_test_label, test_tfm)

# MNIST-M
m_test_label_path = "/content/hw2_data/digits/mnistm/test.csv"
m_test_label = _read_labels(m_test_label_path)
m_test_path = "/content/hw2_data/digits/mnistm/test/"
m_test_set = ImgDataset(m_test_path, m_test_label, test_tfm)

# USPS
u_test_label_path = "/content/hw2_data/digits/usps/test.csv"
u_test_label = _read_labels(u_test_label_path)
u_test_path = "/content/hw2_data/digits/usps/test/"
u_test_set = ImgDataset(u_test_path, u_test_label, test_tfm)

In [None]:
# Preview the first 100 USPS training images in a 10x10 grid.
# Samples are normalised to [-1, 1]; (x + 1) / 2 brings them back to [0, 1] for display.
images = []
for sample_idx in range(100):
    normalised = u_train_set[sample_idx][0]
    images.append((normalised + 1) / 2)

grid_img = torchvision.utils.make_grid(images, nrow=10)

plt.figure(figsize=(10, 10))
# make_grid returns (C, H, W); imshow expects (H, W, C).
plt.imshow(grid_img.permute(1, 2, 0))
plt.show()

# SVHN -> MNIST-M

## My model

In [None]:
class FeatureExtractor(nn.Module):
    """Five-stage CNN (Conv 3x3 -> BatchNorm -> ReLU -> MaxPool 2) producing 512 channels.

    ``forward`` ends with ``squeeze()``, which removes every size-1 dimension:
    a ``(N, 512, 1, 1)`` map becomes ``(N, 512)``, but for ``N == 1`` the batch
    axis is removed as well and the result is ``(512,)``.
    """

    def __init__(self):
        super().__init__()

        # (in_channels, out_channels, ceil_mode of the stage's max-pool)
        stages = [
            (3, 128, False),
            (128, 128, False),
            (128, 256, True),
            (256, 256, False),
            (256, 512, False),
        ]
        modules = []
        for c_in, c_out, ceil in stages:
            modules.append(nn.Conv2d(c_in, c_out, 3, 1, 1))
            modules.append(nn.BatchNorm2d(c_out))
            modules.append(nn.ReLU(inplace=True))
            modules.append(nn.MaxPool2d(2, ceil_mode=ceil))
        self.conv = nn.Sequential(*modules)

    def forward(self, x):
        """Run the conv stack and drop all singleton dimensions of its output."""
        return self.conv(x).squeeze()

class LabelPredictor(nn.Module):
    """MLP head mapping 512-d features to 10 class logits (two 1024-wide hidden layers)."""

    def __init__(self):
        super().__init__()

        widths = [512, 1024, 1024]
        modules = []
        # Hidden layers: Linear -> BatchNorm1d -> ReLU.
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            modules += [nn.Linear(n_in, n_out), nn.BatchNorm1d(n_out), nn.ReLU(inplace=True)]
        # Output layer: raw logits for the 10 classes.
        modules.append(nn.Linear(widths[-1], 10))
        self.layer = nn.Sequential(*modules)

    def forward(self, h):
        """Return class logits for the feature batch ``h``."""
        return self.layer(h)

class DomainClassifier(nn.Module):
    """MLP head mapping 512-d features to one domain logit (three 1024-wide hidden layers)."""

    def __init__(self):
        super().__init__()

        widths = [512, 1024, 1024, 1024]
        modules = []
        # Hidden layers: Linear -> BatchNorm1d -> ReLU.
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            modules += [nn.Linear(n_in, n_out), nn.BatchNorm1d(n_out), nn.ReLU(inplace=True)]
        # Output layer: a single raw logit.
        modules.append(nn.Linear(widths[-1], 1))
        self.layer = nn.Sequential(*modules)

    def forward(self, h):
        """Return one domain logit per row of ``h``."""
        return self.layer(h)

In [None]:
from torch.autograd import Function
class ReverseLayerF(Function):
    """Gradient reversal layer: identity on the forward pass, ``-lamb * grad`` on the backward pass."""

    @staticmethod
    def forward(ctx, x, lamb):
        # Remember the scaling factor for the backward pass; the data passes through unchanged.
        ctx.lamb = lamb
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        # Flip the sign and scale; `lamb` is not a tensor input, so it gets no gradient.
        reversed_grad = ctx.lamb * grad_output.neg()
        return reversed_grad, None

In [None]:
# Print the layer layout of the domain classifier.
# The `domain_classifier` instance is only created in the training-setup cell below,
# so printing it here raised NameError on a fresh top-to-bottom run. A throw-away
# instance prints the same layout; fork_rng restores the CPU RNG state afterwards so
# the weight initialisation of this temporary model does not shift the seeded
# random stream used by the cells that follow.
with torch.random.fork_rng(devices=[]):
    print(DomainClassifier())

## Training

In [None]:
# Hyper-parameters for the SVHN -> MNIST-M run.
batch_size = 32
lr = 1e-3
n_epoch = 100

# Source domain = SVHN training set, target domain = MNIST-M training set.
# NOTE(review): drop_last is not set, so the final batch of either loader may be
# smaller than batch_size.
source_loader = DataLoader(s_train_set, batch_size=batch_size, shuffle=True, num_workers=2)
target_loader = DataLoader(m_train_set, batch_size=batch_size, shuffle=True, num_workers=2)

# Fresh, randomly initialised networks on the GPU (construction order affects the
# random stream consumed for weight initialisation).
feature_extractor = FeatureExtractor().cuda()
label_predictor = LabelPredictor().cuda()
domain_classifier = DomainClassifier().cuda()

# Class loss on 10-way logits; domain loss on a single raw logit.
class_criterion = nn.CrossEntropyLoss()
domain_criterion = nn.BCEWithLogitsLoss()

# One Adam optimizer per network so they can be stepped independently.
optimizer_F = optim.Adam(feature_extractor.parameters(), lr=lr)
optimizer_C = optim.Adam(label_predictor.parameters(), lr=lr)
optimizer_D = optim.Adam(domain_classifier.parameters(), lr=lr)


In [None]:
# Gradient-reversal (DANN-style) training: one backward pass trains the label
# predictor, the domain classifier and, through ReverseLayerF, the feature extractor.
loss_list, acc_list = [], []
label_predictor.train()
feature_extractor.train()
domain_classifier.train()

for epoch in range(n_epoch):

    running_loss = 0.0
    s_count, total_num = 0, 0

    # Gradient-reversal strength. It depends only on the epoch, so compute it once per epoch.
    # NOTE(review): this ramps from 1 (epoch 0) towards 2; the schedule in the DANN paper
    # subtracts 1 so that it ramps from 0 towards 1 - confirm which one is intended.
    lamb = 2 / (1 + math.exp(-10*epoch/n_epoch))

    # zip() stops at the shorter loader, so an epoch covers only that many batches.
    for i, ((source_data, source_label), (target_data, _)) in enumerate(zip(source_loader, target_loader)):
        source_data = source_data.cuda()
        source_label = source_label.cuda()
        target_data = target_data.cuda()
        source_bs, target_bs = source_data.shape[0], target_data.shape[0]

        # Domain targets: 0 = source, 1 = target.
        s_domain_label = torch.zeros((source_bs, 1)).cuda()
        t_domain_label = torch.ones((target_bs, 1)).cuda()

        # Source batch: class loss plus domain loss (through the reversal layer).
        s_feature = feature_extractor(source_data)
        s_r_feature = ReverseLayerF.apply(s_feature, lamb)
        s_domain_logits = domain_classifier(s_r_feature)
        s_class_logits = label_predictor(s_feature)

        domain_loss_s = domain_criterion(s_domain_logits, s_domain_label)
        class_loss_s = class_criterion(s_class_logits, source_label)

        # Target batch: domain loss only (its labels are ignored).
        t_feature = feature_extractor(target_data)
        t_r_feature = ReverseLayerF.apply(t_feature, lamb)
        t_domain_logits = domain_classifier(t_r_feature)

        domain_loss_t = domain_criterion(t_domain_logits, t_domain_label)

        loss = domain_loss_s + class_loss_s + domain_loss_t
        loss.backward()

        running_loss += loss.item()

        optimizer_D.step()
        optimizer_F.step()
        optimizer_C.step()

        optimizer_D.zero_grad()
        optimizer_F.zero_grad()
        optimizer_C.zero_grad()

        s_count += torch.sum(torch.argmax(s_class_logits, dim=1) == source_label).item()
        # Accuracy is measured on SOURCE predictions, so count source samples. The
        # original added the target batch size here, which skews the accuracy
        # whenever the two batches differ in size (e.g. a short final batch).
        total_num += source_bs

    train_loss = running_loss / (i+1)
    train_acc = s_count / total_num
    loss_list.append(train_loss)
    acc_list.append(train_acc)
    # Overwrite the latest checkpoint (models and optimizers) after every epoch.
    torch.save(feature_extractor.state_dict(), f'/content/extractor_model.bin')
    torch.save(label_predictor.state_dict(), f'/content/predictor_model.bin')
    torch.save(domain_classifier.state_dict(), f'/content/domain_model.bin')
    torch.save(optimizer_F.state_dict(), f'/content/F_opt.bin')
    torch.save(optimizer_C.state_dict(), f'/content/C_opt.bin')
    torch.save(optimizer_D.state_dict(), f'/content/D_opt.bin')
    print('epoch {:>3d}: loss: {:6.4f}, acc {:6.4f}'.format(epoch+1, train_loss, train_acc))



In [None]:
# Alternating adversarial training (no gradient-reversal layer): each iteration
# first updates the domain classifier on detached features, then updates the
# feature extractor and label predictor with (class loss - lamb * domain loss).
# NOTE(review): this cell continues from whatever state the models/optimizers are
# in; if the gradient-reversal loop above was run first, note that it uses the
# opposite domain-label convention (source = 0 there, source = 1 here).
D_loss_list, F_loss_list, acc_list = [], [], []
label_predictor.train()
feature_extractor.train()
domain_classifier.train()

for epoch in range(n_epoch):

    running_D_loss, running_F_loss = 0.0, 0.0
    total_hit, total_num = 0.0, 0.0

    # zip() stops at the shorter of the two loaders.
    for i, ((source_data, source_label), (target_data, _)) in enumerate(zip(source_loader, target_loader)):
        # Weight of the adversarial term; depends only on the epoch index.
        lamb= 2 / (1 + math.exp(-10*epoch/n_epoch))

        source_data = source_data.cuda()
        source_label = source_label.cuda()
        target_data = target_data.cuda()
        
        # Source and target go through the extractor as one batch; source rows come first.
        mixed_data = torch.cat([source_data, target_data], dim=0)
        # Domain targets: 1 for the source rows, 0 for the target rows.
        domain_label = torch.zeros([source_data.shape[0] + target_data.shape[0], 1]).cuda()
        domain_label[:source_data.shape[0]] = 1

        # Step 1: train the domain classifier. detach() keeps this loss from
        # reaching the feature extractor.
        feature = feature_extractor(mixed_data)
        domain_logits = domain_classifier(feature.detach())
        loss = domain_criterion(domain_logits, domain_label)
        running_D_loss+= loss.item()
        loss.backward()
        optimizer_D.step()

        # Step 2: train extractor + predictor. Only the source rows have class labels.
        class_logits = label_predictor(feature[:source_data.shape[0]])
        domain_logits = domain_classifier(feature)
        
        # Subtracting the domain loss rewards features the domain classifier gets wrong.
        loss = class_criterion(class_logits, source_label) - lamb * domain_criterion(domain_logits, domain_label)
        running_F_loss+= loss.item()
        loss.backward()
        optimizer_F.step()
        optimizer_C.step()

        # Clear all gradients, including those the second backward left on the
        # domain classifier (optimizer_D is not stepped for them).
        optimizer_D.zero_grad()
        optimizer_F.zero_grad()
        optimizer_C.zero_grad()

        total_hit += torch.sum(torch.argmax(class_logits, dim=1) == source_label).item()
        total_num += source_data.shape[0]
        print(i, end='\r')

    # Per-epoch averages over the i+1 processed batches; accuracy is on source samples.
    train_D_loss, train_F_loss, train_acc = running_D_loss / (i+1), running_F_loss / (i+1), total_hit / total_num

    D_loss_list.append(train_D_loss)
    F_loss_list.append(train_F_loss)
    acc_list.append(train_acc)
    # Overwrite the Drive checkpoints after every epoch.
    torch.save(feature_extractor.state_dict(), f'/content/drive/MyDrive/Hw2/2-3/b_s2m_E.bin')
    torch.save(label_predictor.state_dict(), f'/content/drive/MyDrive/Hw2/2-3/b_s2m_P.bin')
    torch.save(domain_classifier.state_dict(), f'/content/drive/MyDrive/Hw2/2-3/b_s2m_D.bin')
    
    # The epoch number printed here is 0-based.
    print('epoch {:>3d}: train D loss: {:6.4f}, train F loss: {:6.4f}, acc {:6.4f}'.format(epoch, train_D_loss, train_F_loss, train_acc))


In [None]:
# Evaluate the SVHN -> MNIST-M checkpoints on the MNIST-M test set.
label_predictor = LabelPredictor().cuda()
feature_extractor = FeatureExtractor().cuda()

label_predictor.load_state_dict(torch.load(f'/content/drive/MyDrive/Hw2/2-3/b_s2m_P.bin'))
feature_extractor.load_state_dict(torch.load(f'/content/drive/MyDrive/Hw2/2-3/b_s2m_E.bin'))

label_predictor.eval()
feature_extractor.eval()

test_loader = DataLoader(m_test_set, batch_size=128, shuffle=False)
count = 0
total = len(m_test_set)

# Inference only: no_grad avoids building autograd graphs for every batch.
with torch.no_grad():
    for data, label in test_loader:
        data, label = data.cuda(), label.cuda()

        # The extractor ends with squeeze(), which also drops the batch axis when the
        # final batch holds a single sample; reshape restores (batch, features).
        feature = feature_extractor(data).reshape(data.shape[0], -1)
        logit = label_predictor(feature)
        pred = logit.argmax(dim=-1)
        count += torch.sum(pred==label).item()

print("acc = ", count / total)
    

# MNIST-M -> USPS

## My model

In [None]:
class FeatureExtractor(nn.Module):
    """Five-stage CNN (Conv 3x3 -> BatchNorm -> ReLU -> MaxPool 2) producing 512 channels.

    ``forward`` ends with ``squeeze()``, which removes every size-1 dimension:
    a ``(N, 512, 1, 1)`` map becomes ``(N, 512)``, but for ``N == 1`` the batch
    axis is removed as well and the result is ``(512,)``.
    """

    def __init__(self):
        super().__init__()

        # (in_channels, out_channels, ceil_mode of the stage's max-pool)
        stages = [
            (3, 128, False),
            (128, 128, False),
            (128, 256, True),
            (256, 256, False),
            (256, 512, False),
        ]
        modules = []
        for c_in, c_out, ceil in stages:
            modules.append(nn.Conv2d(c_in, c_out, 3, 1, 1))
            modules.append(nn.BatchNorm2d(c_out))
            modules.append(nn.ReLU(inplace=True))
            modules.append(nn.MaxPool2d(2, ceil_mode=ceil))
        self.conv = nn.Sequential(*modules)

    def forward(self, x):
        """Run the conv stack and drop all singleton dimensions of its output."""
        return self.conv(x).squeeze()

class LabelPredictor(nn.Module):
    """MLP head mapping 512-d features to 10 class logits (two 1024-wide hidden layers)."""

    def __init__(self):
        super().__init__()

        widths = [512, 1024, 1024]
        modules = []
        # Hidden layers: Linear -> BatchNorm1d -> ReLU.
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            modules += [nn.Linear(n_in, n_out), nn.BatchNorm1d(n_out), nn.ReLU(inplace=True)]
        # Output layer: raw logits for the 10 classes.
        modules.append(nn.Linear(widths[-1], 10))
        self.layer = nn.Sequential(*modules)

    def forward(self, h):
        """Return class logits for the feature batch ``h``."""
        return self.layer(h)

class DomainClassifier(nn.Module):
    """MLP head mapping 512-d features to one domain logit (three 1024-wide hidden layers)."""

    def __init__(self):
        super().__init__()

        widths = [512, 1024, 1024, 1024]
        modules = []
        # Hidden layers: Linear -> BatchNorm1d -> ReLU.
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            modules += [nn.Linear(n_in, n_out), nn.BatchNorm1d(n_out), nn.ReLU(inplace=True)]
        # Output layer: a single raw logit.
        modules.append(nn.Linear(widths[-1], 1))
        self.layer = nn.Sequential(*modules)

    def forward(self, h):
        """Return one domain logit per row of ``h``."""
        return self.layer(h)

In [None]:
from torch.autograd import Function
class ReverseLayerF(Function):
    """Gradient reversal layer: identity on the forward pass, ``-lamb * grad`` on the backward pass."""

    @staticmethod
    def forward(ctx, x, lamb):
        # Remember the scaling factor for the backward pass; the data passes through unchanged.
        ctx.lamb = lamb
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        # Flip the sign and scale; `lamb` is not a tensor input, so it gets no gradient.
        reversed_grad = ctx.lamb * grad_output.neg()
        return reversed_grad, None

## Training

In [None]:
# Hyper-parameters for the MNIST-M -> USPS run.
batch_size = 32
lr = 1e-3
n_epoch = 200

# Source domain = MNIST-M training set, target domain = USPS training set.
# NOTE(review): drop_last is not set, so the final batch of either loader may be
# smaller than batch_size.
source_loader = DataLoader(m_train_set, batch_size=batch_size, shuffle=True, num_workers=2)
target_loader = DataLoader(u_train_set, batch_size=batch_size, shuffle=True, num_workers=2)

# Fresh, randomly initialised networks on the GPU (construction order affects the
# random stream consumed for weight initialisation).
feature_extractor = FeatureExtractor().cuda()
label_predictor = LabelPredictor().cuda()
domain_classifier = DomainClassifier().cuda()

# Class loss on 10-way logits; domain loss on a single raw logit.
class_criterion = nn.CrossEntropyLoss()
domain_criterion = nn.BCEWithLogitsLoss()

# One Adam optimizer per network so they can be stepped independently.
optimizer_F = optim.Adam(feature_extractor.parameters(), lr=lr)
optimizer_C = optim.Adam(label_predictor.parameters(), lr=lr)
optimizer_D = optim.Adam(domain_classifier.parameters(), lr=lr)


In [None]:
# Gradient-reversal (DANN-style) training: one backward pass trains the label
# predictor, the domain classifier and, through ReverseLayerF, the feature extractor.
loss_list, acc_list = [], []
label_predictor.train()
feature_extractor.train()
domain_classifier.train()

for epoch in range(n_epoch):

    running_loss = 0.0
    s_count, total_num = 0, 0

    # Gradient-reversal strength. It depends only on the epoch, so compute it once per epoch.
    # NOTE(review): this ramps from 1 (epoch 0) towards 2; the schedule in the DANN paper
    # subtracts 1 so that it ramps from 0 towards 1 - confirm which one is intended.
    lamb = 2 / (1 + math.exp(-10*epoch/n_epoch))

    # zip() stops at the shorter loader, so an epoch covers only that many batches.
    for i, ((source_data, source_label), (target_data, _)) in enumerate(zip(source_loader, target_loader)):
        source_data = source_data.cuda()
        source_label = source_label.cuda()
        target_data = target_data.cuda()
        source_bs, target_bs = source_data.shape[0], target_data.shape[0]

        # Domain targets: 0 = source, 1 = target.
        s_domain_label = torch.zeros((source_bs, 1)).cuda()
        t_domain_label = torch.ones((target_bs, 1)).cuda()

        # Source batch: class loss plus domain loss (through the reversal layer).
        s_feature = feature_extractor(source_data)
        s_r_feature = ReverseLayerF.apply(s_feature, lamb)
        s_domain_logits = domain_classifier(s_r_feature)
        s_class_logits = label_predictor(s_feature)

        domain_loss_s = domain_criterion(s_domain_logits, s_domain_label)
        class_loss_s = class_criterion(s_class_logits, source_label)

        # Target batch: domain loss only (its labels are ignored).
        t_feature = feature_extractor(target_data)
        t_r_feature = ReverseLayerF.apply(t_feature, lamb)
        t_domain_logits = domain_classifier(t_r_feature)

        domain_loss_t = domain_criterion(t_domain_logits, t_domain_label)

        loss = domain_loss_s + class_loss_s + domain_loss_t
        loss.backward()

        running_loss += loss.item()

        optimizer_D.step()
        optimizer_F.step()
        optimizer_C.step()

        optimizer_D.zero_grad()
        optimizer_F.zero_grad()
        optimizer_C.zero_grad()

        s_count += torch.sum(torch.argmax(s_class_logits, dim=1) == source_label).item()
        # Accuracy is measured on SOURCE predictions, so count source samples. The
        # original added the target batch size here, which skews the accuracy
        # whenever the two batches differ in size (e.g. a short final batch).
        total_num += source_bs

    train_loss = running_loss / (i+1)
    train_acc = s_count / total_num
    loss_list.append(train_loss)
    acc_list.append(train_acc)
    # Overwrite the latest checkpoint (models and optimizers) after every epoch.
    torch.save(feature_extractor.state_dict(), f'/content/extractor_model.bin')
    torch.save(label_predictor.state_dict(), f'/content/predictor_model.bin')
    torch.save(domain_classifier.state_dict(), f'/content/domain_model.bin')
    torch.save(optimizer_F.state_dict(), f'/content/F_opt.bin')
    torch.save(optimizer_C.state_dict(), f'/content/C_opt.bin')
    torch.save(optimizer_D.state_dict(), f'/content/D_opt.bin')
    print('epoch {:>3d}: loss: {:6.4f}, acc {:6.4f}'.format(epoch+1, train_loss, train_acc))



In [None]:
# Alternating adversarial training (no gradient-reversal layer): each iteration
# first updates the domain classifier on detached features, then updates the
# feature extractor and label predictor with (class loss - lamb * domain loss).
# NOTE(review): this cell continues from whatever state the models/optimizers are
# in; if the gradient-reversal loop above was run first, note that it uses the
# opposite domain-label convention (source = 0 there, source = 1 here).
D_loss_list, F_loss_list, acc_list = [], [], []
label_predictor.train()
feature_extractor.train()
domain_classifier.train()

for epoch in range(n_epoch):

    running_D_loss, running_F_loss = 0.0, 0.0
    total_hit, total_num = 0.0, 0.0

    # zip() stops at the shorter of the two loaders.
    for i, ((source_data, source_label), (target_data, _)) in enumerate(zip(source_loader, target_loader)):
        # Weight of the adversarial term; depends only on the epoch index.
        lamb= 2 / (1 + math.exp(-10*epoch/n_epoch))

        source_data = source_data.cuda()
        source_label = source_label.cuda()
        target_data = target_data.cuda()
        
        # Source and target go through the extractor as one batch; source rows come first.
        mixed_data = torch.cat([source_data, target_data], dim=0)
        # Domain targets: 1 for the source rows, 0 for the target rows.
        domain_label = torch.zeros([source_data.shape[0] + target_data.shape[0], 1]).cuda()
        domain_label[:source_data.shape[0]] = 1

        # Step 1: train the domain classifier. detach() keeps this loss from
        # reaching the feature extractor.
        feature = feature_extractor(mixed_data)
        domain_logits = domain_classifier(feature.detach())
        loss = domain_criterion(domain_logits, domain_label)
        running_D_loss+= loss.item()
        loss.backward()
        optimizer_D.step()

        # Step 2: train extractor + predictor. Only the source rows have class labels.
        class_logits = label_predictor(feature[:source_data.shape[0]])
        domain_logits = domain_classifier(feature)

        # Subtracting the domain loss rewards features the domain classifier gets wrong.
        loss = class_criterion(class_logits, source_label) - lamb * domain_criterion(domain_logits, domain_label)
        running_F_loss+= loss.item()
        loss.backward()
        optimizer_F.step()
        optimizer_C.step()

        # Clear all gradients, including those the second backward left on the
        # domain classifier (optimizer_D is not stepped for them).
        optimizer_D.zero_grad()
        optimizer_F.zero_grad()
        optimizer_C.zero_grad()

        total_hit += torch.sum(torch.argmax(class_logits, dim=1) == source_label).item()
        total_num += source_data.shape[0]
        print(i, end='\r')

    # Per-epoch averages over the i+1 processed batches; accuracy is on source samples.
    train_D_loss, train_F_loss, train_acc = running_D_loss / (i+1), running_F_loss / (i+1), total_hit / total_num

    D_loss_list.append(train_D_loss)
    F_loss_list.append(train_F_loss)
    acc_list.append(train_acc)
    # Overwrite the Drive checkpoints after every epoch.
    torch.save(feature_extractor.state_dict(), f'/content/drive/MyDrive/Hw2/2-3/b_m2u_E_s.bin')
    torch.save(label_predictor.state_dict(), f'/content/drive/MyDrive/Hw2/2-3/b_m2u_P_s.bin')
    torch.save(domain_classifier.state_dict(), f'/content/drive/MyDrive/Hw2/2-3/b_m2u_D_s.bin')
    # The epoch number printed here is 0-based.
    print('epoch {:>3d}: train D loss: {:6.4f}, train F loss: {:6.4f}, acc {:6.4f}'.format(epoch, train_D_loss, train_F_loss, train_acc))


In [None]:
# Evaluate the models currently in memory (no checkpoint is reloaded here) on the USPS test set.
label_predictor.eval()
feature_extractor.eval()

test_loader = DataLoader(u_test_set, batch_size=128, shuffle=False)
count = 0
total = len(u_test_set)

# Inference only: no_grad avoids building autograd graphs for every batch.
with torch.no_grad():
    for data, label in test_loader:
        data, label = data.cuda(), label.cuda()

        # The extractor ends with squeeze(), which also drops the batch axis when the
        # final batch holds a single sample; reshape restores (batch, features).
        feature = feature_extractor(data).reshape(data.shape[0], -1)
        logit = label_predictor(feature)
        pred = logit.argmax(dim=-1)
        count += torch.sum(pred==label).item()

print("acc = ", count / total)
    

# USPS -> SVHN

In [None]:
class FeatureExtractor(nn.Module):
    """CNN for the USPS -> SVHN experiment, ending in a 4x4 convolution with 512 output channels.

    ``forward`` ends with ``squeeze()``, which removes every size-1 dimension, so a
    batch holding a single sample loses its batch axis too.
    NOTE(review): a class with the same name is defined again in a later cell and
    replaces this one once that cell runs.
    """

    def __init__(self):
        super().__init__()

        def conv_bn_relu(c_in, c_out, kernel, padding=0):
            # Stride-1 convolution followed by batch norm and ReLU.
            return [
                nn.Conv2d(c_in, c_out, kernel, 1, padding),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
            ]

        self.conv = nn.Sequential(
            *conv_bn_relu(3, 32, 5),
            *conv_bn_relu(32, 64, 3, 1),
            nn.MaxPool2d(2, 2),
            *conv_bn_relu(64, 128, 5),
            *conv_bn_relu(128, 256, 3, 1),
            nn.MaxPool2d(2, 2),
            *conv_bn_relu(256, 512, 3, 1),
            nn.Conv2d(512, 512, 4),
        )

    def forward(self, x):
        """Run the conv stack and drop all singleton dimensions of its output."""
        return self.conv(x).squeeze()

class LabelPredictor(nn.Module):
    """MLP head mapping 512-d features to 10 class logits (one 1024-wide hidden layer)."""

    def __init__(self):
        super().__init__()

        hidden = [nn.Linear(512, 1024), nn.BatchNorm1d(1024), nn.ReLU(inplace=True)]
        output = nn.Linear(1024, 10)
        self.layer = nn.Sequential(*hidden, output)

    def forward(self, h):
        """Return class logits for the feature batch ``h``."""
        return self.layer(h)

class DomainClassifier(nn.Module):
    """MLP head mapping 512-d features to one domain logit (hidden widths 1024, 1024, 512)."""

    def __init__(self):
        super().__init__()

        widths = [512, 1024, 1024, 512]
        modules = []
        # Hidden layers: Linear -> BatchNorm1d -> ReLU.
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            modules += [nn.Linear(n_in, n_out), nn.BatchNorm1d(n_out), nn.ReLU(inplace=True)]
        # Output layer: a single raw logit.
        modules.append(nn.Linear(widths[-1], 1))
        self.layer = nn.Sequential(*modules)

    def forward(self, h):
        """Return one domain logit per row of ``h``."""
        return self.layer(h)

## My model ori


In [None]:
class FeatureExtractor(nn.Module):
    """CNN for the USPS -> SVHN experiment, ending in a 4x4 convolution with 1024 output channels.

    ``forward`` ends with ``squeeze()``, which removes every size-1 dimension, so a
    batch holding a single sample loses its batch axis too.
    """

    def __init__(self):
        super().__init__()

        def conv_bn_relu(c_in, c_out, kernel, padding=0):
            # Stride-1 convolution followed by batch norm and ReLU.
            return [
                nn.Conv2d(c_in, c_out, kernel, 1, padding),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
            ]

        self.conv = nn.Sequential(
            *conv_bn_relu(3, 32, 5),
            *conv_bn_relu(32, 64, 3, 1),
            nn.MaxPool2d(2, 2),
            *conv_bn_relu(64, 128, 5),
            *conv_bn_relu(128, 256, 3, 1),
            nn.MaxPool2d(2, 2),
            *conv_bn_relu(256, 512, 3, 1),
            nn.Conv2d(512, 1024, 4),
        )

    def forward(self, x):
        """Run the conv stack and drop all singleton dimensions of its output."""
        return self.conv(x).squeeze()

class LabelPredictor(nn.Module):
    """MLP head mapping 1024-d features to 10 class logits (one 1024-wide hidden layer)."""

    def __init__(self):
        super().__init__()

        hidden = [nn.Linear(1024, 1024), nn.BatchNorm1d(1024), nn.ReLU(inplace=True)]
        output = nn.Linear(1024, 10)
        self.layer = nn.Sequential(*hidden, output)

    def forward(self, h):
        """Return class logits for the feature batch ``h``."""
        return self.layer(h)

class DomainClassifier(nn.Module):
    """MLP head mapping 1024-d features to one domain logit (hidden widths 1024, 1024, 512)."""

    def __init__(self):
        super().__init__()

        widths = [1024, 1024, 1024, 512]
        modules = []
        # Hidden layers: Linear -> BatchNorm1d -> ReLU.
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            modules += [nn.Linear(n_in, n_out), nn.BatchNorm1d(n_out), nn.ReLU(inplace=True)]
        # Output layer: a single raw logit.
        modules.append(nn.Linear(widths[-1], 1))
        self.layer = nn.Sequential(*modules)

    def forward(self, h):
        """Return one domain logit per row of ``h``."""
        return self.layer(h)

In [None]:
from torch.autograd import Function
class ReverseLayerF(Function):
    """Gradient reversal layer: identity on the forward pass, ``-lamb * grad`` on the backward pass."""

    @staticmethod
    def forward(ctx, x, lamb):
        # Remember the scaling factor for the backward pass; the data passes through unchanged.
        ctx.lamb = lamb
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        # Flip the sign and scale; `lamb` is not a tensor input, so it gets no gradient.
        reversed_grad = ctx.lamb * grad_output.neg()
        return reversed_grad, None

## Training

In [None]:
# Hyper-parameters for the USPS -> SVHN run.
batch_size = 16
lr = 1e-3
n_epoch = 200

# Source domain = USPS training set, target domain = SVHN training set.
# NOTE(review): drop_last is not set, so the final batch of either loader may be
# smaller than batch_size.
source_loader = DataLoader(u_train_set, batch_size=batch_size, shuffle=True, num_workers=1)
target_loader = DataLoader(s_train_set, batch_size=batch_size, shuffle=True, num_workers=1)

# Fresh, randomly initialised networks on the GPU (construction order affects the
# random stream consumed for weight initialisation).
feature_extractor = FeatureExtractor().cuda()
label_predictor = LabelPredictor().cuda()
domain_classifier = DomainClassifier().cuda()

# Class loss on 10-way logits; domain loss on a single raw logit.
class_criterion = nn.CrossEntropyLoss()
domain_criterion = nn.BCEWithLogitsLoss()

# One Adam optimizer per network so they can be stepped independently.
optimizer_F = optim.Adam(feature_extractor.parameters(), lr=lr)
optimizer_C = optim.Adam(label_predictor.parameters(), lr=lr)
optimizer_D = optim.Adam(domain_classifier.parameters(), lr=lr)

In [None]:
# Show the layer layout of the domain classifier created in the setup cell above.
print(domain_classifier)

In [None]:
# Gradient-reversal (DANN-style) training: one backward pass trains the label
# predictor, the domain classifier and, through ReverseLayerF, the feature extractor.
loss_list, acc_list = [], []
label_predictor.train()
feature_extractor.train()
domain_classifier.train()

for epoch in range(n_epoch):

    running_loss = 0.0
    s_count, total_num = 0, 0

    # Gradient-reversal strength. It depends only on the epoch, so compute it once per epoch.
    # NOTE(review): this ramps from 1 (epoch 0) towards 2; the schedule in the DANN paper
    # subtracts 1 so that it ramps from 0 towards 1 - confirm which one is intended.
    lamb = 2 / (1 + math.exp(-10*epoch/n_epoch))

    # zip() stops at the shorter loader, so an epoch covers only that many batches.
    for i, ((source_data, source_label), (target_data, _)) in enumerate(zip(source_loader, target_loader)):
        source_data = source_data.cuda()
        source_label = source_label.cuda()
        target_data = target_data.cuda()
        source_bs, target_bs = source_data.shape[0], target_data.shape[0]

        # Domain targets: 0 = source, 1 = target.
        s_domain_label = torch.zeros((source_bs, 1)).cuda()
        t_domain_label = torch.ones((target_bs, 1)).cuda()

        # Source batch: class loss plus domain loss (through the reversal layer).
        s_feature = feature_extractor(source_data)
        s_r_feature = ReverseLayerF.apply(s_feature, lamb)
        s_domain_logits = domain_classifier(s_r_feature)
        s_class_logits = label_predictor(s_feature)

        domain_loss_s = domain_criterion(s_domain_logits, s_domain_label)
        class_loss_s = class_criterion(s_class_logits, source_label)

        # Target batch: domain loss only (its labels are ignored).
        t_feature = feature_extractor(target_data)
        t_r_feature = ReverseLayerF.apply(t_feature, lamb)
        t_domain_logits = domain_classifier(t_r_feature)

        domain_loss_t = domain_criterion(t_domain_logits, t_domain_label)

        loss = domain_loss_s + class_loss_s + domain_loss_t
        loss.backward()

        running_loss += loss.item()

        optimizer_D.step()
        optimizer_F.step()
        optimizer_C.step()

        optimizer_D.zero_grad()
        optimizer_F.zero_grad()
        optimizer_C.zero_grad()

        s_count += torch.sum(torch.argmax(s_class_logits, dim=1) == source_label).item()
        # Accuracy is measured on SOURCE predictions, so count source samples. The
        # original added the target batch size here, which skews the accuracy
        # whenever the two batches differ in size (e.g. a short final batch).
        total_num += source_bs

    train_loss = running_loss / (i+1)
    train_acc = s_count / total_num
    loss_list.append(train_loss)
    acc_list.append(train_acc)
    # Overwrite the latest checkpoint (models and optimizers) after every epoch.
    torch.save(feature_extractor.state_dict(), f'/content/extractor_model.bin')
    torch.save(label_predictor.state_dict(), f'/content/predictor_model.bin')
    torch.save(domain_classifier.state_dict(), f'/content/domain_model.bin')
    torch.save(optimizer_F.state_dict(), f'/content/F_opt.bin')
    torch.save(optimizer_C.state_dict(), f'/content/C_opt.bin')
    torch.save(optimizer_D.state_dict(), f'/content/D_opt.bin')
    print('epoch {:>3d}: loss: {:6.4f}, acc {:6.4f}'.format(epoch+1, train_loss, train_acc))



In [None]:
# Alternating adversarial training (no gradient-reversal layer): each iteration
# first updates the domain classifier on detached features, then updates the
# feature extractor and label predictor with (class loss - lamb * domain loss).
# NOTE(review): this cell continues from whatever state the models/optimizers are
# in; if the gradient-reversal loop above was run first, note that it uses the
# opposite domain-label convention (source = 0 there, source = 1 here).
D_loss_list, F_loss_list, acc_list = [], [], []
label_predictor.train()
feature_extractor.train()
domain_classifier.train()

for epoch in range(n_epoch):

    running_D_loss, running_F_loss = 0.0, 0.0
    total_hit, total_num = 0.0, 0.0

    # zip() stops at the shorter of the two loaders.
    for i, ((source_data, source_label), (target_data, _)) in enumerate(zip(source_loader, target_loader)):
        # Weight of the adversarial term; depends only on the epoch index.
        lamb= 2 / (1 + math.exp(-10*epoch/n_epoch))

        source_data = source_data.cuda()
        source_label = source_label.cuda()
        target_data = target_data.cuda()
        
        # Source and target go through the extractor as one batch; source rows come first.
        mixed_data = torch.cat([source_data, target_data], dim=0)
        # Domain targets: 1 for the source rows, 0 for the target rows.
        domain_label = torch.zeros([source_data.shape[0] + target_data.shape[0], 1]).cuda()
        domain_label[:source_data.shape[0]] = 1

        # Step 1: train the domain classifier. detach() keeps this loss from
        # reaching the feature extractor.
        feature = feature_extractor(mixed_data)
        domain_logits = domain_classifier(feature.detach())
        loss = domain_criterion(domain_logits, domain_label)
        running_D_loss+= loss.item()
        loss.backward()
        optimizer_D.step()

        # Step 2: train extractor + predictor. Only the source rows have class labels.
        class_logits = label_predictor(feature[:source_data.shape[0]])
        domain_logits = domain_classifier(feature)
        # Subtracting the domain loss rewards features the domain classifier gets wrong.
        loss = class_criterion(class_logits, source_label) - lamb * domain_criterion(domain_logits, domain_label)
        running_F_loss+= loss.item()
        loss.backward()
        optimizer_F.step()
        optimizer_C.step()

        # Clear all gradients, including those the second backward left on the
        # domain classifier (optimizer_D is not stepped for them).
        optimizer_D.zero_grad()
        optimizer_F.zero_grad()
        optimizer_C.zero_grad()

        total_hit += torch.sum(torch.argmax(class_logits, dim=1) == source_label).item()
        total_num += source_data.shape[0]
        print(i, end='\r')

    # Per-epoch averages over the i+1 processed batches; accuracy is on source samples.
    train_D_loss, train_F_loss, train_acc = running_D_loss / (i+1), running_F_loss / (i+1), total_hit / total_num

    D_loss_list.append(train_D_loss)
    F_loss_list.append(train_F_loss)
    acc_list.append(train_acc)
    # Overwrite the Drive checkpoints (suffix "_t") after every epoch.
    torch.save(feature_extractor.state_dict(), f'/content/drive/MyDrive/Hw2/2-3/u2s_E_t.bin')
    torch.save(label_predictor.state_dict(), f'/content/drive/MyDrive/Hw2/2-3/u2s_P_t.bin')
    torch.save(domain_classifier.state_dict(), f'/content/drive/MyDrive/Hw2/2-3/u2s_D_t.bin')
    # The epoch number printed here is 0-based.
    print('epoch {:>3d}: train D loss: {:6.4f}, train F loss: {:6.4f}, acc {:6.4f}'.format(epoch, train_D_loss, train_F_loss, train_acc))


In [None]:
# Evaluate USPS -> SVHN checkpoints on the SVHN test set.
label_predictor = LabelPredictor().cuda()
feature_extractor = FeatureExtractor().cuda()

# NOTE(review): these files end in "_s", while the adversarial training cell above
# saves "u2s_*_t.bin" - confirm that this is the checkpoint meant to be evaluated.
label_predictor.load_state_dict(torch.load(f'/content/drive/MyDrive/Hw2/2-3/u2s_P_s.bin'))
feature_extractor.load_state_dict(torch.load(f'/content/drive/MyDrive/Hw2/2-3/u2s_E_s.bin'))

label_predictor.eval()
feature_extractor.eval()

test_loader = DataLoader(s_test_set, batch_size=128, shuffle=False)
count = 0
total = len(s_test_set)

# Inference only: no_grad avoids building autograd graphs for every batch.
with torch.no_grad():
    for data, label in test_loader:
        data, label = data.cuda(), label.cuda()

        # The extractor ends with squeeze(), which also drops the batch axis when the
        # final batch holds a single sample; reshape restores (batch, features).
        feature = feature_extractor(data).reshape(data.shape[0], -1)
        logit = label_predictor(feature)
        pred = logit.argmax(dim=-1)
        count += torch.sum(pred==label).item()

print("acc = ", count / total)
    

## TSNE

In [None]:
# Collect the label-predictor outputs for the SVHN and MNIST-M test sets (input to t-SNE).
# NOTE(review): FeatureExtractor / LabelPredictor are redefined several times in this
# notebook; the definitions in scope must match the architecture stored in the s2m
# checkpoints below, otherwise load_state_dict fails - verify before running.
feature_extractor = FeatureExtractor().cuda()
label_predictor = LabelPredictor().cuda()
feature_extractor.eval()
label_predictor.eval()
E_path = "/content/drive/MyDrive/Hw2/2-3/s2m_E.bin"
P_path = "/content/drive/MyDrive/Hw2/2-3/s2m_P.bin"
feature_extractor.load_state_dict(torch.load(E_path))
label_predictor.load_state_dict(torch.load(P_path))
# These names are reused from the training cells and now point at the TEST sets.
source_loader = DataLoader(s_test_set, batch_size=1, shuffle=False, num_workers=2)
target_loader = DataLoader(m_test_set, batch_size=1, shuffle=False, num_workers=2)


def _collect_logits(loader):
    """Return an array with one row of label-predictor outputs per sample in ``loader``.

    Works for any batch size: the extractor output is reshaped to (batch, features),
    which also undoes the extractor's squeeze() for single-sample batches.
    """
    rows = []
    # Inference only: no_grad avoids building an autograd graph per sample.
    with torch.no_grad():
        for img, _ in loader:
            img = img.cuda()
            feature = feature_extractor(img).reshape(img.shape[0], -1)
            output = label_predictor(feature)
            rows.extend(output.cpu().numpy())
    return np.array(rows)


choose_s = _collect_logits(source_loader)
choose_m = _collect_logits(target_loader)

In [None]:
def _labels_of(dataset):
    """Return the integer labels of an ImgDataset as an array, in dataset order.

    Reads ``dataset.labels`` directly; indexing the dataset itself would open and
    transform every image only to discard it.
    """
    return np.array([int(dataset.labels[i]) for i in range(len(dataset))])


# Ground-truth digit labels of the three test sets.
label_s = _labels_of(s_test_set)
label_m = _labels_of(m_test_set)
label_u = _labels_of(u_test_set)

In [None]:
# Scratch check: display the first 100 SVHN test labels.
# NOTE(review): `a` is not used by any other cell visible in this notebook.
a = np.arange(100)
label_s[:100]

In [None]:
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

# Stack source (SVHN) rows first, then target (MNIST-M) rows; `label` follows the same order.
s_m = np.concatenate([choose_s, choose_m], axis=0)
label = np.concatenate([label_s, label_m], axis=0)



In [None]:
# Shuffled row indices for optionally sub-sampling the t-SNE input.
# The original referenced `u_s`, which is not defined anywhere in this notebook and
# raised NameError; `s_m` is the stacked array the following cells operate on.
# NOTE(review): `rows` and `n_select` are not consumed by the cells below - confirm
# whether sub-sampling (e.g. s_m[rows[:n_select]]) was intended.
rows = np.arange(s_m.shape[0])
np.random.shuffle(rows)
n_select = 10000

In [None]:
# Embed the stacked outputs `s_m` into 2-D; `tsne` holds the embedded coordinates
# returned by fit_transform, not the estimator.
# NOTE(review): no random_state is passed, so the embedding depends on the current
# global NumPy random state; the `n_iter` keyword depends on the installed
# scikit-learn version - confirm against the runtime.
tsne = TSNE(n_components=2, learning_rate=200, early_exaggeration=12.0, perplexity=50, verbose=1, n_iter=1000).fit_transform(s_m)

In [None]:
# Sanity check: shape of the stacked t-SNE input.
s_m.shape

In [None]:
# scale and move the coordinates so they fit [0; 1] range
def scale_to_01_range(x):
    """Linearly rescale ``x`` so its minimum maps to 0 and its maximum to 1.

    Args:
        x: array-like of numbers (must be non-empty).

    Returns:
        ``numpy.ndarray`` with the same shape as ``x``. When every value in ``x``
        is identical the range is zero; the result is then all zeros instead of
        the NaNs a division by zero would produce.
    """
    x = np.asarray(x)
    lowest = np.min(x)
    # Width of the value distribution.
    value_range = np.max(x) - lowest
    # Shift so that the distribution starts at zero.
    starts_from_zero = x - lowest
    # Divide by the range; fall back to 1 for constant input to avoid 0 / 0.
    return starts_from_zero / (value_range if value_range != 0 else 1)

# Extract the x and y coordinates of every sample on the t-SNE plot and
# rescale each axis to [0, 1].
tx = scale_to_01_range(tsne[:, 0])
ty = scale_to_01_range(tsne[:, 1])

# Two panels: left coloured by digit class, right coloured by domain.
fig, ax = plt.subplots(1, 2, figsize=(12, 5))

# plt.get_cmap is used because plt.cm.get_cmap no longer exists in recent matplotlib releases.
cmap = plt.get_cmap("nipy_spectral")
colors = cmap(np.linspace(0, 1, 10))

# Left panel: one scatter call per digit class so each class gets a legend entry.
for i in range(10):
    is_class = label == i
    ax[0].scatter(tx[is_class], ty[is_class], s=3, color=colors[i], label=str(i))
# Build the legend once, after all classes have been drawn.
ax[0].legend(loc="best")
ax[0].set_title("t-SNE by digit class")

# Right panel: the first choose_s.shape[0] rows are source samples, the rest are target samples.
n_source = choose_s.shape[0]
ax[1].scatter(tx[:n_source], ty[:n_source], s=3, color=colors[2], label="source")
ax[1].scatter(tx[n_source:], ty[n_source:], s=3, color=colors[8], label="target")
ax[1].legend(loc="best")
ax[1].set_title("t-SNE by domain")
plt.show()