### Dataset Download

In [None]:
# Clone the homework repository, then run the get_dataset.sh script it contains.
# NOTE(review): the clone URL carries an access-token placeholder. Inject the real
# token at run time (environment variable / Colab secret) instead of saving it in
# the notebook, and clear the cell output before sharing.
! git clone https://<token>@github.com/DLCV-Fall-2021/hw2-SonicBenz0408.git
! bash ./hw2-SonicBenz0408/get_dataset.sh

## Random seed

In [None]:
import random

import torch
import numpy as np


def same_seeds(seed):
    """Seed every random number generator this notebook draws from.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (the default generator, plus the CUDA generators when CUDA is available),
    then disables cuDNN benchmarking and requests deterministic cuDNN kernels.

    Args:
        seed: Integer seed shared by all generators.
    """
    # Host-side generators: stdlib, NumPy, PyTorch (in that order).
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    # CUDA generators are only seeded when a CUDA device is usable.
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    # Trade cuDNN autotuning for run-to-run reproducibility.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

same_seeds(7414)

## Import Packages

In [None]:
# Training progress bar
!pip install -q qqdm

import os
import csv
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.nn.utils import spectral_norm
from torch import optim
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from qqdm.notebook import qqdm

## Dataset


In [None]:
class ImgDataset(Dataset):
    """Dataset pairing the files of one directory with a sequence of labels.

    Args:
        path: Directory whose entries are the image files.
        labels: Indexable sequence; entry ``i`` is the label of the ``i``-th
            file name in sorted order and must be convertible with ``int()``.
        transform: Callable applied to the tensor returned by
            ``torchvision.io.read_image``.
    """

    def __init__(self, path, labels, transform):
        self.path = path
        self.labels = labels
        self.transform = transform
        # os.listdir returns entries in arbitrary order; sorting pins down
        # which file each index (and therefore each label) refers to.
        self.fnames = sorted(os.listdir(path))
        self.num_samples = len(self.fnames)

    def __len__(self):
        """Return the number of directory entries found in ``path``."""
        return self.num_samples

    def __getitem__(self, idx):
        """Return ``(transformed_image, int_label)`` for position ``idx``."""
        label = int(self.labels[idx])
        image_file = os.path.join(self.path, self.fnames[idx])
        image = self.transform(torchvision.io.read_image(image_file))
        return image, label


In [None]:
# Training-time preprocessing: image tensor -> PIL image -> colour jitter ->
# float tensor, then per-channel normalisation with mean 0.5 and std 0.5.
tfm = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ColorJitter(brightness=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

train_label_path = "/content/hw2_data/digits/mnistm/train.csv"
train_path = "/content/hw2_data/digits/mnistm/train/"

# Keep the last column of every CSV row, then drop the first entry
# (presumably the header row -- confirm against the CSV file).
with open(train_label_path, "r") as f:
    train_label = [row[-1] for row in csv.reader(f)]
train_label.pop(0)

train_set = ImgDataset(train_path, train_label, tfm)

In [None]:
# Preview the first 25 training samples in a 5x5 grid. (x + 1) / 2 undoes the
# mean-0.5 / std-0.5 normalisation so pixel values are displayable.
images = []
for sample_idx in range(25):
    images.append((train_set[sample_idx][0] + 1) / 2)

grid_img = torchvision.utils.make_grid(images, nrow=5)
plt.figure(figsize=(10, 10))
# make_grid returns channels-first data; imshow wants channels last.
plt.imshow(grid_img.permute(1, 2, 0))
plt.show()

## My model

In [None]:
def weights_init(m):
    """Re-initialise a convolution or batch-norm module in place.

    The function takes a single module, so it can be handed to
    ``nn.Module.apply``. Dispatch is by class name:

    * names containing ``Conv``: weights drawn from a normal distribution
      with mean 0.0 and std 0.02;
    * names containing ``BatchNorm``: weights drawn from a normal
      distribution with mean 1.0 and std 0.02, bias filled with 0.

    Any other module is left untouched.

    Args:
        m: The module to (possibly) re-initialise.
    """
    layer_kind = type(m).__name__
    if 'Conv' in layer_kind:
        m.weight.data.normal_(0.0, 0.02)
        return
    if 'BatchNorm' in layer_kind:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

class ResidualBlock(nn.Module):
    """Two-convolution residual block with an optional 2x resampling step.

    The block returns ``conv_shortcut(x) + conv_2(conv_1(x))``. Which layers
    are built depends on ``resample``:

    * ``'down'``: spectrally normalised 3x3 convolutions; the shortcut and
      ``conv_2`` use stride 2 with padding 1, so H becomes ``ceil(H / 2)``.
    * ``'up'``: ``conv_1`` is a 3x3 convolution, while the shortcut and
      ``conv_2`` are 5x5 stride-2 transposed convolutions. With
      ``last=False`` they use no padding (H -> 2H + 3) and every activation
      is followed by ``Dropout2d(0.5)``. With ``last=True`` they use
      ``padding=1, output_padding=1`` (H -> 2H + 2) and dropout is omitted.
    * ``None``: stride-1 3x3 convolutions; the spatial size is unchanged.

    Args:
        input_dim: Number of input channels.
        output_dim: Number of output channels.
        resample: ``'down'``, ``'up'`` or ``None``.
        last: Only consulted when ``resample == 'up'``; selects the padded,
            dropout-free variant.

    Raises:
        ValueError: If ``resample`` is not one of the supported values.
    """

    def __init__(self, input_dim, output_dim, resample, last=False):
        super().__init__()

        # Reject unknown modes up front. Previously such a block was built
        # without any layers and only failed later, inside forward(), with an
        # AttributeError on conv_shortcut.
        if resample not in ('down', 'up', None):
            raise ValueError(
                "resample must be 'down', 'up' or None, got %r" % (resample,)
            )

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.resample = resample
        self.leaky_relu = nn.LeakyReLU(0.2, inplace=True)

        if resample == 'down':
            self.conv_shortcut = nn.Sequential(
                spectral_norm(nn.Conv2d(input_dim, output_dim, 3, 2, 1)),
            )
            self.conv_1 = nn.Sequential(
                spectral_norm(nn.Conv2d(input_dim, input_dim, 3, 1, 1)),
                nn.LeakyReLU(0.2, inplace=True),
            )
            self.conv_2 = nn.Sequential(
                spectral_norm(nn.Conv2d(input_dim, output_dim, 3, 2, 1)),
            )
        elif resample == 'up':
            # The two 'up' variants differ only in the transposed-conv
            # padding / output_padding and in whether dropout is appended.
            pad = 1 if last else 0

            def norm_act():
                # Fresh BatchNorm -> ReLU (-> Dropout) modules for one branch.
                layers = [nn.BatchNorm2d(output_dim), nn.ReLU(inplace=True)]
                if not last:
                    layers.append(nn.Dropout2d(0.5))
                return layers

            self.conv_shortcut = nn.Sequential(
                nn.ConvTranspose2d(input_dim, output_dim, 5, 2, pad, pad),
            )
            self.conv_1 = nn.Sequential(
                nn.Conv2d(input_dim, output_dim, 3, 1, 1),
                *norm_act(),
            )
            self.conv_2 = nn.Sequential(
                nn.ConvTranspose2d(output_dim, output_dim, 5, 2, pad, pad),
                *norm_act(),
            )
        else:
            # resample is None: keep the resolution. Note the shortcut
            # convolution is the only one here without spectral norm.
            self.conv_shortcut = nn.Conv2d(input_dim, output_dim, 3, 1, 1)
            self.conv_1 = nn.Sequential(
                spectral_norm(nn.Conv2d(input_dim, output_dim, 3, 1, 1)),
                nn.BatchNorm2d(output_dim),
                nn.LeakyReLU(0.2, inplace=True),
            )
            self.conv_2 = nn.Sequential(
                spectral_norm(nn.Conv2d(output_dim, output_dim, 3, 1, 1)),
                nn.BatchNorm2d(output_dim),
                nn.LeakyReLU(0.2, inplace=True),
            )

    def forward(self, input):
        """Return ``conv_shortcut(input) + residual_branch(input)``."""
        shortcut = self.conv_shortcut(input)
        output = self.conv_2(self.conv_1(input))

        # Down-sampling blocks end their residual branch with a LeakyReLU,
        # except when the block's input already has 8 * 64 channels.
        if self.resample == 'down' and self.input_dim != 8 * 64:
            output = self.leaky_relu(output)

        return shortcut + output

class Generator(nn.Module):
    """Label-conditioned generator assembled from up-sampling residual blocks.

    The label embedding is multiplied element-wise with the noise vector,
    projected to ``8 * dim`` features, reshaped to a 1x1 feature map and run
    through three ``'up'`` residual blocks. A final 3x3 convolution with
    ``tanh`` produces a 3-channel image.

    Args:
        in_dim: Length of the noise vector; also the label-embedding size.
    """

    def __init__(self, in_dim):
        super().__init__()
        base = 64
        self.dim = base
        self.label_emb = nn.Embedding(10, in_dim)
        self.ln1 = nn.Linear(in_dim, 8 * base)
        self.rb1 = ResidualBlock(8 * base, 4 * base, resample='up')
        self.rb2 = ResidualBlock(4 * base, 2 * base, resample='up')
        self.rb3 = ResidualBlock(2 * base, base, resample='up', last=True)

        self.conv_f = nn.Sequential(
            nn.Conv2d(base, 3, 3, 1, 1),
            nn.Tanh(),
        )

    def forward(self, noise, labels):
        """Generate images from ``noise`` conditioned on integer ``labels``."""
        conditioned = self.label_emb(labels) * noise
        feat = self.ln1(conditioned).view(-1, 8 * self.dim, 1, 1)
        for block in (self.rb1, self.rb2, self.rb3):
            feat = block(feat)
        return self.conv_f(feat)

class Discriminator(nn.Module):
    """Residual-block discriminator with a source head and a 10-way class head.

    Three ``'down'`` blocks and one resolution-preserving block feed two
    linear heads. The flatten step assumes the last block emits an
    ``8 * dim x 4 x 4`` map, which is what 28x28 inputs produce
    (28 -> 14 -> 7 -> 4).

    Args:
        in_dim: Not used by this implementation; the first block is
            hard-wired to 3 input channels.
    """

    def __init__(self, in_dim):
        super().__init__()

        self.dim = 64
        self.rb1 = ResidualBlock(3, self.dim, resample='down')
        self.rb2 = ResidualBlock(self.dim, 2 * self.dim, resample='down')
        self.rb3 = ResidualBlock(2 * self.dim, 4 * self.dim, resample='down')
        self.rb4 = ResidualBlock(4 * self.dim, 8 * self.dim, resample=None)

        flat_features = self.dim * 8 * 4 * 4
        self.fc_source = nn.Linear(flat_features, 1)
        self.fc_class = nn.Linear(flat_features, 10)

    def forward(self, input):
        """Return ``(source_score, class_logits)`` for a batch of images."""
        feat = input
        for block in (self.rb1, self.rb2, self.rb3, self.rb4):
            feat = block(feat)
        feat = feat.view(feat.size(0), self.dim * 8 * 4 * 4)
        rf = self.fc_source(feat)
        c = self.fc_class(feat)
        return rf, c

        

## Final Model (Submitted)

In [None]:
def weights_init(m):
    """Re-initialise a convolution or batch-norm module in place.

    Meant to be passed to ``nn.Module.apply``. Dispatch is by class name:

    * names containing ``Conv``: weights drawn from a normal distribution
      with mean 0.0 and std 0.02;
    * names containing ``BatchNorm``: weights drawn from a normal
      distribution with mean 1.0 and std 0.02, bias filled with 0.

    Any other module is left untouched.

    Args:
        m: The module to (possibly) re-initialise.
    """
    layer_kind = type(m).__name__
    if 'Conv' in layer_kind:
        m.weight.data.normal_(0.0, 0.02)
        return
    if 'BatchNorm' in layer_kind:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

class Generator(nn.Module):
    """Label-conditioned generator built from transposed convolutions.

    The label embedding is multiplied element-wise with the noise vector,
    projected by a bias-free linear layer to ``8 * dim`` features and reshaped
    to a 1x1 map. Three 5x5 stride-2 transposed convolutions then grow it
    1x1 -> 5x5 -> 13x13 -> 28x28; the last one outputs 3 channels through
    ``tanh``. ``weights_init`` is applied to every sub-module at the end of
    construction.

    Args:
        in_dim: Length of the noise vector; also the label-embedding size.
    """

    def __init__(self, in_dim):
        super().__init__()
        self.label_emb = nn.Embedding(10, in_dim)
        self.dim = 64

        def up_block(c_in, c_out):
            """Unpadded 5x5 stride-2 transposed conv -> BN -> ReLU -> dropout."""
            stages = [
                nn.ConvTranspose2d(c_in, c_out, 5, 2, padding=0, output_padding=0, bias=False),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
                nn.Dropout2d(0.5),
            ]
            return nn.Sequential(*stages)

        self.fc = nn.Sequential(
            nn.Linear(in_dim, self.dim * 8, bias=False),
        )
        self.layer2 = up_block(self.dim * 8, self.dim * 4)
        self.layer3 = up_block(self.dim * 4, self.dim * 2)
        self.layer_final = nn.Sequential(
            nn.ConvTranspose2d(self.dim * 2, 3, 5, 2, padding=1, output_padding=1),
            nn.Tanh(),
        )

        self.apply(weights_init)

    def forward(self, noise, labels):
        """Generate images from ``noise`` conditioned on integer ``labels``."""
        x = self.fc(self.label_emb(labels) * noise)
        # One "pixel" per sample; channels are inferred from the fc width.
        x = x.view(x.size(0), -1, 1, 1)
        for stage in (self.layer2, self.layer3, self.layer_final):
            x = stage(x)
        return x

class Discriminator(nn.Module):
    """Spectrally normalised CNN with a source head and a 10-way class head.

    Six 3x3 convolution stages alternate stride 2 and stride 1 while widening
    to 256 channels. The flatten step assumes a ``256 x 4 x 4`` feature map,
    which is what 28x28 inputs produce (28 -> 14 -> 7 -> 4). Both heads are
    bias-free linear layers. ``weights_init`` is applied to every sub-module
    at the end of construction.

    Args:
        in_dim: Number of channels of the input images.
    """

    def __init__(self, in_dim):
        super().__init__()

        def sn_stage(c_in, c_out, stride):
            """Spectral-norm 3x3 conv (padding 1) followed by LeakyReLU(0.2)."""
            return nn.Sequential(
                spectral_norm(nn.Conv2d(c_in, c_out, 3, stride, 1)),
                nn.LeakyReLU(0.2, inplace=True),
            )

        self.layer1 = sn_stage(in_dim, 64, 2)
        self.layer2 = sn_stage(64, 64, 1)
        self.layer3 = sn_stage(64, 128, 2)
        self.layer4 = sn_stage(128, 128, 1)
        self.layer5 = sn_stage(128, 256, 2)
        self.layer6 = sn_stage(256, 256, 1)

        self.fc_source = nn.Linear(256 * 4 * 4, 1, bias=False)
        self.fc_class = nn.Linear(256 * 4 * 4, 10, bias=False)

        self.apply(weights_init)

    def forward(self, x):
        """Return ``(source_score, class_logits)`` for a batch of images."""
        stages = (self.layer1, self.layer2, self.layer3,
                  self.layer4, self.layer5, self.layer6)
        for stage in stages:
            x = stage(x)
        x = x.view(x.size(0), 256 * 4 * 4)
        rf = self.fc_source(x)
        c = self.fc_class(x)
        return rf, c

        

## Training

In [None]:
# ---- Hyperparameters ----
batch_size = 64
z_dim = 128

# Fixed noise and labels for the per-epoch preview grid: 100 samples,
# ten consecutive entries for each digit 0..9.
z_sample = Variable(torch.randn(100, z_dim)).cuda()
z_sample_label = [digit for digit in range(10) for _ in range(10)]
z_sample_label = Variable(torch.LongTensor(z_sample_label)).cuda()

lr = 2e-4
n_epoch = 150
n_critic = 5        # the training loop updates G once every n_critic steps
clip_value = 0.01   # NOTE(review): not referenced by the training cell in this notebook
lambda_gp = 10      # weight of the gradient penalty inside loss_D
acgan_d_scale = 3   # weight of the class loss inside loss_D
acgan_g_scale = 1   # weight of the class loss inside loss_G

# ---- Checkpoint directory ----
ckpt_dir = os.path.join("/content/", 'checkpoints')
os.makedirs(ckpt_dir, exist_ok=True)

# ---- Models, both in training mode ----
G = Generator(in_dim=z_dim).cuda()
D = Discriminator(3).cuda()
G.train()
D.train()

# ---- Losses ----
# NOTE(review): dis_criterion is created but not referenced by the training
# cell in this notebook; only aux_criterion is.
dis_criterion = nn.BCELoss()
aux_criterion = nn.CrossEntropyLoss()

# ---- Optimisers (same Adam settings for both networks) ----
adam_kwargs = dict(lr=lr, betas=(0, 0.9))
opt_D = torch.optim.Adam(D.parameters(), **adam_kwargs)
opt_G = torch.optim.Adam(G.parameters(), **adam_kwargs)

# ---- Data ----
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=2)

# ---- LR schedules: multiply lr by 0.95 every 2 * len(train_loader) scheduler steps ----
decay_every = len(train_loader) * 2
sch_D = torch.optim.lr_scheduler.StepLR(opt_D, step_size=decay_every, gamma=0.95)
sch_G = torch.optim.lr_scheduler.StepLR(opt_G, step_size=decay_every, gamma=0.95)

### Training


In [None]:
# Main training loop. Every batch updates the discriminator with a Wasserstein
# loss, a gradient penalty and auxiliary class losses; the generator is updated
# once every n_critic batches. After each epoch a preview grid is shown, and
# checkpoints are written after the first epoch and after every fifth epoch.
steps = 0
# enumerate(range(...)) yields identical values, so e == epoch throughout.
for e, epoch in enumerate(range(n_epoch)):
    progress_bar = qqdm(train_loader)
    for i, data in enumerate(progress_bar):
        imgs, labels = data
        imgs, labels = imgs.cuda(), labels.cuda()

        # Actual batch size of this batch.
        bs = imgs.size(0)

        # ---- Train D ----
        # Fresh noise and uniformly random target digits in [0, 10) for the fake batch.
        noise = Variable(torch.randn(bs, z_dim), requires_grad=False).cuda()
        f_class = Variable(torch.LongTensor(np.random.randint(0, 10, bs)), requires_grad=False).cuda()
        r_imgs = Variable(imgs).cuda()
        f_imgs = G(noise, f_class)
        r_class = Variable(labels).cuda()

        # Forward both batches through D; detach() keeps the D update from
        # back-propagating into G.
        r_source, r_class_pred = D(r_imgs.detach())
        f_source, f_class_pred = D(f_imgs.detach())

        # Auxiliary classification loss on real and on generated images.
        r_c_loss = aux_criterion(r_class_pred, r_class)
        f_c_loss = aux_criterion(f_class_pred, f_class)
        C_loss_D = r_c_loss + f_c_loss

        # Wasserstein critic loss: mean fake score minus mean real score.
        loss_D = -torch.mean(r_source) + torch.mean(f_source)
       
        # Gradient penalty: evaluate D on random per-sample interpolations
        # between real and fake images.
        alpha_g = torch.rand(r_imgs.size(0), 1, 1, 1).cuda().expand_as(r_imgs)
        interpolated = Variable(alpha_g * r_imgs.data + (1 - alpha_g) * f_imgs.data, requires_grad=True)
        out = D(interpolated)[0]

        # Gradient of the source score w.r.t. the interpolated inputs;
        # create_graph=True lets the penalty itself be differentiated.
        grad = torch.autograd.grad(outputs=out,
            inputs=interpolated,
            grad_outputs=torch.ones(out.size()).cuda(),
            retain_graph=True,
            create_graph=True,
            only_inputs=True)[0]

        # Penalise the squared deviation of each sample's gradient L2 norm from 1.
        grad = grad.view(grad.size(0), -1)
        grad_l2norm = torch.sqrt(torch.sum(grad ** 2, dim=1))
        d_loss_gp = torch.mean((grad_l2norm - 1) ** 2)

        # Total D loss = critic loss + weighted penalty + weighted class loss.
        loss_D += lambda_gp * d_loss_gp
        loss_D += acgan_d_scale * C_loss_D

        D.zero_grad()
        loss_D.backward()
        opt_D.step()
        # The D scheduler advances once per batch.
        sch_D.step()

        # ---- Train G (only on every n_critic-th step, including step 0) ----
        if steps % n_critic == 0:
            # Freeze D and unfreeze G for the generator update.
            for p in D.parameters():
                p.requires_grad = False
            for p in G.parameters():
                p.requires_grad = True
            
            noise = Variable(torch.randn(bs, z_dim), requires_grad=False).cuda()
            f_class = Variable(torch.LongTensor(np.random.randint(0, 10, bs)), requires_grad=False).cuda()

            # G tries to raise D's source score and to make D classify the
            # generated image as the requested digit.
            f_imgs = G(noise, f_class)
            f_source, f_class_pred = D(f_imgs)
            loss_G = -torch.mean(f_source) + acgan_g_scale * aux_criterion(f_class_pred, f_class)

            G.zero_grad()
            loss_G.backward()

            # Update the generator.
            opt_G.step()
            # NOTE(review): sch_G only advances on generator updates, i.e. once
            # per n_critic batches, while sch_D advances every batch. With the
            # same step_size G's learning rate therefore decays n_critic times
            # more slowly per batch -- confirm this is intended.
            sch_G.step()
            
            # Restore D for its next update. G stays frozen until the next
            # generator update re-enables it above.
            for p in D.parameters():
                p.requires_grad = True
            for p in G.parameters():
                p.requires_grad = False

        steps += 1
        
        # Loss_G is the value from the most recent generator update, which may
        # be up to n_critic - 1 batches old.
        progress_bar.set_infos({
            'Loss_D': round(loss_D.item(), 4),
            'Loss_G': round(loss_G.item(), 4),
            'Epoch': e+1,
            'Step': steps,
        })

    # End of epoch: render the fixed preview batch in eval mode, mapped from
    # [-1, 1] back to [0, 1] for display.
    G.eval()
    f_imgs_sample = (G(z_sample, z_sample_label).data + 1) / 2.0
    
    grid_img = torchvision.utils.make_grid(f_imgs_sample.cpu(), nrow=10)
    plt.figure(figsize=(10,10))
    plt.imshow(grid_img.permute(1, 2, 0))
    plt.show()
    G.train()

    if (e+1) % 5 == 0 or e == 0:
        # Save the checkpoints. The file names are fixed, so each save
        # overwrites the previous one.
        torch.save(G.state_dict(), os.path.join(ckpt_dir, 'G.pth'))
        torch.save(D.state_dict(), os.path.join(ckpt_dir, 'D.pth'))



### Load model 

In [None]:
import torch
z_dim = 128

# Labels cycle through 0..9 one hundred times (1000 entries), so sample k is
# conditioned on digit k % 10.
digit_cycle = list(range(10)) * 100
z_sample_label = Variable(torch.LongTensor(digit_cycle)).cuda()

# Rebuild the generator, restore its trained weights and switch to eval mode
# before moving it to the GPU.
weights_file = os.path.join("/content", 'DLCV2_2.pth')
G = Generator(z_dim)
G.load_state_dict(torch.load(weights_file))
G.eval()
G.cuda()

### Generate and show some images.


In [None]:
same_seeds(7414)

# Generate 1000 images, one noise vector per entry of z_sample_label.
n_output = 1000
z_sample = Variable(torch.randn(n_output, z_dim)).cuda()
# Inference only: without no_grad() the forward pass records an autograd graph
# for the whole 1000-image batch, which the old `.data` access merely threw
# away afterwards. (x + 1) / 2 maps the tanh output from [-1, 1] to [0, 1].
with torch.no_grad():
    imgs_sample = (G(z_sample, z_sample_label) + 1) / 2.0

# Show the first 100 images in a 10x10 grid.
grid_img = torchvision.utils.make_grid(imgs_sample[:100].cpu(), nrow=10)
plt.figure(figsize=(10, 10))
plt.imshow(grid_img.permute(1, 2, 0))
plt.show()

In [None]:
# Write the generated images to output/<digit>_<NNN>.png, with NNN running
# from 001 to 100 for each digit.
os.makedirs('output', exist_ok=True)
for digit in range(10):
    for num in range(100):
        # Assumes imgs_sample[k] was generated for label k % 10, so the
        # num-th image of a digit sits at index num * 10 + digit.
        sample_idx = num * 10 + digit
        out_file = f'output/{digit}_{str(num+1).zfill(3)}.png'
        torchvision.utils.save_image(imgs_sample[sample_idx], out_file)

In [None]:
import os
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F


def load_checkpoint(checkpoint_path, model):
    """Restore ``model``'s weights from a checkpoint file.

    The file must hold a mapping with a ``'state_dict'`` entry, which is
    passed to ``model.load_state_dict``. Stored tensors are mapped onto the
    GPU when CUDA is available and onto the CPU otherwise, so the function
    also works on CPU-only hosts. A confirmation line is printed on success.

    Args:
        checkpoint_path: Checkpoint location, as accepted by ``torch.load``.
        model: Module whose parameters are overwritten in place.
    """
    # A hard-coded "cuda" target makes torch.load fail on machines without a
    # GPU, so pick the target from what is actually available.
    map_location = "cuda" if torch.cuda.is_available() else "cpu"
    state = torch.load(checkpoint_path, map_location=map_location)
    model.load_state_dict(state['state_dict'])
    print('model loaded from %s' % checkpoint_path)


class Classifier(nn.Module):
    """Small CNN mapping 3-channel images to 10 class logits.

    Two 5x5 convolution + 2x2 max-pool stages are followed by three linear
    layers. ``fc1`` expects ``16 * 4 * 4`` flattened features, which is what
    28x28 inputs produce (28 -> 24 -> 12 -> 8 -> 4).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 4 * 4, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        """Return raw (un-normalised) class logits for a batch of images."""
        # Feature extractor: conv -> ReLU -> shared max-pool, twice.
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        # Flatten everything except the batch dimension.
        x = x.flatten(1)
        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        return self.fc3(x)

# Evaluation-time preprocessing (rebinds tfm): no augmentation, only conversion
# to a float tensor and per-channel normalisation with mean 0.5 and std 0.5.
eval_steps = [
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
]
tfm = transforms.Compose(eval_steps)

In [None]:
import random

import torch
import numpy as np

net = Classifier()
path = "Classifier.pth"
load_checkpoint(path, net)

# Use the GPU when one is available, otherwise stay on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print('Device used:', device)
# .to(device) is a no-op when the model is already there, so no guard is needed.
net = net.to(device)
# Inference only from here on.
net.eval()

# Expected digit for every generated file in sorted file-name order:
# 100 entries of 0, then 100 entries of 1, ... up to 9.
test_label = [digit for digit in range(10) for _ in range(100)]
# Keep the labels on the CPU: the dataset converts each entry with int(), so a
# CUDA tensor buys nothing and the old unconditional .cuda() crashed on
# CPU-only machines despite the fallback above.
test_label = torch.LongTensor(test_label)

test_path = "/content/output/"
test_set = ImgDataset(test_path, test_label, tfm)
test_loader = DataLoader(test_set, batch_size=1, num_workers=0, shuffle=False)

In [None]:
# Preview the first 100 test samples in a 10x10 grid. (x + 1) / 2 undoes the
# mean-0.5 / std-0.5 normalisation so pixel values are displayable.
images = []
for sample_idx in range(100):
    images.append((test_set[sample_idx][0] + 1) / 2)

grid_img = torchvision.utils.make_grid(images, nrow=10)
plt.figure(figsize=(10, 10))
# make_grid returns channels-first data; imshow wants channels last.
plt.imshow(grid_img.permute(1, 2, 0))
plt.show()

In [None]:
# Count how many generated images the classifier assigns to the intended digit.
pred_count = 0
progress_bar = qqdm(test_loader)
# Run on whatever device the classifier lives on instead of assuming CUDA.
net_device = next(net.parameters()).device
# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    for i, data in enumerate(progress_bar):
        img, label = data
        img, label = img.to(net_device), label.to(net_device)

        logit = net(img)
        pred = logit.argmax(dim=-1)
        # Sum element-wise matches so the tally is correct for any batch size;
        # `if pred == label` only works when the batch holds a single sample.
        pred_count += (pred == label).sum().item()


In [None]:
pred_count