In [1]:
import os
import numpy as np
import math
import itertools
import scipy
import sys
import time
import datetime

import torchvision.transforms as transforms
from torchvision.utils import save_image

from torch.utils.data import DataLoader
from torchvision import datasets
from torch.autograd import Variable
import torch.autograd as autograd
from torchvision.utils import make_grid

import torch.nn as nn
import torch.nn.functional as F
import torch

import glob
import random
from torch.utils.data import Dataset
from PIL import Image

import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from IPython.display import clear_output
from icecream import ic

In [2]:
class Hyperparameters(object):
    """Simple attribute bag: every keyword argument becomes an instance attribute."""

    def __init__(self, **kwargs):
        # Copy the keyword arguments straight into the instance namespace so
        # they can be read back as `hp.<name>`.
        vars(self).update(kwargs)

# Run configuration. Values are read elsewhere in the notebook as `hp.<name>`.
hp = Hyperparameters(
    # --- schedule ---
    epoch=0,
    n_epochs=200,
    batch_size=8,
    # --- dataset split sub-folders under `img_root_folder` ---
    dataset_train_mode="train",
    dataset_test_mode="val",
    # --- optimiser settings (learning rate and the two beta coefficients) ---
    lr=0.0002,
    b1=0.5,
    b2=0.999,
    n_cpu=8,
    # --- image / latent geometry ---
    img_size=128,
    channels=3,
    latent_dim=8,
    # --- training cadence ---
    n_critic=5,
    sample_interval=400,
    # --- loss weights ---
    lambda_pixel=10,
    lambda_latent=0.5,
    lambda_kl=0.01,
)

# NOTE(review): absolute, machine-specific path; consider making it configurable.
img_root_folder = 'C:\\Users\\USER\\Desktop\\GANs\\facades'


In [3]:
class ImageDataset(Dataset):
    """Paired-image dataset where each file holds two pictures side by side.

    Every file is split down the middle: the left half is returned under key
    ``"A"`` and the right half under key ``"B"``. With probability 0.5 both
    halves are mirrored horizontally (the same flip is applied to both so the
    pair stays aligned).

    Args:
        root: Directory containing one sub-folder per split.
        transforms_: List of torchvision transforms applied to each half.
            ``None`` means "no transforms".
        mode: Name of the split sub-folder to read. When ``"train"``, the
            files of the ``"test"`` sub-folder are appended as well.
    """

    def __init__(self, root, transforms_=None, mode='train'):
        # Stored as `self.transform` because that is the name __getitem__ reads.
        # `or []` keeps the default `transforms_=None` usable.
        self.transform = transforms.Compose(transforms_ or [])

        self.files = sorted(glob.glob(os.path.join(root, mode, "*.*")))
        if mode == "train":
            self.files.extend(sorted(glob.glob(os.path.join(root, "test", "*.*"))))

    def __getitem__(self, index):
        """Return ``{"A": left_half, "B": right_half}`` for the given index."""
        path = self.files[index % len(self.files)]
        # Open inside a context manager so the file handle is released, and
        # force RGB so the three-channel transforms always apply.
        with Image.open(path) as handle:
            img = handle.convert("RGB")

        w, h = img.size
        half = w // 2  # integer pixel boundary between the two halves
        img_A = img.crop((0, 0, half, h))
        img_B = img.crop((half, 0, w, h))

        # Random horizontal flip, applied to both halves together.
        if np.random.random() < 0.5:
            img_A = img_A.transpose(Image.FLIP_LEFT_RIGHT)
            img_B = img_B.transpose(Image.FLIP_LEFT_RIGHT)

        img_A = self.transform(img_A)
        img_B = self.transform(img_B)

        return {"A": img_A, "B": img_B}

    def __len__(self):
        """Number of image files found for this split."""
        return len(self.files)


In [4]:
def imshow(img, size=10):
    """Display a channel-first image tensor with matplotlib.

    Args:
        img: Tensor laid out as (channels, height, width). It is rescaled with
            ``img / 2 + 0.5`` before plotting, which undoes a
            mean-0.5 / std-0.5 normalisation.
        size: Figure size in inches. A single number produces a square
            figure; a ``(width, height)`` pair is used as given.
    """
    img = img / 2 + 0.5
    # detach()/cpu() make the conversion work for tensors that track
    # gradients or live on the GPU.
    npimg = img.detach().cpu().numpy()
    # matplotlib needs a (width, height) pair; expand a scalar into a square.
    figsize = (size, size) if np.isscalar(size) else size
    plt.figure(figsize=figsize)
    # matplotlib expects channel-last layout.
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

def visualize_output(path, w, h):
    """Read an image file from disk and show it in a ``w`` x ``h`` inch figure.

    Args:
        path: Location of the image file to display.
        w: Figure width in inches.
        h: Figure height in inches.
    """
    # Imported locally because the notebook's import cell does not provide
    # `mpimg`; without this the call fails with NameError.
    import matplotlib.image as mpimg

    img = mpimg.imread(path)
    plt.figure(figsize=(w, h))
    plt.imshow(img)
    plt.show()

In [5]:
# Preprocessing shared by both splits: resize to a square of side
# `hp.img_size`, convert to a tensor, then shift/scale each channel with
# mean 0.5 and std 0.5.
transforms_ = [
    transforms.Resize(size=(hp.img_size, hp.img_size), interpolation=Image.BICUBIC),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
]

# Training batches: shuffled, `hp.batch_size` pairs per batch.
train_dataloader = DataLoader(
    ImageDataset(img_root_folder, transforms_=transforms_, mode=hp.dataset_train_mode),
    batch_size=hp.batch_size,
    shuffle=True,
    num_workers=1,
)

# Validation batches: shuffled, fixed batch of 16 pairs.
val_dataloader = DataLoader(
    ImageDataset(img_root_folder, transforms_=transforms_, mode=hp.dataset_test_mode),
    batch_size=16,
    shuffle=True,
    num_workers=1,
)

In [6]:
from torchvision.models import resnet18

In [7]:
def weight_class_init(m):
    """Initialise a module in place; intended for use with ``Module.apply``.

    * Modules whose class name contains ``"Conv"`` get weights drawn from
      N(0, 0.02).
    * ``BatchNorm2d`` modules get scale weights drawn from N(1, 0.02) and a
      zero bias.

    Any other module is left untouched.

    Args:
        m: The module to initialise.
    """
    classname = m.__class__.__name__
    if classname.find("Conv") != -1:
        # std 0.02 matches `weights_init_normal` defined later in this notebook.
        torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find("BatchNorm2d") != -1:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
        # The constant belongs on the bias; writing it to the weight would
        # wipe out the scale that was just initialised.
        torch.nn.init.constant_(m.bias.data, 0.0)

In [8]:
class UNetDown(nn.Module):
    """Down-sampling block: stride-2 3x3 conv, optional batch norm, LeakyReLU.

    The convolution uses kernel 3, stride 2 and padding 1, so an even spatial
    size is halved.

    Args:
        in_size: Number of input channels.
        out_size: Number of output channels.
        normalize: When true, a ``BatchNorm2d`` follows the convolution.
        dropout: Dropout probability applied after the activation. ``0.0``
            (the default) adds no dropout layer.
    """

    def __init__(self, in_size, out_size, normalize=True, dropout=0.0):
        super(UNetDown, self).__init__()
        layers = [nn.Conv2d(in_size, out_size, 3, stride=2, padding=1, bias=False)]
        if normalize:
            # NOTE(review): the second positional argument of BatchNorm2d is
            # `eps`, so this sets eps=0.8. Kept as-is to preserve behaviour;
            # confirm whether momentum=0.8 was intended.
            layers.append(nn.BatchNorm2d(out_size, 0.8))
        layers.append(nn.LeakyReLU(0.2))
        if dropout:
            # Honour the `dropout` argument, which was previously ignored.
            layers.append(nn.Dropout(dropout))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the block to ``x`` and return the result."""
        return self.model(x)
    
    
class UNetUp(nn.Module):
    """Up-sampling block with a skip connection.

    The input is up-sampled by a factor of 2, passed through a 3x3
    convolution, batch norm and ReLU, and then concatenated along the channel
    dimension with ``skip_input``. The output therefore has
    ``out_size + skip_input.size(1)`` channels.

    Args:
        in_size: Number of input channels.
        out_size: Number of channels produced before the skip concatenation.
    """

    def __init__(self, in_size, out_size):
        super(UNetUp, self).__init__()
        self.model = nn.Sequential(
            nn.Upsample(scale_factor=2),
            # stride=1 so the convolution keeps the doubled resolution; a
            # stride of 2 would undo the up-sampling and break the skip
            # concatenation below.
            nn.Conv2d(in_size, out_size, 3, stride=1, padding=1, bias=False),
            # NOTE(review): second positional argument is `eps` (eps=0.8);
            # kept as-is, confirm whether momentum=0.8 was intended.
            nn.BatchNorm2d(out_size, 0.8),
            nn.ReLU(inplace=True),
        )

    def forward(self, x, skip_input):
        """Up-sample ``x`` and concatenate it with ``skip_input`` on dim 1.

        ``skip_input`` must have twice the spatial size of ``x``.
        """
        x = self.model(x)
        return torch.cat((x, skip_input), 1)
                   
            

In [9]:
class Generator(nn.Module):
    """U-Net generator conditioned on an image and a latent vector.

    The latent vector is projected by a linear layer to one extra image-sized
    channel and concatenated with the input image. Seven ``UNetDown`` blocks
    encode the result; six ``UNetUp`` blocks decode it, each concatenating the
    matching encoder activation as a skip connection. A final up-sample,
    convolution and ``Tanh`` produce the output image.

    Args:
        latent_dim: Size of the latent vector ``z``.
        img_shape: ``(channels, height, width)`` of the input image.
            NOTE(review): seven stride-2 stages presumably require height and
            width to be multiples of 128 for the skip connections to line up;
            confirm before using other sizes.
    """

    def __init__(self, latent_dim, img_shape):
        super(Generator, self).__init__()
        channels, self.h, self.w = img_shape

        # Projects z to a single h x w plane that is stacked onto the image.
        self.fc = nn.Linear(latent_dim, self.h * self.w)

        # Encoder: +1 input channel for the projected latent plane.
        self.down1 = UNetDown(channels + 1, 64, normalize=True)
        self.down2 = UNetDown(64, 128)
        self.down3 = UNetDown(128, 256)
        self.down4 = UNetDown(256, 512)
        self.down5 = UNetDown(512, 512)
        self.down6 = UNetDown(512, 512)
        self.down7 = UNetDown(512, 512, normalize=False)

        # Decoder: each input width is the previous block's output plus the
        # channels of the skip connection concatenated onto it.
        self.up1 = UNetUp(512, 512)
        self.up2 = UNetUp(1024, 512)   # 512 (up1) + 512 (d6)
        self.up3 = UNetUp(1024, 512)   # 512 (up2) + 512 (d5)
        self.up4 = UNetUp(1024, 256)   # 512 (up3) + 512 (d4)
        self.up5 = UNetUp(512, 128)    # 256 (up4) + 256 (d3)
        self.up6 = UNetUp(256, 64)     # 128 (up5) + 128 (d2)

        # Input is 64 (up6) + 64 (d1) = 128 channels.
        self.final = nn.Sequential(
            nn.Upsample(scale_factor=2),
            nn.Conv2d(128, channels, 3, stride=1, padding=1),
            nn.Tanh(),
        )

    def forward(self, x, z):
        """Generate an image from input image ``x`` and latent batch ``z``.

        Args:
            x: Image batch; dim 0 is the batch, dim 1 the channels.
            z: Latent batch with ``latent_dim`` features per sample.

        Returns:
            The generated image batch (``Tanh`` output).
        """
        # Reshape the projected latent into one extra image channel.
        z = self.fc(z).view(z.size(0), 1, self.h, self.w)
        d1 = self.down1(torch.cat((x, z), 1))
        d2 = self.down2(d1)
        d3 = self.down3(d2)
        d4 = self.down4(d3)
        d5 = self.down5(d4)
        d6 = self.down6(d5)
        d7 = self.down7(d6)
        u1 = self.up1(d7, d6)
        u2 = self.up2(u1, d5)
        u3 = self.up3(u2, d4)
        u4 = self.up4(u3, d3)
        u5 = self.up5(u4, d2)
        u6 = self.up6(u5, d1)

        return self.final(u6)
            

In [10]:
class Encoder(nn.Module):
    """Image encoder producing the two latent-distribution heads.

    A ResNet-18 trunk (all children except the last three) extracts features,
    which are average-pooled, flattened, and fed to two linear heads.

    Args:
        latent_dim: Size of each output vector.
        input_shape: Accepted for interface compatibility; not used here.
            NOTE(review): the fixed 8x8 pooling and the 256-feature heads
            presumably assume 128x128 inputs; confirm for other sizes.
    """

    def __init__(self, latent_dim, input_shape):
        super(Encoder, self).__init__()
        # No arguments = randomly initialised weights, without relying on the
        # deprecated `pretrained` keyword.
        resnet18_model = resnet18()
        self.feature_extractor = nn.Sequential(*list(resnet18_model.children())[:-3])
        self.pooling = nn.AvgPool2d(kernel_size=8, stride=8, padding=0)
        # Both heads consume the same flattened feature vector, so their
        # input widths must match.
        self.fc_mu = nn.Linear(256, latent_dim)
        self.fc_logvar = nn.Linear(256, latent_dim)

    def forward(self, img):
        """Encode ``img`` and return ``(mu, logvar)``, the two head outputs."""
        out = self.feature_extractor(img)
        out = self.pooling(out)
        out = out.view(out.size(0), -1)  # flatten to (batch, features)
        mu = self.fc_mu(out)
        logvar = self.fc_logvar(out)
        return mu, logvar

In [15]:
class MultiDiscriminator(nn.Module):
        def __init__(self,input_shape):
            super(MultiDiscriminator,self).__init__()
            def discriminator_block(in_filters,out_filters,normalize=True):
                    layers = [nn.Conv2d(in_filters,out_filters,4,stride=2,padding=1)]
                    if normalize:
                        layers.append(nn.BatchNorm2d(out_filters,0.8))
                    layers.append(nn.LeakyReLU(0.8))
                    return layers
            
            channels,_,_=input_shape
            self.models = nn.ModuleList()
            for i in range(3):
                self.models.add_module(
                    "disc_%d"%i,
                    nn.Sequential(
                    *discriminator_block(channels,64,normalize=False),
                    *discriminator_block(64,128),
                    *discriminator_block(128,264),
                    *discriminator_block(264,512),
                    nn.Conv2d(512,1,3,padding=2)
                    ),
                )
            self.downsampling = nn.AvgPool2d(channels,stride=1,padding=[1,1],count_include_pad=False)
            
            def compute_loss(self,x,ground_truth):
                loss = sum([torch.mean((out-ground_truth)**2) for out in outputs])
                return loss
            
            def forward(self,x):
                outputs=[]
                for m in models:
                    outputs.append(m(x))
                    x=self.downsample(x)
                    return outputs

In [17]:
# Pick the device once; everything below follows this flag.
cuda = torch.cuda.is_available()
print("Using CUDA" if cuda else "Not using CUDA")

# L1 loss object and the (channels, height, width) triple used to build the
# networks.
mae_loss = torch.nn.L1Loss()
input_shape = (hp.channels, hp.img_size, hp.img_size)

# Networks: one generator, one encoder, and two multi-scale discriminators.
generator = Generator(hp.latent_dim, input_shape)
encoder = Encoder(hp.latent_dim, input_shape)

D_VAE = MultiDiscriminator(input_shape)
D_LR = MultiDiscriminator(input_shape)

if cuda:
    # Module.cuda() moves parameters in place and returns the module itself.
    generator = generator.cuda()
    encoder.cuda()
    D_VAE.cuda()
    D_LR.cuda()
    mae_loss.cuda()
    
def weights_init_normal(m):
    """Initialise ``m`` in place; meant to be passed to ``Module.apply``.

    Modules whose class name contains "Conv" get N(0, 0.02) weights.
    Otherwise, "BatchNorm2d" modules get N(1, 0.02) weights and a zero bias.
    All other modules are left unchanged.
    """
    kind = type(m).__name__
    if "Conv" in kind:
        # Convolution kernels: zero-mean, small spread.
        nn.init.normal_(m.weight.data, 0.0, 0.02)
        return
    if "BatchNorm2d" in kind:
        # Batch-norm scale centred on 1, shift fixed at 0.
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0.0)
    
# Re-initialise the generator and both discriminators (the encoder keeps its
# default initialisation).
for net in (generator, D_VAE):
    net.apply(weights_init_normal)
# Kept as the cell's final expression so the notebook still displays the
# D_LR module summary.
D_LR.apply(weights_init_normal)


Not using CUDA


MultiDiscriminator(
  (models): ModuleList(
    (0-2): 3 x Sequential(
      (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
      (1): LeakyReLU(negative_slope=0.8)
      (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
      (3): BatchNorm2d(128, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)
      (4): LeakyReLU(negative_slope=0.8)
      (5): Conv2d(128, 264, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
      (6): BatchNorm2d(264, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)
      (7): LeakyReLU(negative_slope=0.8)
      (8): Conv2d(264, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
      (9): BatchNorm2d(512, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)
      (10): LeakyReLU(negative_slope=0.8)
      (11): Conv2d(512, 1, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    )
  )
  (downsampling): AvgPool2d(kernel_size=3, stride=1, padding=[1, 1])
)

In [None]:
def sample_images(batches_done):
    generator.eval()
    img = next(iter(val_dataloader))
    image_samples = None
    path = "C:\\Users\\USER\\Desktop\\GANs\\facades\\%s\\%s.png" % ('maps', batches_done)
    for img_A , img_B in zip(img["A"],img["B"]):
        real_A = img_A.view(1,*img_A.shape).repeat(hp.latent_dim,1,1,1)
        real_A = Variable(real_A.type(Tensor))
        sampled_z = Variable(Tensor(np.random.normal(0, 1, (hp.latent_dim, hp.latent_dim))))
        fake_B = generator(real_A, sampled_z)
        fake_B = torch.cat([x for x in fake_B.data.cpu()], -1)
        