In [1]:
from __future__ import print_function
import argparse
import torch
import torch.utils.data
from torch import nn, optim
from torch.nn import functional as F
from torchvision import datasets, transforms
from torchvision.utils import save_image

import matplotlib.pyplot as plt
import numpy as np

In [2]:
cuda = torch.cuda.is_available()
batch_size = 128
log_interval = 10
num_epochs = 10

torch.manual_seed(1) # args.seed

device = torch.device("cuda" if cuda else "cpu") # args.cuda
kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {} # args.cuda

# Get train and test data
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.ToTensor()),
    batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.ToTensor()),
    batch_size=batch_size, shuffle=True, **kwargs)



In [None]:
# original VAE model
class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()

        self.fc1 = nn.Linear(784, 400)
        self.fc1a = nn.Linear(400, 100)
        self.fc21 = nn.Linear(100, 2) # Latent space of 2D
        self.fc22 = nn.Linear(100, 2) # Latent space of 2D
        self.fc3 = nn.Linear(2, 100) # Latent space of 2D
        self.fc3a = nn.Linear(100, 400)
        self.fc4 = nn.Linear(400, 784)

    def encode(self, x):
        h1 = F.relu(self.fc1(x))
        h2 = F.relu(self.fc1a(h1))
        return self.fc21(h2), self.fc22(h2)

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5*logvar)
        eps = torch.randn_like(std)
        return mu + eps*std

    def decode(self, z):
        h3 = F.relu(self.fc3(z))
        h4 = F.relu(self.fc3a(h3))
        return torch.sigmoid(self.fc4(h4))

    def forward(self, x):
        mu, logvar = self.encode(x.view(-1, 784))
        z = self.reparameterize(mu, logvar)
        return self.decode(z), mu, logvar


model = VAE().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

model_parameters = filter(lambda p: p.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print(f"trainable parameters: {params:,}")

# Reconstruction + KL divergence losses summed over all elements and batch
def loss_function(recon_x, x, mu, logvar):
    BCE = F.binary_cross_entropy(recon_x, x.view(-1, 784), reduction='sum')
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return BCE + KLD # -ELBO


def train(epoch, model):
    model.train() # so that everything has gradients and we can do backprop and so on...
    train_loss = 0
    for batch_idx, (data, _) in enumerate(train_loader):
        data = data.to(device)
        optimizer.zero_grad() # "reset" gradients to 0 for text iteration
        recon_batch, mu, logvar = model(data)
        loss = loss_function(recon_batch, data, mu, logvar)
        loss.backward() # calc gradients
        train_loss += loss.item()
        optimizer.step() # backpropagation

    print('====> Epoch: {} Average loss: {:.4f}'.format(
          epoch, train_loss / len(train_loader.dataset)))


def test(epoch, model):
    model.eval()
    test_loss = 0
    with torch.no_grad(): # no_grad turns of gradients...
        for i, (data, _) in enumerate(test_loader):
            data = data.to(device)
            recon_batch, mu, logvar = model(data)
            test_loss += loss_function(recon_batch, data, mu, logvar).item()

    test_loss /= len(test_loader.dataset)
    print('====> Test set loss: {:.4f}'.format(test_loss))


for epoch in range(1, num_epochs + 1):
    train(epoch, model)
    #torch.save(model.state_dict(), "weights/VAE.pth")
    test(epoch, model)
    with torch.no_grad():
        sample = torch.randn(64, 2).to(device) # 20 -> 2
        sample = model.decode(sample).cpu()
        save_image(sample.view(64, 1, 28, 28),
                   'results/VAE/sample_' + str(epoch) + '.png')

In [None]:
class convVAE(nn.Module):
    def __init__(self, in_channels=1, out_channels = 32, hiddenDim=64, kernel_size = 3):
        super(convVAE, self).__init__()
        w = 28+1+1-2*kernel_size
        features=torch.tensor([out_channels,w,w])
        self.num_features = torch.prod(features, dim=0).item()
        self.ft_shape = features
        out_channels1 = 16
        self.conv1 = nn.Conv2d(in_channels, out_channels1, kernel_size=kernel_size)
        self.conv2 = nn.Conv2d(out_channels1, out_channels=out_channels, kernel_size=kernel_size)
  
        
        self.enFc1 = nn.Linear(self.num_features, hiddenDim)
        self.enFc2 = nn.Linear(self.num_features, hiddenDim)

        self.deFc1 = nn.Linear(hiddenDim, self.num_features)
        self.deConv1 = nn.ConvTranspose2d(out_channels, out_channels1, kernel_size=kernel_size)
        self.deConv2 = nn.ConvTranspose2d(out_channels1, in_channels, kernel_size=kernel_size)


    def encode(self, x):
        h1 = F.relu(self.conv1(x))
        h2 = F.relu(self.conv2(h1))
        
        h3 = h2.view(-1, self.num_features)
        return self.enFc1(h3), self.enFc2(h3)

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5*logvar)
        eps = torch.randn_like(std)
        return mu + eps*std

    def decode(self, z):
        h4 = F.relu(self.deFc1(z))
        h5 = F.relu(self.deConv1(h4.view(-1, self.ft_shape[0], self.ft_shape[1], self.ft_shape[2])))
        return torch.sigmoid(self.deConv2(h5))

    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        return self.decode(z), mu, logvar

z = 2
model = convVAE(out_channels = 64, kernel_size = 4, hiddenDim=z).to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)


# Reconstruction + KL divergence losses summed over all elements and batch
def loss_function(recon_x, x, mu, logvar):
    BCE = F.binary_cross_entropy(recon_x, x, size_average=False)
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return BCE + KLD # -ELBO

model_parameters = filter(lambda p: p.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print(f"trainable parameters: {params:,}")

In [None]:
for epoch in range(1, num_epochs + 1):
    train(epoch, model)
    #torch.save(model.state_dict(), "weights/convVAE.pth")
    test(epoch, model)
    with torch.no_grad():
        sample = torch.randn(64, z).to(device)
        sample = model.decode(sample).cpu()
        save_image(sample.view(64, 1, 28, 28),
                   'results/convVAE/sample_' + str(epoch) + '.png')

In [None]:
# A2
# https://colab.research.google.com/drive/1sjy9odlSSy0RBVgMTgP7s99NXsqglsUL?usp=sharing#scrollTo=Rj17psVw7Shg
# https://www.youtube.com/watch?v=a4Yfz2FxXiY&t=3s 

In [101]:
def extract(input, t, x):
    shape = x.shape
    out = torch.gather(input, 0, t.to(input.device))
    reshape = [t.shape[0]] + [1] * (len(shape) - 1)
    return out.reshape(*reshape)

In [110]:
#https://nn.labml.ai/diffusion/ddpm/index.html
#https://github.com/azad-academy/denoising-diffusion-model/blob/main/diffusion_model_demo.ipynb

class denoisingDiffusion:
    def __init__(self, model : nn.Module, T=1000, device='cpu'):
        self.T = T
        start = 1e-5; end = 1e-2
        self.betas = torch.linspace(start, end, T, device=device)
        self.alphas = 1 - self.betas
        self.alphas_bar = torch.cumprod(self.alphas, dim=0)
        self.device = device
        self.model = model

    def sample_q(self, x0, t, noise): #this and 'loss()' makes algorithm 1
        if noise is None:
            noise = torch.rand_like(x0)
        a_t = extract(torch.sqrt(self.alphas_bar), t, x0)
        one_minus_a_t = extract(torch.sqrt(1-self.alphas_bar), t, x0)
        return (a_t * x0  + one_minus_a_t * noise)

    def loss(self, x0, noise=None):
        batch_size = x0.shape[0]
        t = torch.randint(0, self.T, (batch_size,), device=x0.device, dtype=torch.long)
        if noise is None:
            noise=torch.rand_like(x0)
        x_t = self.sample_q(x0, t, noise)
        eps_theta = self.model(x_t, t)

        return F.mse_loss(noise, eps_theta)

    def sample_p(self, x_t, t):
        alpha = extract(self.alphas, t, x_t)
        alpha_bar = extract(self.alphas_bar, t, x_t)
        params = self.model(x_t, t)
        coef = self.betas / (extract(torch.sqrt(1-self.alphas_bar), t, x_t))
    
        print(params.shape)
        print(coef.shape)
        print((coef * params))
        mean = 1 / torch.sqrt(alpha) * (x_t - coef * params)
        var = extract(self.betas, t)
        eps = torch.randn_like(x_t, device=x_t.device)
        return mean + torch.sqrt(var) * eps

    def sample_p_loop(self, shape):
        x_t = torch.randn(shape, device=self.device)
        Xs = [x_t]
        for t in reversed(range(self.T)):
            x_t = self.sample_p(x_t, x_t.new_full((x_t.shape[0],), t, dtype=torch.long))
            Xs.append(x_t)
        
        return Xs
    


In [111]:
class deNoise(nn.Module):
    def __init__(self, in_channels=1, out_channels = 32, hiddenDim=64, kernel_size = 3):
        super(deNoise, self).__init__()
        
        w = 28+1+1-2*kernel_size
        features=torch.tensor([out_channels,w,w])
        self.num_features = torch.prod(features, dim=0).item()
        self.ft_shape = features
        out_channels1 = 16

        self.conv1 = nn.Conv2d(in_channels, out_channels1, kernel_size)
        self.conv2 = nn.Conv2d(out_channels1, out_channels, kernel_size)
        self.fc1 = nn.Linear(self.num_features, hiddenDim)
        
        self.fc2 = nn.Linear(hiddenDim, self.num_features)
        self.conv3 = nn.ConvTranspose2d(out_channels, out_channels1, kernel_size)
        self.conv4 = nn.ConvTranspose2d(out_channels1, in_channels, kernel_size)
        
        

    def forward(self, x, t):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = torch.sigmoid(self.fc1(x.view(-1, self.num_features)))
        x = F.relu(self.fc2(x))
        x = F.relu(self.conv3(x.view(-1, self.ft_shape[0], self.ft_shape[1], self.ft_shape[2])))
        x = torch.sigmoid(self.conv4(x))
        return x


In [112]:
eps_model = deNoise()
eps_model.to(device)
diff = denoisingDiffusion(eps_model, device=device)
optimizer = optim.Adam(eps_model.parameters(), lr=1e-3)
eps_model.train() # so that everything has gradients and we can do backprop and so on...
train_loss = 0
for epoch in range(1, 2):
    for batch_idx, (data, _) in enumerate(train_loader):
        data = data.to(device)
        optimizer.zero_grad() # "reset" gradients to 0 for text iteration
        
        loss = diff.loss(data, None)
        loss.backward() # calc gradients
        train_loss += loss.item()
        optimizer.step() # backpropagation

    print('====> Epoch: {} Average loss: {:.4f}'.format(
        epoch, train_loss / len(train_loader.dataset)))
    if False:
        with torch.no_grad():
            sample = torch.randn(64, 2).to(device)
            t = torch.arange(diff.T, 0, step=-1, device=device)
            sample = diff.sample_p(sample, t)
            save_image(sample.view(64, 1, 28, 28).cpu(),
                        'results/diffusion/sample_' + str(epoch) + '.png')

====> Epoch: 1 Average loss: 0.0007


In [113]:
with torch.no_grad():
    samples = diff.sample_p_loop(data.shape)
    save_image(sample.view(64, 1, 28, 28).cpu(),
                'results/diffusion/sample_' + str(epoch) + '.png')

torch.Size([96, 1, 28, 28])
torch.Size([96, 1, 1, 1000])


RuntimeError: The size of tensor a (1000) must match the size of tensor b (28) at non-singleton dimension 3