In [None]:
import os

import cv2
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from torch import distributions

%matplotlib inline

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Utility functions

In [None]:
def plot_imgs(samples, title=None):
    """Display the first ten entries of ``samples`` as a 2x5 grid of grayscale images.

    Parameters
    ----------
    samples : indexable
        ``samples[k]`` for k = 0..9 must be something ``imshow`` can draw.
    title : str, optional
        Passed to ``fig.suptitle``.
    """
    fig, axes = plt.subplots(2, 5, figsize=(10,4))
    # Remove every margin and gap so the ten panels tile the whole figure.
    fig.subplots_adjust(wspace=0, hspace=0, left=0, right=1, bottom=0, top=1)
    plt.rcParams.update({'font.size': 20})
    fig.suptitle(title)

    # ``axes.flat`` walks the grid row by row, i.e. panel (i, j) shows samples[i*5 + j].
    for k, panel in enumerate(axes.flat):
        panel.imshow(samples[k], cmap='gray')
        panel.axis('off')
    plt.show()

In [None]:
def load_data(data_dir, dataset):
    """Load one of the datasets used in this notebook.

    Parameters
    ----------
    data_dir : str
        For 'MNIST': root directory handed to torchvision (created if missing).
        For 'MLINPL': directory that contains ``MLinPL.png``.
        Not used for 'MOONS'.
    dataset : str
        One of 'MNIST', 'MOONS' or 'MLINPL'.

    Returns
    -------
    torchvision.datasets.MNIST or torch.Tensor
        'MNIST' returns the torchvision training dataset object.
        'MOONS' returns a tensor with one 2-D point per row.
        'MLINPL' returns a tensor with one 2-D point per row, standardized
        per coordinate (zero mean, unit standard deviation).

    Raises
    ------
    ValueError
        If ``dataset`` is not one of the supported names.
    FileNotFoundError
        If the 'MLINPL' image cannot be read.
    """
    if dataset == 'MNIST':
        os.makedirs(data_dir, exist_ok=True)
        points = torchvision.datasets.MNIST(root=data_dir, train=True,
                                               transform=transforms.ToTensor(),download=True)

    elif dataset == 'MOONS':
        # Two half rings of radius 0.9..1.1: the upper one centred at x=+0.5,
        # the lower one centred at x=-0.5.
        moon1 = [(0.5 + r*np.cos(t), r*np.sin(t))
                 for t in np.arange(0, np.pi, 0.01) for r in np.arange(0.9, 1.1, 0.01)]
        moon2 = [(-0.5 + r*np.cos(t), r*np.sin(t))
                 for t in np.arange(np.pi, 2*np.pi, 0.01) for r in np.arange(0.9, 1.1, 0.01)]
        points = moon1 + moon2
        points = torch.tensor(points)

    elif dataset == 'MLINPL':
        img_path = os.path.join(data_dir, 'MLinPL.png')
        img = cv2.imread(img_path, 0)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise FileNotFoundError("Could not read image: {}".format(img_path))
        img = img / 255

        # Downscale to 20% of the original size; cv2.resize takes (width, height).
        n, m = img.shape
        n, m = int(0.2*n), int(0.2*m)
        img = cv2.resize(img, (m,n))

        # Every exactly-black pixel becomes a point (column, -row); the row is
        # negated so the picture is not upside down in a scatter plot.
        # np.nonzero reports indices in row-major order, like a nested i/j loop.
        rows, cols = np.nonzero(img == 0)
        points = np.stack([cols, -rows], axis=1)

        points = (points - np.mean(points, axis=0, keepdims=True)) / np.std(points, axis=0, keepdims=True)
        points = torch.from_numpy(points)

    else:
        raise ValueError("Unknown dataset: {!r} (expected 'MNIST', 'MOONS' or 'MLINPL')".format(dataset))

    return points

# Model implementation

Implement Coupling layer of the NICE model here. You will need to fill the body of 3 functions: 
* `forward` - forward pass of the NICE
$$\begin{cases}
y_1 =& x_1\\ 
y_2 =& x_2 + m(x_1)
\end{cases}$$
* `inverse` - inversion of the forward pass
$$\begin{cases}
x_1 =& y_1\\ 
x_2 =& y_2 - m(y_1)
\end{cases}$$
* `get_mask` - swap of the processed part of latent code
$$\begin{cases}
y_{I_1} =& y_{I_2}\\ 
y_{I_2} =& y_{I_1}
\end{cases}$$


In [None]:
class Coupling_layer(nn.Module):
    """Additive coupling layer of NICE.

    The flattened input is split by a binary mask into a part that passes
    through unchanged and a part that is shifted by an MLP of the unchanged part.

    Parameters
    ----------
    device : torch.device
        Device on which the mask is placed.
    input_dim : int
        Input and output width of the MLP. It has to be broadcast-compatible
        with the flattened mask, which has ``prod(data_dim)`` entries.
    data_dim : tuple of int or torch.Size
        Shape of a single sample; the mask is split along its second axis.
    n_layers : int
        Number of linear layers in the MLP (the first and last are always present).
    mask_type : int
        0 keeps the first half fixed, 1 keeps the second half fixed.
    hidden_dim : int
        Width of the hidden layers.
    """
    def __init__(self, device, input_dim, data_dim, n_layers, mask_type, hidden_dim=1024):
        super(Coupling_layer, self).__init__()

        self.device = device
        self.mask = self.get_mask(data_dim, mask_type)

        net = [nn.Linear(input_dim, hidden_dim), nn.ReLU()]
        for _ in range(n_layers-2):
            net.append(nn.Linear(hidden_dim, hidden_dim))
            net.append(nn.ReLU())
        net.append(nn.Linear(hidden_dim, input_dim))
        self.net = nn.Sequential(*net)


    def forward(self, x):
        """Apply y1 = x1, y2 = x2 + m(x1); the result has the same shape as ``x``."""
        z = x.view(x.shape[0], -1)
        h1, h2 = z * self.mask, z * (1 - self.mask)

        # h1 passes through untouched; only the complementary part is shifted.
        h2 = h2 + self.net(h1) * (1 - self.mask)

        z = h1 + h2

        return z.view(x.shape)


    def inverse(self, z):
        """Undo ``forward``: x1 = y1, x2 = y2 - m(y1); the result has the same shape as ``z``."""
        x = z.view(z.shape[0], -1)
        h1, h2 = x * self.mask, x * (1 - self.mask)

        # h1 was not modified by forward, so the same shift can be recomputed and removed.
        h2 = h2 - self.net(h1) * (1 - self.mask)

        x = h1 + h2

        return x.view(z.shape)


    def get_mask(self, data_dim, mask_type: int):
        """Build the flattened binary mask for ``data_dim``.

        The mask has 1's in the first half of the second axis for
        ``mask_type=0`` and in the second half for ``mask_type=1``. It is
        returned with shape ``(1, prod(data_dim))`` on ``self.device``.
        This method does not modify the layer.

        Raises
        ------
        ValueError
            If ``mask_type`` is neither 0 nor 1.
        """
        if mask_type not in (0, 1):
            raise ValueError("mask_type must be 0 or 1, got {!r}".format(mask_type))

        # Work on a local tensor so that calling this method never clobbers self.mask.
        mask = torch.zeros(data_dim)
        half = data_dim[1]//2

        if mask_type == 0:
            mask[:,:half] = 1
        else:
            mask[:,half:] = 1

        return mask.view(1,-1).to(self.device)

Implement scaling layer here. You will need a body for 2 functions:
* `forward` - forward pass of the layer. Return scaled output and logarithm of the determinant.
$$ \text{out} = (x * \exp(\log s), \log \det s)$$
If we keep logarithms of the scales $\log s$ then: 
$$\log \det s = \sum_{i=1}^{n} \log s_i,$$
where $n$ is the length of the diagonal.
* `inverse` - inversion of the scaling layer
$$\text{out} = z * \exp(-\log s)$$

In [None]:
class Scaling_layer(nn.Module):
    """Learnable element-wise (diagonal) scaling, stored as log-scales.

    Parameters
    ----------
    data_dim : int, tuple of int or torch.Size
        Shape of the log-scale parameter; it is multiplied element-wise with
        the input, so it has to broadcast against a batch of samples.
    """
    def __init__(self, data_dim):
        super(Scaling_layer, self).__init__()

        # torch.zeros always treats data_dim as a shape. The legacy
        # torch.FloatTensor constructor treats a plain tuple as *data*, so
        # (1, 2) would produce a parameter of shape (2,) instead of (1, 2).
        self.log_scale = nn.Parameter(torch.zeros(data_dim, dtype=torch.float32))


    def forward(self, x):
        """Return ``(x * exp(log_scale), sum(log_scale))``.

        The second element is the log-determinant of the diagonal scaling.
        """
        scale_logdet = torch.sum(self.log_scale)
        return torch.exp(self.log_scale) * x, scale_logdet


    def inverse(self, z):
        """Return ``z * exp(-log_scale)``, the inverse of the scaling in ``forward``."""
        return torch.exp(-self.log_scale) * z

Here, you will need to implement two functions, responsible for calculating flow and inverse flow:
* `flow` - composition of the coupling layers on an input $x$ - $f_K(f_{K-1}(\ldots f_1(x)))$ - followed by the scaling layer at the end. Returns $z$ and $\log \det s$
* `inv_flow` - inverse of the scaling layer followed by the inverses of the coupling layers, applied in reverse order, on an encoding $z$. Returns $x$

In [None]:
class NICE():
    """Container for a NICE flow: a stack of coupling layers plus one scaling layer.

    This is a plain Python object, not an ``nn.Module``; parameters and the
    train/eval mode are therefore handled through ``get_parameters`` and
    ``train_model``.
    """
    def __init__(self, input_dim, data_dim, n_layers, n_couplings, device):
        # The mask type alternates 0, 1, 0, ... so that consecutive couplings
        # keep opposite halves fixed.
        self.coupling_layers = [
            Coupling_layer(device, input_dim, data_dim, n_layers, idx % 2).to(device)
            for idx in range(n_couplings)
        ]
        self.scaling_layer = Scaling_layer(data_dim).to(device)


    def flow(self, x):
        """Map data ``x`` to a latent code; returns ``(z, scale_logdet)``."""
        hidden = x
        for coupling in self.coupling_layers:
            hidden = coupling(hidden)
        z, scale_logdet = self.scaling_layer(hidden)

        return z, scale_logdet


    def inv_flow(self, z):
        """Map a latent code ``z`` back to data space."""
        x = self.scaling_layer.inverse(z)
        # The couplings are undone last-to-first.
        for coupling in reversed(self.coupling_layers):
            x = coupling.inverse(x)

        return x


    def init_weights(self):
        """Run ``init_weights_helper`` on every sub-module of every coupling layer."""
        for coupling in self.coupling_layers:
            coupling.apply(self.init_weights_helper)


    def init_weights_helper(self, Layer):
        """Initialise a module whose class is named 'Linear': N(0, 0.02) weights, zero bias."""
        if Layer.__class__.__name__ != 'Linear':
            return
        torch.nn.init.normal_(Layer.weight, mean=0, std=0.02)
        if Layer.bias is not None:
            torch.nn.init.constant_(Layer.bias, 0)


    def get_parameters(self):
        """Return a list with the parameters of all coupling layers, then of the scaling layer."""
        modules = list(self.coupling_layers) + [self.scaling_layer]
        return [param for module in modules for param in module.parameters()]


    def train_model(self, if_train=True):
        """Put every layer in training mode (``if_train`` truthy) or evaluation mode."""
        mode = bool(if_train)
        for module in list(self.coupling_layers) + [self.scaling_layer]:
            # train(False) is what eval() does.
            module.train(mode)

In [None]:
def loss_fun(z, prior_z, logdet):
    """Negative mean log-likelihood of a batch under the flow.

    Parameters
    ----------
    z : torch.Tensor
        Latent codes; flattened to ``(batch, -1)`` before evaluating the prior.
    prior_z : torch.distributions.Distribution
        Prior over the flattened latent code. It is evaluated on the CPU.
    logdet : torch.Tensor
        Log-determinant term added to every per-sample log-probability.

    Returns
    -------
    torch.Tensor
        Scalar ``-mean(log p(z) + logdet)``.
    """
    z = z.view(z.shape[0], -1)
    # The prior lives on the CPU; move the result back to wherever z is stored
    # instead of relying on a notebook-global `device`.
    log_prior = prior_z.log_prob(z.cpu()).to(z.device)
    ll_z = log_prior + logdet
    return -torch.mean(ll_z)

# Experiments

Here we perform experiments on 3 datasets: MNIST, Moons and MLinPL logo.

## MNIST

Load data

In [None]:
mnist = load_data(r'datasets/', 'MNIST')

# Scale the raw pixel values to [0, 1], then standardize with the commonly
# quoted MNIST mean (0.1307) and standard deviation (0.3081).
mnist.data = (mnist.data.float() / 255. - 0.1307) / 0.3081

# Keep only the digits 0-4. MNIST labels are 0-9, so one vectorized comparison
# selects the same samples, in the same order, as a per-index Python loop.
digit_mask = mnist.targets <= 4
data = mnist.data[digit_mask]
targets = mnist.targets[digit_mask]

dataloader = DataLoader(data, batch_size=128, shuffle=True)

Model setup. Here we set up the prior distribution as a multivariate isotropic Gaussian.

In [None]:
# Optimization hyperparameters.
n_epochs = 1000
l_rate = 1e-3
n_layers = 6 # in each coupling layer
n_couplings = 6

# Shape of a single sample and the length of its flattened form.
data_dim = data.shape[1:]
input_dim = torch.prod(torch.tensor(data_dim)).item()

# Standard normal prior over the flattened latent code.
prior_z = distributions.MultivariateNormal(
    loc=torch.zeros(input_dim),
    covariance_matrix=torch.eye(input_dim),
)

nice = NICE(input_dim, data_dim, n_layers, n_couplings, device)
nice.init_weights()

optimizer = optim.Adam(nice.get_parameters(), lr=l_rate)
# Multiply the learning rate by 0.9 every 20 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.9)

Model training

In [None]:
nice.train_model()

# `data` was already scaled to [0, 1] and standardized with std 0.3081 when it
# was loaded, so one 8-bit quantization step equals 1 / (255 * 0.3081) in these
# units. Adding U[0, 1) noise and dividing by 255 again (as if the pixels were
# still raw 0..255 values) would make the noise as large as the signal.
dequant_step = 1. / (255 * 0.3081)

for i in range(n_epochs):
    loss_acc = 0
    for j, x in enumerate(dataloader):
        # Uniform dequantization: spread each pixel over its quantization bin.
        x = (x.float() + dequant_step * torch.rand(x.shape)).to(device)
        z, logdet = nice.flow(x)
        loss = loss_fun(z, prior_z, logdet)
        loss_acc += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # The learning-rate schedule advances once per epoch.
    scheduler.step()

    if i%5 == 0:
        # Report the mean batch loss of this epoch and show ten fresh samples.
        print('Epoch: {}/{} Loss: {:.4f}'.format(i+1, n_epochs, loss_acc / (j+1)))
        with torch.no_grad():
            samples = torch.randn(10,28,28).to(device)
            samples = nice.inv_flow(samples)
            plot_imgs(samples.cpu().numpy())

Model evaluation

In [None]:
# Switch every layer to evaluation mode before sampling.
nice.train_model(if_train=False)

with torch.no_grad():
    # Draw ten latent codes from the standard normal prior and decode them.
    samples = nice.inv_flow(torch.randn(10,28,28).to(device))
    plot_imgs(samples.cpu().numpy(), None)

## Moons

Load data

In [None]:
moons = load_data(r'datasets/', 'MOONS')

# Scatter the point cloud: first column on the x axis, second on the y axis.
fig, ax = plt.subplots()
ax.scatter(moons[:,0], moons[:,1], c='b')
ax.axis('off')
plt.show()

Model setup. Here we set up the prior distribution as a multivariate isotropic Gaussian.

In [None]:
# Optimization hyperparameters.
n_epochs = 10000
l_rate = 1e-3
n_layers = 6 # in each coupling layer
n_couplings = 6

# Each sample is a single 2-D point.
data_dim = (1,2)
input_dim = 2

# Standard normal prior over the 2-D latent code.
prior_z = distributions.MultivariateNormal(
    loc=torch.zeros(input_dim),
    covariance_matrix=torch.eye(input_dim),
)

nice2D = NICE(input_dim, data_dim, n_layers, n_couplings, device)
nice2D.init_weights()

optimizer = optim.Adam(nice2D.get_parameters(), lr=l_rate)
# Multiply the learning rate by 0.9 every 200 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=200, gamma=0.9)

Model training

In [None]:
nice2D.train_model()
moons = moons.float().to(device)

# Full-batch training: every iteration uses the whole point cloud.
for i in range(n_epochs):
    # Fresh uniform jitter in [0, 1e-2) is added to the points on every step.
    x = moons + 1e-2 * torch.rand(moons.shape).to(device)

    z, logdet = nice2D.flow(x)
    loss = loss_fun(z, prior_z, logdet)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    scheduler.step()

    if i%50 == 0:
        # Report the current loss and scatter 1000 points sampled from the model.
        print('Epoch: {}/{} Loss: {:.4f}'.format(i+1, n_epochs, loss.item()))
        with torch.no_grad():
            samples = torch.randn(1000,2).to(device)
            samples = nice2D.inv_flow(samples).view(-1,2)
            plt.scatter(samples[:,0].cpu().numpy(), samples[:,1].cpu().numpy(), c='black')
            plt.axis('off')
            plt.show()

Model evaluation

In [None]:
# Switch every layer to evaluation mode before sampling.
nice2D.train_model(if_train=False)

with torch.no_grad():
    # Decode 1000 latent draws from the standard normal prior into 2-D points.
    samples = nice2D.inv_flow(torch.randn(1000,2).to(device)).view(-1,2)
    xs = samples[:,0].cpu().numpy()
    ys = samples[:,1].cpu().numpy()
    plt.scatter(xs, ys, c='black', alpha=0.1)
    plt.axis('off')
    plt.show()

## Sign

Load data

In [None]:
points = load_data(r'images/', 'MLINPL')

# Scatter the point cloud: first column on the x axis, second on the y axis.
fig, ax = plt.subplots()
ax.scatter(points[:,0], points[:,1], c='b')
ax.axis('off')
plt.show()

Model setup. Here we set up the prior distribution as a multivariate isotropic Gaussian.

In [None]:
# Optimization hyperparameters.
n_epochs = 10000
l_rate = 1e-3
n_layers = 6 # in each coupling layer
n_couplings = 6

# Each sample is a single 2-D point.
data_dim = (1,2)
input_dim = 2

# Standard normal prior over the 2-D latent code.
prior_z = distributions.MultivariateNormal(
    loc=torch.zeros(input_dim),
    covariance_matrix=torch.eye(input_dim),
)

nice_sign = NICE(input_dim, data_dim, n_layers, n_couplings, device)
nice_sign.init_weights()

optimizer = optim.Adam(nice_sign.get_parameters(), lr=l_rate)
# Multiply the learning rate by 0.9 every 200 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=200, gamma=0.9)

Model training

In [None]:
nice_sign.train_model()
points = points.float().to(device)

# Full-batch training: every iteration uses the whole point cloud.
for i in range(n_epochs):
    # Fresh uniform jitter in [0, 1e-4) is added to the points on every step.
    x = points + 1e-4 * torch.rand(points.shape).to(device)

    z, logdet = nice_sign.flow(x)
    loss = loss_fun(z, prior_z, logdet)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    scheduler.step()

    if i%50 == 0:
        # Report the current loss and scatter 1000 points sampled from the model.
        print('Epoch: {}/{} Loss: {:.4f}'.format(i+1, n_epochs, loss.item()))
        with torch.no_grad():
            samples = torch.randn(1000,2).to(device)
            samples = nice_sign.inv_flow(samples).view(-1,2)
            plt.scatter(samples[:,0].cpu().numpy(), samples[:,1].cpu().numpy(), c='black')
            plt.axis('off')
            plt.show()

Model evaluation

In [None]:
# Switch every layer to evaluation mode before sampling.
nice_sign.train_model(if_train=False)

with torch.no_grad():
    # Decode 1000 latent draws from the standard normal prior into 2-D points.
    samples = nice_sign.inv_flow(torch.randn(1000,2).to(device)).view(-1,2)
    xs = samples[:,0].cpu().numpy()
    ys = samples[:,1].cpu().numpy()
    plt.scatter(xs, ys, c='black', alpha=0.1)
    plt.axis('off')
    plt.show()