In [11]:
import os, sys
# Put the parent of the current working directory on the import path
# (presumably where the project modules imported below live — confirm).
project_root_dir = os.path.join(os.getcwd(), os.pardir)
if sys.path.count(project_root_dir) == 0:
    sys.path.append(project_root_dir)

import torch, config
import IPDL
from autoencoder import SDAE, SDAE_TYPE, AE_CONV_UPSAMPLING
import numpy as np

In [12]:
# Run on the GPU when CUDA is available; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Dropout Effect in Convolutional Layers

In [13]:
from torch import nn
# Active configuration: convolutional SDAE with dropout and skip connections.
model_name = 'ae_dropout_p5_sigmoid_skip'
# NOTE(review): presumably one activation entry per layer, the first entry
# being a pair — confirm the expected layout against SDAE.
activation_func = [
    [nn.ReLU(), nn.Sigmoid()],
    nn.ReLU(),
    nn.ReLU(),
]
model = SDAE(
    [1, 6, 8, 16],
    SDAE_TYPE.conv,
    activation_func=activation_func,
    dropout=True,
    skip_connection=True,
    upsample=AE_CONV_UPSAMPLING.up_layer,
)

# Alternative configuration (disabled): sigmoid everywhere, no dropout, no skip connections.
# model_name = 'ae_no_dropout_sigmoid'
# activation_func = [[nn.Sigmoid(), nn.Sigmoid()], nn.Sigmoid(), nn.Sigmoid()]
# model = SDAE([1, 6, 8, 16], SDAE_TYPE.conv, activation_func=activation_func, dropout=False,
#               skip_connection=False, upsample=AE_CONV_UPSAMPLING.up_layer)
# # remove dropout in last layer
# model.decode[-1][2] = nn.Sigmoid()
# # print(model.decode[-1])
# # model
# model

# Last expression of the cell: show the architecture.
model

SDAE(
  (encode): Sequential(
    (0): Sequential(
      (0): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (2): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3): ReLU()
      (4): Dropout2d(p=0.5, inplace=False)
      (5): MatrixEstimator(sigma=0.10, requires_optim=True)
    )
    (1): Sequential(
      (0): Conv2d(6, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3): ReLU()
      (4): Dropout2d(p=0.5, inplace=False)
      (5): MatrixEstimator(sigma=0.10, requires_optim=True)
    )
    (2): Sequential(
      (0): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_m

In [14]:
from torchvision import datasets
from torchvision.transforms import ToTensor, Resize, Compose

# Preprocessing applied to every image: resize to 64x64, then convert to a tensor.
transforms = Compose([Resize((64, 64)), ToTensor()])

# FashionMNIST train and test splits (in that order), stored under "dataset".
train_set, test_set = (
    datasets.FashionMNIST("dataset", train=is_train, download=True, transform=transforms)
    for is_train in (True, False)
)

from torch.utils.data import DataLoader
# Only the training batches are shuffled; the test loader keeps dataset order.
train_loader = DataLoader(train_set, shuffle=True, batch_size=256)
test_loader = DataLoader(test_set, shuffle=False, batch_size=128)

In [15]:
from functools import reduce
from IPDL.optim.func import silverman_optimize
from IPDL.functional import matrix_estimator
def get_Ax(x, gamma=.5):
    """Build the matrix ``Ax`` for a batch ``x`` with a Silverman-selected sigma.

    Args:
        x: Batch tensor handed to ``silverman_optimize`` and ``matrix_estimator``.
        gamma: Forwarded to ``silverman_optimize`` (default 0.5).

    Returns:
        The second element of the pair returned by ``matrix_estimator(x, sigma)``.

    Side effects:
        Prints the selected ``sigma``.
    """
    # The batch size and flattened feature count used to be computed here but
    # were never used; the two IPDL helpers receive ``x`` directly.
    sigma = silverman_optimize(x, gamma=gamma, normalize=True)
    print(sigma)
    _, Ax = matrix_estimator(x, sigma)
    return Ax

In [16]:
# Fixed probe batch: the inputs (element 0) of the first test batch, moved to the target device.
val_inputs = next(iter(test_loader))[0]
val_inputs = val_inputs.to(device)
# The same gamma is later passed to SilvermanOptimizer in the training cell.
gamma = 0.2
Ax = get_Ax(val_inputs, gamma=gamma)

12.784861157786843


In [17]:
from IPDL.InformationTheory import MatrixBasedRenyisEntropy as MRE

# Entropy of the probe batch, computed from its matrix Ax.
input_entropy = MRE.entropy(Ax)
print("Input Entropy: {}".format(input_entropy))

Input Entropy: 5.212742805480957


In [18]:
from IPDL.optim import SilvermanOptimizer
from torch.utils.tensorboard import SummaryWriter

from utils import TBLog

# TensorBoard run directory is keyed by the model name.
log_dir = 'logs/{}/AE_FMNIST'.format(model_name)
tb_writer = SummaryWriter(log_dir)
tb_log = TBLog(model, tb_writer)

In [19]:
from torch import nn, tensor, zeros
from torch.nn.init import normal_

class WhiteNoise(nn.Module):
    '''
        Pre-imputation

        Corrupt the input with zero-mean Gaussian white noise and rescale the
        result to the [0, 1] range.

        The corruption is applied in training mode only; in evaluation mode
        the input is returned unchanged.

        Args:
            std: standard deviation of the Gaussian noise (default 0.05).

        Returns (forward):
            A new tensor shaped like ``x`` and detached from its graph while
            training; ``x`` itself otherwise.
    '''
    def __init__(self, std=.05):
        super(WhiteNoise, self).__init__()
        self.std = std

    def forward(self, x):
        # Nothing to corrupt in eval mode, and min()/max() are undefined on an empty tensor.
        if not self.training or x.numel() == 0:
            return x

        # Sample directly on x's device instead of allocating on the CPU and copying.
        perturbation = torch.randn(x.shape, device=x.device) * self.std
        noisy_output = x.detach() + perturbation

        # Min-max normalisation: shift by the minimum and divide by the value
        # range (max - min). Dividing by the unshifted maximum, as before, let
        # the result exceed 1 whenever the minimum was negative.
        lowest = noisy_output.min()
        value_range = noisy_output.max() - lowest
        # Guard against a zero range (constant input with std == 0) to avoid 0/0.
        smallest_step = torch.finfo(noisy_output.dtype).eps
        return (noisy_output - lowest) / value_range.clamp_min(smallest_step)

noise = WhiteNoise()

In [20]:
from torch.optim.lr_scheduler import ExponentialLR
from torch.optim import Adam
from IPDL.InformationPlane import AutoEncoderInformationPlane
from tqdm import tqdm

# gamma=8e-2
n_epoch = 50

# Optimizer
optimizer = Adam(model.parameters(), lr=5e-3)
# optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
scheduler = ExponentialLR(optimizer, gamma=0.999)

# IPDL
#   Optimizer
matrix_optimizer = SilvermanOptimizer(model, gamma=gamma, normalize_dim=True)
#   InformationPlane
ip = AutoEncoderInformationPlane(model)

# The training objective below is the square root of this MSE (i.e. RMSE).
criterion = torch.nn.MSELoss()
model = model.to(device)

epoch_iterator = tqdm(
    range(n_epoch),
    leave=True,
    unit="epoch",
    postfix={"model": model_name,  "tls": "%.4f" % 1, "vls": "%.4f" % 1,},
)

for epoch in epoch_iterator:
    # IP, MI: measured on the fixed probe batch before this epoch's training,
    # hence logged at step ``epoch - 1`` below.
    model.eval()
    model(val_inputs)

    if epoch == 0:  # Only needed once
        matrix_optimizer.step()

    Ixt, Ity = ip.computeMutualInformation(Ax)
    MI = { 'MutualInformation/I(X,T)': {},
           'MutualInformation/I(T,Y)': {}  }
    for idx in range(len(Ixt)):
        scalar_name = 'CL{}'.format(idx)
        MI['MutualInformation/I(X,T)'][scalar_name] = Ixt[idx]
        MI['MutualInformation/I(T,Y)'][scalar_name] = Ity[idx]

    # tb_log.log(MI, epoch-1, include_conv=True, transform=inv_normalize)
    tb_log.log(MI, epoch-1, include_conv=True)

    # Train
    model.train()
    loss_tr = []
    for step, (input, _) in enumerate(train_loader):
        input = input.to(device)
        # Reconstruction target is always the clean batch.
        input_clean = input.detach().clone()
        # Corrupt roughly 20% of the batches (denoising objective).
        if np.random.binomial(1, p=0.2):
            input = noise(input)
        output = model(input)
        batch_loss = torch.sqrt(criterion(output, input_clean))
        loss_tr.append(batch_loss.item())
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

    # Validation
    model.eval()
    loss_ts = []
    with torch.no_grad():
        for step, (input, _) in enumerate(test_loader):
            input = input.to(device)
            output = model(input)
            batch_loss = torch.sqrt(criterion(output, input))
            # Collected as Python floats, like the training loss.
            loss_ts.append(batch_loss.item())

    scheduler.step()

    epoch_iterator.set_postfix(
        model=model_name, tls="%.4f" % np.mean(loss_tr), vls="%.4f" % np.mean(loss_ts),
    )

    scalars = { 'Loss': {'Train' : np.mean(loss_tr), 'Test': np.mean(loss_ts)},
            'Learning Rate': scheduler.get_last_lr()[0]}

    # Sample reconstructions for TensorBoard. This forward pass is for logging
    # only, so it runs without autograd (the model is still in eval mode here).
    inputs = val_inputs[:4]
    with torch.no_grad():
        outputs = model(inputs)
    tb_log.log(scalars, epoch, include_conv=False, input=inputs.reshape((4,1,64,64)), output=outputs.reshape((4,1,64,64)))

 66%|██████▌   | 33/50 [08:30<04:22, 15.47s/epoch, model=ae_dropout_p5_sigmoid_skip, tls=0.1703, vls=0.0507]


KeyboardInterrupt: 

In [20]:
# Scratch check: square roots of 0.05 and 0.25.
# NOTE(review): presumably converting MSE-scale values to the RMSE scale used by
# the training loss — confirm, or remove this cell from the final notebook.
np.sqrt(0.05), np.sqrt(0.25) 

(0.22360679774997896, 0.5)

In [61]:
# Display the model architecture.
# NOTE(review): the saved output of this cell (execution count 61) shows Sigmoid
# activations and no Dropout2d layers, unlike the model printed right after it
# was constructed — it appears to come from a different kernel state. Re-run the
# notebook top to bottom to refresh it.
model

SDAE(
  (encode): Sequential(
    (0): Sequential(
      (0): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): Sigmoid()
      (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (4): MatrixEstimator(sigma=6.27, requires_optim=True)
    )
    (1): Sequential(
      (0): Conv2d(6, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): Sigmoid()
      (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (4): MatrixEstimator(sigma=3.61, requires_optim=True)
    )
    (2): Sequential(
      (0): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): Sigmoid()
      (3): MaxPool2d(kernel_size=3, stride=