#### 28/08/2019

What follows is a simple implementation of a convolutional autoencoder using patches extracted from full-image mammograms.

In [4]:
import numpy as np
import torch
from torch import nn
from torchvision import models
from torchvision import datasets
from torchvision import transforms
from torchvision import utils


from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torchsummary import summary

import matplotlib.pyplot as plt

from mm_patch.data import PatchesDataset
import mm_patch.transforms


import os,sys

### Import data

In [5]:
# Auxiliary function
def online_mean_and_std(loader):
    """Compute the per-channel mean and std over all pixels yielded by a loader.

    The dataset does not have to fit in RAM: running first and second moments
    are updated one batch at a time, and the standard deviation is obtained
    from

        Var[x] = E[X^2] - E^2[X]

    Args:
        loader: iterable of dicts whose 'patch' entry is a 4-D tensor laid
            out as (batch, channels, height, width). It is iterated exactly
            once, so one-shot iterables are supported.

    Returns:
        Tuple ``(mean, std)`` of float tensors with one entry per channel.

    Raises:
        ValueError: if the loader yields no pixels at all.

    ref: https://discuss.pytorch.org/t/computing-the-mean-and-std-of-dataset/34949/8
    """
    cnt = 0
    # Created lazily from the first batch: this avoids pulling an extra batch
    # just to read the channel count, and starts the running moments from
    # zeros instead of uninitialised memory.
    fst_moment = None
    snd_moment = None

    for batch in loader:
        patch = batch['patch'].float()
        b, c, h, w = patch.shape
        nb_pixels = b * h * w
        if nb_pixels == 0:
            # An empty batch carries no information and would divide by zero.
            continue

        sum_ = torch.sum(patch, dim=[0, 2, 3])
        sum_of_square = torch.sum(patch ** 2, dim=[0, 2, 3])
        if fst_moment is None:
            fst_moment = torch.zeros_like(sum_)
            snd_moment = torch.zeros_like(sum_)

        fst_moment = (cnt * fst_moment + sum_) / (cnt + nb_pixels)
        snd_moment = (cnt * snd_moment + sum_of_square) / (cnt + nb_pixels)

        cnt += nb_pixels

    if fst_moment is None:
        raise ValueError("online_mean_and_std: the loader yielded no pixels")

    # Rounding can push the variance marginally below zero for (nearly)
    # constant data; clamp so that sqrt never returns NaN.
    variance = torch.clamp(snd_moment - fst_moment ** 2, min=0)
    return fst_moment, torch.sqrt(variance)

In [5]:
# First I create a dataloader to calculate mean and std of the pixels.
# For the latter I will use `online_mean_and_std(loader)`
# Only ToTensor() is applied here (no Scale() / Normalize()), so the
# statistics describe whatever value range ToTensor() produces.
patches = PatchesDataset(data_path='./patches.pkl', transform = mm_patch.transforms.ToTensor())
# Every batch is visited once to accumulate the statistics, hence no shuffling.
loader = DataLoader(patches, batch_size=20, shuffle=False)
mean_px, std_px = online_mean_and_std(loader)
# Print the dtypes of the two returned tensors.
print(mean_px.dtype,std_px.dtype)

torch.float32 torch.float32


In [6]:
mean_px,std_px

(tensor([18968.8457]), tensor([7423.2759]))

After calculating the mean and std of the pixel values, I can build a dataset that yields normalized patches.

In [4]:
# Transformations
# Applied to every patch in the listed order: ToTensor, Scale, Normalize.
# NOTE(review): mean_px / std_px were computed on a dataset that used only
# ToTensor() (no Scale()), yet Normalize runs after Scale() here -- confirm
# that the statistics match the value range Normalize actually receives.
# NOTE(review): mean_px / std_px are 1-element tensors wrapped in lists;
# confirm this is the form mm_patch.transforms.Normalize expects.
composed = transforms.Compose([ 
#                                 mm_patch.transforms.ToImage(),
                                mm_patch.transforms.ToTensor(),
                                mm_patch.transforms.Scale(),
#                                 mm_patch.transforms.GrayToRGB(),
                                mm_patch.transforms.Normalize(mean=[mean_px], std=[std_px] )
                            ])
# Set dataset
# Rebinds `patches`: from here on it is the dataset with the full pipeline.
patches = PatchesDataset(data_path='./patches.pkl', transform = composed)

# Dataloader parameters
batch_size = 20
validation_split = .2
shuffle_dataset = True
random_seed= 42

# Creating data indices for training and validation splits:
dataset_size = len(patches)
indices = list(range(dataset_size))
# Number of samples that go to the validation subset (rounded down).
split = int(np.floor(validation_split * dataset_size))
if shuffle_dataset :
    # Seeding NumPy's global RNG makes the shuffled split reproducible.
    np.random.seed(random_seed)
    np.random.shuffle(indices)
# The first `split` indices form the validation set, the rest the training set.
train_indices, val_indices = indices[split:], indices[:split]

# Creating PT data samplers and loaders:
# Both loaders wrap the same dataset; the samplers restrict each one to its
# own disjoint index subset and draw from it in random order.
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

train_loader = torch.utils.data.DataLoader(patches, batch_size=batch_size, 
                                           sampler=train_sampler, num_workers=4)
validation_loader = torch.utils.data.DataLoader(patches, batch_size=batch_size,
                                                sampler=valid_sampler, num_workers=4)

### Plot batch

In [1]:
# Helper function to show a batch
def show_patches_batch(sample_batched, max_images = None):
    """Show the patches of a batch in a square grid, one subplot per patch.

    Args:
        sample_batched: dict with a 'patch' entry (each element is indexed
            as ``patch[0]`` to obtain the 2-D image that is drawn) and a
            'target' entry holding one label index per patch.
        max_images: optional upper bound on the number of patches drawn.
            ``None`` (or 0) draws the whole batch.

    Subplot titles are looked up in the notebook-level
    ``patches.unique_labels``.
    """
    patches_batch, targets_batch = \
            sample_batched['patch'], sample_batched['target']

    # Draw what the batch actually contains (the last batch of an epoch can
    # be shorter than the global batch size), capped by max_images if given.
    num_img = len(patches_batch)
    if max_images:
        num_img = min(num_img, max_images)

    # add_subplot needs integer grid dimensions; np.ceil returns a float.
    grid_side = int(np.ceil(np.sqrt(num_img)))

    fig = plt.figure(figsize = (10,8))
    for i in range(num_img):
        ax = fig.add_subplot(grid_side, grid_side, i+1)
        ax.imshow(patches_batch[i][0])
        ax.set_title(f'Class: {patches.unique_labels[targets_batch[i]]}')
        ax.axis('off')

    # Lay out once, after every subplot exists.
    plt.tight_layout()
    plt.show()

# Print the tensor sizes of the first training batches and display the
# fourth one.
for i_batch, sample_batched in enumerate(train_loader):
    print(i_batch, sample_batched['patch'].size(),
          sample_batched['target'].size())

    # observe 4th batch and stop.
    if i_batch == 3:
        # show_patches_batch opens its own figure and calls plt.show(), so
        # no extra plt.figure() / plt.axis('off') / plt.show() is issued
        # here: those calls only produced additional empty figures.
        show_patches_batch(sample_batched)
        # Kept from the original cell: leaves pyplot in non-interactive mode.
        plt.ioff()
        break

NameError: name 'train_loader' is not defined

### Models
#### autoencoders

In [10]:
# Parameters
# Learning rate handed to the Adam optimizer created for the dense autoencoder.
learning_rate = 1e-3

In [11]:
class autoencoder(nn.Module):
    """Fully connected autoencoder operating on flattened inputs.

    The encoder maps ``input_dim`` features through 128 and 64 hidden units
    down to a code of ``width`` values; the decoder mirrors it and ends in a
    Tanh, so reconstructions lie in [-1, 1].

    Args:
        width: size of the bottleneck (code) layer.
        input_dim: number of features of the flattened input. Defaults to
            ``224 * 224``, i.e. one flattened single-channel 224x224 patch.

    Inputs are not flattened by the model: the last dimension of the tensor
    passed in must already equal ``input_dim``.
    """

    def __init__(self, width=13, input_dim=224 * 224):
        super(autoencoder, self).__init__()
        self.width = width
        self.input_dim = input_dim
        self.encoder = nn.Sequential(
            nn.Linear(self.input_dim, 128),
            nn.ReLU(True),
            nn.Linear(128, 64),
            nn.ReLU(True),
            nn.Linear(64, self.width))

        self.decoder = nn.Sequential(
            nn.Linear(self.width, 64),
            nn.ReLU(True),
            nn.Linear(64, 128),
            nn.ReLU(True),
            nn.Linear(128, self.input_dim),
            nn.Tanh())

    def forward(self, x):
        """Encode ``x`` and return its reconstruction (same last dim as ``x``)."""
        return self.decode(self.encode(x))

    def encode(self, x):
        """Return the ``width``-dimensional code for ``x``."""
        return self.encoder(x)

    def decode(self, code):
        """Reconstruct an ``input_dim``-dimensional output from ``code``."""
        return self.decoder(code)

In [12]:
# Size of the bottleneck (code) layer of the dense autoencoder.
width=13
model = autoencoder(width=width)
# Reconstruction loss: mean squared error.
criterion = nn.MSELoss()
# Adam over the dense model's parameters, with weight decay 1e-5.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)

In [14]:
# 50176 = 224 * 224, the input width of the dense autoencoder's first layer.
input_size = (1,50176,)
# Print the per-layer output shapes and parameter counts.
summary(model, input_size)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1               [-1, 1, 128]       6,422,656
              ReLU-2               [-1, 1, 128]               0
            Linear-3                [-1, 1, 64]           8,256
              ReLU-4                [-1, 1, 64]               0
            Linear-5                [-1, 1, 13]             845
            Linear-6                [-1, 1, 64]             896
              ReLU-7                [-1, 1, 64]               0
            Linear-8               [-1, 1, 128]           8,320
              ReLU-9               [-1, 1, 128]               0
           Linear-10             [-1, 1, 50176]       6,472,704
             Tanh-11             [-1, 1, 50176]               0
Total params: 12,913,677
Trainable params: 12,913,677
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.19
Fo

#### convolutional autoencoders

In [18]:
import torch.nn as nn
import torch.nn.functional as F

# define the NN architecture
class ConvAutoencoder(nn.Module):
    """Small convolutional autoencoder for single-channel images.

    Encoder: two 3x3 convolutions (1 -> 16 -> 4 channels, padding 1), each
    followed by ReLU and a 2x2 max-pool, so height and width are divided by
    four. Decoder: two stride-2 transposed convolutions (4 -> 16 -> 1
    channels) that double the spatial size each; the last one is followed by
    a sigmoid, so outputs lie in [0, 1].
    """

    def __init__(self):
        super().__init__()

        # --- encoder ---
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16,
                               kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=4,
                               kernel_size=3, padding=1)
        # Shared by both encoder stages: halves height and width.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # --- decoder ---
        # Kernel 2 with stride 2 doubles height and width.
        self.t_conv1 = nn.ConvTranspose2d(in_channels=4, out_channels=16,
                                          kernel_size=2, stride=2)
        self.t_conv2 = nn.ConvTranspose2d(in_channels=16, out_channels=1,
                                          kernel_size=2, stride=2)

    def forward(self, x):
        """Return the reconstruction of ``x``."""
        # Encode: conv -> ReLU -> pool, twice.
        stage1 = self.pool(F.relu(self.conv1(x)))
        code = self.pool(F.relu(self.conv2(stage1)))  # compressed representation

        # Decode: transposed conv -> ReLU, then transposed conv -> sigmoid.
        upsampled = F.relu(self.t_conv1(code))
        return torch.sigmoid(self.t_conv2(upsampled))

# initialize the NN
# NOTE(review): this rebinds `model`, replacing the dense autoencoder. The
# `optimizer` created for the dense model still holds that model's
# parameters, so a new optimizer is needed before training this network.
model = ConvAutoencoder()
print(model)



ConvAutoencoder(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (t_conv1): ConvTranspose2d(4, 16, kernel_size=(2, 2), stride=(2, 2))
  (t_conv2): ConvTranspose2d(16, 1, kernel_size=(2, 2), stride=(2, 2))
)


In [19]:
# One input channel (matching conv1) and a 224x224 spatial size.
input_size = (1,224,224)
# Print the per-layer output shapes and parameter counts.
summary(model, input_size)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 16, 224, 224]             160
         MaxPool2d-2         [-1, 16, 112, 112]               0
            Conv2d-3          [-1, 4, 112, 112]             580
         MaxPool2d-4            [-1, 4, 56, 56]               0
   ConvTranspose2d-5         [-1, 16, 112, 112]             272
   ConvTranspose2d-6          [-1, 1, 224, 224]              65
Total params: 1,077
Trainable params: 1,077
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.19
Forward/backward pass size (MB): 10.05
Params size (MB): 0.00
Estimated Total Size (MB): 10.24
----------------------------------------------------------------
