# Dr LIM - Dimensionality reduction by Learning Invariant Mapping

- This paper (similar to TSNE) proposes an alternative method to achieve dimensionality reduction

In [1]:
from functools import reduce
import numpy as np

import torch
import torch.nn as nn
import torchvision
import torch.nn.functional as F
import torch.optim as optim

from image_utilities import plot_images

## CNN used in the paper 

![CNN architecture](./images/cnn.png)

### Torch implementation 

#### Forward

In [10]:
class DrlimCNN(nn.Module):
    def __init__(self):
        super(DrlimCNN, self).__init__()
        
        # Layer 1:
        # n_input_channel = 1
        # n_output_channel = 15
        # Kernel Size = 5 for padding = 0, stride = 1
        k_size = 5
        in_channels = 1
        out_channels = 15 
        self.layer_1 = nn.Conv2d(in_channels, out_channels, k_size)
        
        # Layer 2: Subsampling - Maxpooling
        # Kernel Size = 15  for padding=0 and stride = 1
        k_size = 15
        self.max_pooling = nn.MaxPool2d(k_size, stride=1)
        
        # Layer 3: Conv layer
        # n_input_channel = 15
        # n_output_channel = 30
        # Kernel size = 10
        in_channels = 15
        out_channels = 30
        k_size = 10
        self.layer_3 = nn.Conv2d(in_channels, out_channels, k_size)
        
        # Layer 4: Fully connected
        self.output_layer = nn.Linear(30, 2)
        
        # Output probs
        self.softmax = nn.Softmax(dim=1)
        
        # Relu
        self.relu = nn.ReLU()
        
    def forward(self, x):
        # Add relu on top of conv layer
        x = self.layer_1(x)
        x = self.relu(x)
        
        print("After Layer 1:")
        print(x.shape)
        
        # Maxpool 
        x = self.max_pooling(x)
        
        # Another conv
        print("After Max Pool:")
        print(x.shape)
        
        x = self.layer_3(x)
        
        print("After Layer 3:")
        print(x.shape)
        
        # Get the size except for batch
        num_flat_features = reduce(lambda x, y: x * y, x.shape[1:])
        print(num_flat_features)
        
        # Flatten
        x = x.reshape(1, num_flat_features)
        
        # Fully connected 
        x = self.output_layer(x)
        #x = self.softmax(x)
        
        return x
    
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

net = DrlimCNN()

# Enable GPU
net.to(device)
net.cuda()

cuda:0


DrlimCNN(
  (layer_1): Conv2d(1, 15, kernel_size=(5, 5), stride=(1, 1))
  (max_pooling): MaxPool2d(kernel_size=15, stride=1, padding=0, dilation=1, ceil_mode=False)
  (layer_3): Conv2d(15, 30, kernel_size=(9, 9), stride=(1, 1))
  (output_layer): Linear(in_features=30, out_features=2, bias=True)
  (softmax): Softmax(dim=1)
  (relu): ReLU()
)

#### Backward

##### Define contrastive loss function

In [11]:
def contrastive_loss(output_1, output_2, 
            target_1, target_2):
    if target_1 == target_2:
        y = torch.zeros_like(output_1, requires_grad=True)
    else:
        y = torch.ones_like(output_1, requires_grad=True)
        
    distance = torch.norm(output_1 - output_2)
    ls = torch.pow(distance, 2)
    ld = torch.max(torch.zeros_like(output_1), distance)
    ld = torch.pow(ld, 2)
    
    loss = torch.mean((1 - y) * ls + y * ld)
    
    return loss

In [12]:
# Test Code
input_1 = torch.randn(1, 1, 28, 28)
input_2 = torch.randn(1, 1, 28, 28)

out_1 = net(input_1.cuda())
out_2 = net(input_2.cuda())
out_1 = out_1.requires_grad_(True)
out_2 = out_2.requires_grad_(True)

print("---------")
print(out_1.requires_grad)
print(out_2.requires_grad)
print("---------")

print("Before:")
print(net.output_layer.weight.grad)
loss = contrastive_loss(out_1, out_2,
                        0, 0)
print(loss)
loss.backward()
print("After:")
print(net.output_layer.weight.grad)

After Layer 1:
torch.Size([1, 15, 24, 24])
After Max Pool:
torch.Size([1, 15, 10, 10])
After Layer 3:
torch.Size([1, 30, 2, 2])
120


RuntimeError: size mismatch, m1: [1 x 120], m2: [30 x 2] at /opt/conda/conda-bld/pytorch_1570910687230/work/aten/src/THC/generic/THCTensorMathBlas.cu:290

## Load Data

In [5]:
n_epochs = 3
batch_size_train = 64
batch_size_test = 1000
learning_rate = 0.01
momentum = 0.5
log_interval = 10

train_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST('./files/', train=True, download=True,
                               transform=torchvision.transforms.Compose([
                                   torchvision.transforms.ToTensor(),
                                   torchvision.transforms.Normalize(
                                       (0.1307,), (0.3081,))
                               ])),
    batch_size=batch_size_train, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST('./files/', train=False, download=True,
                               transform=torchvision.transforms.Compose([
                                   torchvision.transforms.ToTensor(),
                                   torchvision.transforms.Normalize(
                                       (0.1307,), (0.3081,))
                               ])),
    batch_size=batch_size_test, shuffle=True)

## Train

In [6]:
n_epochs = 100
optimizer = optim.Adam(net.parameters(), lr=0.001)

all_data = []
all_target = []

for batch_idx, (data, target) in enumerate(train_loader):
    # Choose only either 5 or 10 number 
    filter_index = ((target == 4) | (target == 9))
    all_data += data[filter_index].numpy().tolist()
    all_target += target[filter_index].numpy().tolist()

for epoch in range(n_epochs):  # loop over the dataset multiple times
    running_loss = 0.0
    for i in range(len(all_data) - 1): 
        data_1 = torch.tensor(all_data[i])
        data_1 = torch.unsqueeze(data_1, 0)
        target_1 = torch.tensor(all_target[i])
        
        data_2 = torch.tensor(all_data[i + 1])
        data_2 = torch.unsqueeze(data_2, 0)
        target_2 = torch.tensor(all_target[i + 1])
        
        print("--------")
        print(data_1.shape)
        print(data_2.shape)
        print("--------")
        
        optimizer.zero_grad()

        # Enable GPUs
        input_1, label_1 = data_1.to(device), target_1.to(device)
        input_2, label_2 = data_2.to(device), target_2.to(device)
        
        out_1 = net(input_1)
        out_2 = net(input_2)
        out_1 = out_1.requires_grad_(True)
        out_2 = out_2.requires_grad_(True)

        loss = contrastive_loss(out_1, out_2,
                                label_1, label_2)

        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

--------
torch.Size([1, 1, 28, 28])
torch.Size([1, 1, 28, 28])
--------
Shape
torch.Size([1, 15, 5, 5])


RuntimeError: Calculated padded input size per channel: (5 x 5). Kernel size: (9 x 9). Kernel size can't be greater than actual input size