<a href="https://colab.research.google.com/github/Kwanikaze/vpandas/blob/master/AE_OHE_8digits.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import matplotlib.pyplot as plt
import numpy as np

## Generate Data

In [2]:
def generate_data(num=8):
    """ Generate 'num' number of one-hot encoded integers. """ 
    x_train = np.eye(num)[np.arange(num)]                       # This is a simple way to one-hot encode integers
    
    # Repeat x_train multiple times for training
    x_train = np.repeat(x_train, 100, axis=0)
    
    # The target is x_train itself!
    x_target = x_train.copy()
    return x_train, x_target

In [3]:
num = 8
np.random.seed(10)
x_train, x_target = generate_data(num=num)

In [4]:
print(x_train)
print(np.shape(x_train))
print(np.shape(x_target))

[[1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 1.]]
(800, 8)
(800, 8)


## Autoencoder Parameters

In [5]:
#Parameters
latent_dims = 3
num_epochs = 2000
batch_size = 64
learning_rate = 1e-3
use_gpu = True

## Autoencoder Definition
https://medium.com/pytorch/implementing-an-autoencoder-in-pytorch-19baa22647d1

https://gist.github.com/AFAgarap/4f8a8d8edf352271fa06d85ba0361f26

In [6]:
class Autoencoder(nn.Module):
    def __init__(self, latent_dims):
        super().__init__()
        #self.encoder_hidden_layer = nn.Linear(in_features=kwargs["input_shape"],)
        self.encoder_layer = nn.Linear(in_features=num, out_features = latent_dims)
        nn.init.xavier_normal_(self.encoder_layer.weight)
        self.decoder_layer = nn.Linear(in_features = latent_dims, out_features = num)
        nn.init.xavier_normal_(self.decoder_layer.weight)
    
    def forward(self,features,latent_dims):
        x_input = self.encoder_layer(features)
        z = torch.sigmoid(x_input)
        #z.view(-1,latent_dims)
        if z.size()[0] == latent_dims: #resize from [3] to [1,3]
            z = z.view(1, latent_dims)
        #print(self.decoder_layer(z))
        #recon = torch.sigmoid(self.decoder_layer(z))
        softmax = nn.Softmax(dim=1)
        recon = softmax(self.decoder_layer(z))
        return recon

## Train Autoencoder

In [7]:
def trainAE(AE,latent_dims):
  AE.train()
  x_train, x_target = generate_data(num=num)
  inds = list(range(x_train.shape[0]))
  N = x_train.shape[0] # 800
  freq = num_epochs // 10 # floor division

  loss_hist = []
  x_train = Variable(torch.from_numpy(x_train))
  x_target = Variable(torch.from_numpy(x_target))
  for epoch in range(num_epochs):
      inds = np.random.permutation(inds)
      x_train = x_train[inds]
      x_train = x_train.to(device)
      x_target = x_target[inds]
      x_target = x_target.to(device)
      
      loss = 0
      num_batches = N / batch_size
      for b in range(0, N, batch_size):
          #get the mini-batch
          x_batch = x_train[b: b+batch_size]
          x_target_batch = x_target[b: b+batch_size]
          
          #feed forward
          batch_recon = AE(features=x_batch.float(), latent_dims = latent_dims)
          
          # Error
          #Convert x_batch from OHE vectors to single scalar for target class, of each sample in batch 
          _, x_batch_targets = x_batch.max(dim=1)
          train_loss = criterion(batch_recon, x_batch_targets)
          #print(batch_recon.size())
          #print(x_batch_targets.size())
          loss += train_loss.item() / N # update epoch loss
          
          #Backprop the error, compute the gradient
          optimizer.zero_grad()
          train_loss.backward()
          
          #update parameters based on gradient
          optimizer.step()
          
      #Record loss per epoch        
      loss_hist.append(loss)
      
      if epoch % freq == 0:
          print()
          print("Epoch %d/%d\tloss=%.5f" % (epoch + 1, num_epochs, loss), end='\t', flush=True)
          
          #Test with all training data
          AE.eval()
          train_recon = AE(features = x_train.float(),latent_dims=latent_dims)
          _, x_targets = x_target.max(dim=1)
          l = criterion(train_recon, x_targets)
          print("Test loss: {:.5f}".format(l.item()), end='')
      
  print("\nTraining finished!")

## Latent dimensions set to 3

In [8]:
#  use gpu if available
device = torch.device("cuda:0" if use_gpu and torch.cuda.is_available() else "cpu")
AE = Autoencoder(latent_dims=3)
AE = AE.to(device)
num_params = sum(p.numel() for p in AE.parameters() if p.requires_grad)
print(AE.parameters)
print("Number of parameters: %d" % num_params) #8*3 + 3 = 27, 3*8 + 8 = 32, 27+32

# optimizer object
optimizer = torch.optim.Adam(params = AE.parameters(), lr = learning_rate)
#criterion = nn.CrossEntropyLoss()    # for target, does not accept a OHE vector
criterion = nn.NLLLoss()

trainAE(AE,latent_dims=3)

<bound method Module.parameters of Autoencoder(
  (encoder_layer): Linear(in_features=8, out_features=3, bias=True)
  (decoder_layer): Linear(in_features=3, out_features=8, bias=True)
)>
Number of parameters: 59

Epoch 1/2000	loss=-0.00199	Test loss: -0.12272
Epoch 201/2000	loss=-0.00950	Test loss: -0.58467
Epoch 401/2000	loss=-0.01396	Test loss: -0.85909
Epoch 601/2000	loss=-0.01555	Test loss: -0.95691
Epoch 801/2000	loss=-0.01599	Test loss: -0.98398
Epoch 1001/2000	loss=-0.01615	Test loss: -0.99364
Epoch 1201/2000	loss=-0.01621	Test loss: -0.99741
Epoch 1401/2000	loss=-0.01623	Test loss: -0.99893
Epoch 1601/2000	loss=-0.01624	Test loss: -0.99955
Epoch 1801/2000	loss=-0.01625	Test loss: -0.99981
Training finished!


In [9]:
print("Print prediction results:")
x_test = np.eye(num)[np.arange(num)]                        # Test data (one-hot encoded)
x_test = Variable(torch.from_numpy(x_test))
x_test = x_test.to(device)
#np.set_printoptions(2)
for x in x_test:
    print("\tInput: {} \t Output: {}".format(x.cpu().detach().numpy(), np.round(AE(features=x.float(),latent_dims=3).cpu().detach().numpy(),decimals=2)))

Print prediction results:
	Input: [1. 0. 0. 0. 0. 0. 0. 0.] 	 Output: [[1. 0. 0. 0. 0. 0. 0. 0.]]
	Input: [0. 1. 0. 0. 0. 0. 0. 0.] 	 Output: [[0. 1. 0. 0. 0. 0. 0. 0.]]
	Input: [0. 0. 1. 0. 0. 0. 0. 0.] 	 Output: [[0. 0. 1. 0. 0. 0. 0. 0.]]
	Input: [0. 0. 0. 1. 0. 0. 0. 0.] 	 Output: [[0. 0. 0. 1. 0. 0. 0. 0.]]
	Input: [0. 0. 0. 0. 1. 0. 0. 0.] 	 Output: [[0. 0. 0. 0. 1. 0. 0. 0.]]
	Input: [0. 0. 0. 0. 0. 1. 0. 0.] 	 Output: [[0. 0. 0. 0. 0. 1. 0. 0.]]
	Input: [0. 0. 0. 0. 0. 0. 1. 0.] 	 Output: [[0. 0. 0. 0. 0. 0. 1. 0.]]
	Input: [0. 0. 0. 0. 0. 0. 0. 1.] 	 Output: [[0. 0. 0. 0. 0. 0. 0. 1.]]


## Extract intermediate features using Forward Hook

In [10]:
def printnorm_encoder(self, input1, output):
    # input is a tuple of packed inputs
    # output is a Tensor. output.data is the Tensor we are interested
    print('\tInside ' + self.__class__.__name__ + ' forward')
    #print('\t input:', input1.cpu().detach().numpy())
    print('\t output rounded to 2 decimals:', np.round(output.cpu().detach().numpy(),decimals=2))
    print('\t output rounded to integer:', np.round(output.cpu().detach().numpy(),decimals=0))

In [11]:
def inside_decoder(self, input1, output):
    # input is a tuple of packed inputs
    # output is a Tensor. output.data is the Tensor we are interested
    print('\tInside ' + self.__class__.__name__ + ' forward')
    #print(input1[0].cpu().detach().numpy())
    #print('\t input:', input1.cpu().detach().numpy())
    print('\t output:', input1[0].cpu().detach().numpy())
    print('\t output rounded to 2 decimals:', np.round(input1[0].cpu().detach().numpy(),2))

In [12]:
#AE.encoder_layer.register_forward_hook(printnorm_encoder)
decoder_hook = AE.decoder_layer.register_forward_hook(inside_decoder)
AE.eval()
for x in x_test:
    print('INPUT: {}'.format(x.cpu().detach().numpy()))
    out = AE(features=x.float(),latent_dims=3)
    #print(out)
decoder_hook.remove() #remove hook after use

INPUT: [1. 0. 0. 0. 0. 0. 0. 0.]
	Inside Linear forward
	 output: [[1.6319963e-04 2.0543308e-04 9.9977738e-01]]
	 output rounded to 2 decimals: [[0. 0. 1.]]
INPUT: [0. 1. 0. 0. 0. 0. 0. 0.]
	Inside Linear forward
	 output: [[2.2696436e-04 9.9935693e-01 9.8180637e-05]]
	 output rounded to 2 decimals: [[0. 1. 0.]]
INPUT: [0. 0. 1. 0. 0. 0. 0. 0.]
	Inside Linear forward
	 output: [[9.9981028e-01 1.3466981e-04 9.9990559e-01]]
	 output rounded to 2 decimals: [[1. 0. 1.]]
INPUT: [0. 0. 0. 1. 0. 0. 0. 0.]
	Inside Linear forward
	 output: [[9.990946e-01 1.801695e-04 9.381091e-05]]
	 output rounded to 2 decimals: [[1. 0. 0.]]
INPUT: [0. 0. 0. 0. 1. 0. 0. 0.]
	Inside Linear forward
	 output: [[1.2642912e-04 9.9980468e-01 9.9981421e-01]]
	 output rounded to 2 decimals: [[0. 1. 1.]]
INPUT: [0. 0. 0. 0. 0. 1. 0. 0.]
	Inside Linear forward
	 output: [[9.9988413e-01 9.9990225e-01 9.4532719e-05]]
	 output rounded to 2 decimals: [[1. 1. 0.]]
INPUT: [0. 0. 0. 0. 0. 0. 1. 0.]
	Inside Linear forward
	 out

When the number of latent dimensions equals 3, each latent variable is binary.

## Latent dimensions set to 4

In [13]:
latent_dims=4
AE = Autoencoder(latent_dims=4)
AE = AE.to(device)

# optimizer object
optimizer = torch.optim.Adam(params = AE.parameters(), lr = learning_rate)
#criterion = nn.CrossEntropyLoss()    # for target, does not accept a OHE vector
criterion = nn.NLLLoss()

trainAE(AE,latent_dims=4)


Epoch 1/2000	loss=-0.00215	Test loss: -0.13359
Epoch 201/2000	loss=-0.01020	Test loss: -0.62743
Epoch 401/2000	loss=-0.01540	Test loss: -0.94826
Epoch 601/2000	loss=-0.01609	Test loss: -0.99000
Epoch 801/2000	loss=-0.01622	Test loss: -0.99788
Epoch 1001/2000	loss=-0.01624	Test loss: -0.99949
Epoch 1201/2000	loss=-0.01625	Test loss: -0.99986
Epoch 1401/2000	loss=-0.01625	Test loss: -0.99995
Epoch 1601/2000	loss=-0.01625	Test loss: -0.99999
Epoch 1801/2000	loss=-0.01625	Test loss: -1.00000
Training finished!


In [14]:
print("Print prediction results:")
x_test = np.eye(num)[np.arange(num)]                        # Test data (one-hot encoded)
x_test = Variable(torch.from_numpy(x_test))
x_test = x_test.to(device)
#np.set_printoptions(2)
for x in x_test:
    print("\tInput: {} \t Output: {}".format(x.cpu().detach().numpy(), np.round(AE(features=x.float(),latent_dims=4).cpu().detach().numpy(),decimals=2)))

Print prediction results:
	Input: [1. 0. 0. 0. 0. 0. 0. 0.] 	 Output: [[1. 0. 0. 0. 0. 0. 0. 0.]]
	Input: [0. 1. 0. 0. 0. 0. 0. 0.] 	 Output: [[0. 1. 0. 0. 0. 0. 0. 0.]]
	Input: [0. 0. 1. 0. 0. 0. 0. 0.] 	 Output: [[0. 0. 1. 0. 0. 0. 0. 0.]]
	Input: [0. 0. 0. 1. 0. 0. 0. 0.] 	 Output: [[0. 0. 0. 1. 0. 0. 0. 0.]]
	Input: [0. 0. 0. 0. 1. 0. 0. 0.] 	 Output: [[0. 0. 0. 0. 1. 0. 0. 0.]]
	Input: [0. 0. 0. 0. 0. 1. 0. 0.] 	 Output: [[0. 0. 0. 0. 0. 1. 0. 0.]]
	Input: [0. 0. 0. 0. 0. 0. 1. 0.] 	 Output: [[0. 0. 0. 0. 0. 0. 1. 0.]]
	Input: [0. 0. 0. 0. 0. 0. 0. 1.] 	 Output: [[0. 0. 0. 0. 0. 0. 0. 1.]]


In [15]:
decoder_hook = AE.decoder_layer.register_forward_hook(inside_decoder)
for x in x_test:
    print('INPUT: {}'.format(x.cpu().detach().numpy()))
    out = AE(features=x.float(),latent_dims=4)
    #print(out)
decoder_hook.remove() #remove hook after use

INPUT: [1. 0. 0. 0. 0. 0. 0. 0.]
	Inside Linear forward
	 output: [[0.99822587 0.9967     0.00337273 0.9975757 ]]
	 output rounded to 2 decimals: [[1. 1. 0. 1.]]
INPUT: [0. 1. 0. 0. 0. 0. 0. 0.]
	Inside Linear forward
	 output: [[4.8388448e-03 9.8960018e-01 9.7651826e-04 1.8593514e-03]]
	 output rounded to 2 decimals: [[0.   0.99 0.   0.  ]]
INPUT: [0. 0. 1. 0. 0. 0. 0. 0.]
	Inside Linear forward
	 output: [[0.00184063 0.00159922 0.99723285 0.00293759]]
	 output rounded to 2 decimals: [[0. 0. 1. 0.]]
INPUT: [0. 0. 0. 1. 0. 0. 0. 0.]
	Inside Linear forward
	 output: [[0.00434178 0.9990497  0.9974759  0.98867387]]
	 output rounded to 2 decimals: [[0.   1.   1.   0.99]]
INPUT: [0. 0. 0. 0. 1. 0. 0. 0.]
	Inside Linear forward
	 output: [[0.00173963 0.0042547  0.00108422 0.9953544 ]]
	 output rounded to 2 decimals: [[0. 0. 0. 1.]]
INPUT: [0. 0. 0. 0. 0. 1. 0. 0.]
	Inside Linear forward
	 output: [[0.9951035  0.00619558 0.99766135 0.99881727]]
	 output rounded to 2 decimals: [[1.   0.01 1.  

## Latent dimensions set to 2

In [16]:
latent_dims =2
AE = Autoencoder(latent_dims=2)
AE = AE.to(device)

# optimizer object
optimizer = torch.optim.Adam(params = AE.parameters(), lr = learning_rate)
#criterion = nn.CrossEntropyLoss()    # for target, does not accept a OHE vector
criterion = nn.NLLLoss()

trainAE(AE,latent_dims=2)


Epoch 1/2000	loss=-0.00201	Test loss: -0.12373
Epoch 201/2000	loss=-0.00672	Test loss: -0.41296
Epoch 401/2000	loss=-0.00855	Test loss: -0.52737
Epoch 601/2000	loss=-0.00975	Test loss: -0.60242
Epoch 801/2000	loss=-0.01140	Test loss: -0.70205
Epoch 1001/2000	loss=-0.01284	Test loss: -0.79114
Epoch 1201/2000	loss=-0.01402	Test loss: -0.86391
Epoch 1401/2000	loss=-0.01477	Test loss: -0.90926
Epoch 1601/2000	loss=-0.01526	Test loss: -0.93915
Epoch 1801/2000	loss=-0.01558	Test loss: -0.95914
Training finished!


In [17]:
print("Print prediction results:")
x_test = np.eye(num)[np.arange(num)]                        # Test data (one-hot encoded)
x_test = Variable(torch.from_numpy(x_test))
x_test = x_test.to(device)
#np.set_printoptions(2)
for x in x_test:
    print("\tInput: {} \t Output: {}".format(x.cpu().detach().numpy(), np.round(AE(features=x.float(),latent_dims=2).cpu().detach().numpy(),decimals=2)))

Print prediction results:
	Input: [1. 0. 0. 0. 0. 0. 0. 0.] 	 Output: [[0.96 0.   0.02 0.01 0.01 0.   0.   0.  ]]
	Input: [0. 1. 0. 0. 0. 0. 0. 0.] 	 Output: [[0.   0.97 0.   0.   0.01 0.01 0.   0.  ]]
	Input: [0. 0. 1. 0. 0. 0. 0. 0.] 	 Output: [[0.01 0.   0.95 0.01 0.   0.   0.02 0.01]]
	Input: [0. 0. 0. 1. 0. 0. 0. 0.] 	 Output: [[0.   0.   0.01 0.99 0.   0.   0.   0.  ]]
	Input: [0. 0. 0. 0. 1. 0. 0. 0.] 	 Output: [[0.01 0.01 0.   0.   0.98 0.   0.   0.  ]]
	Input: [0. 0. 0. 0. 0. 1. 0. 0.] 	 Output: [[0.   0.01 0.   0.   0.   0.98 0.   0.01]]
	Input: [0. 0. 0. 0. 0. 0. 1. 0.] 	 Output: [[0.   0.   0.01 0.   0.   0.   0.97 0.01]]
	Input: [0. 0. 0. 0. 0. 0. 0. 1.] 	 Output: [[0.   0.   0.   0.   0.   0.01 0.01 0.97]]


In [18]:
decoder_hook = AE.decoder_layer.register_forward_hook(inside_decoder)
for x in x_test:
    print('INPUT: {}'.format(x.cpu().detach().numpy()))
    out = AE(features=x.float(),latent_dims=2)
    #print(out)
decoder_hook.remove() #remove hook after use

INPUT: [1. 0. 0. 0. 0. 0. 0. 0.]
	Inside Linear forward
	 output: [[1.9784156e-05 4.0294147e-01]]
	 output rounded to 2 decimals: [[0.  0.4]]
INPUT: [0. 1. 0. 0. 0. 0. 0. 0.]
	Inside Linear forward
	 output: [[4.0351558e-01 1.7159140e-05]]
	 output rounded to 2 decimals: [[0.4 0. ]]
INPUT: [0. 0. 1. 0. 0. 0. 0. 0.]
	Inside Linear forward
	 output: [[0.41172287 0.78208953]]
	 output rounded to 2 decimals: [[0.41 0.78]]
INPUT: [0. 0. 0. 1. 0. 0. 0. 0.]
	Inside Linear forward
	 output: [[2.0215523e-06 9.9999177e-01]]
	 output rounded to 2 decimals: [[0. 1.]]
INPUT: [0. 0. 0. 0. 1. 0. 0. 0.]
	Inside Linear forward
	 output: [[2.0881014e-06 1.9952490e-06]]
	 output rounded to 2 decimals: [[0. 0.]]
INPUT: [0. 0. 0. 0. 0. 1. 0. 0.]
	Inside Linear forward
	 output: [[9.999976e-01 1.909324e-06]]
	 output rounded to 2 decimals: [[1. 0.]]
INPUT: [0. 0. 0. 0. 0. 0. 1. 0.]
	Inside Linear forward
	 output: [[0.9999962  0.99999666]]
	 output rounded to 2 decimals: [[1. 1.]]
INPUT: [0. 0. 0. 0. 0. 0. 

The latent space has too few dimensions to perfectly output OHE vectors.