In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from Transformer import Transformer_encoder, Transformer_decoder
import matplotlib.pyplot as plt
from math import sin, cos
import seaborn as sns
import numpy as np

%load_ext autoreload
%autoreload 2

BATCH_SIZE = 1
MAX_LEN = 5
VOCA_SIZE = 10000
NUM_LAYERS = 3
NUM_HEADS = 2
EMBEDDING_DIM = 4

def sentence(voca_size, length, max_len):
    s = np.random.choice(voca_size, length)
    pad = np.zeros(max_len -length)
    return np.concatenate([s,pad])

# ENCODER

In [2]:
encoder_lengths = list(np.random.choice(range(2,MAX_LEN), BATCH_SIZE))
encoder_inputs = torch.LongTensor([sentence(VOCA_SIZE, l, MAX_LEN) for l in encoder_lengths])
print(f"Encoder inputs:")
print(encoder_inputs)
print()

Encoder inputs:
tensor([[6179, 1113, 1003, 4753,    0]])



In [3]:
encoder = Transformer_encoder(NUM_LAYERS, NUM_HEADS, VOCA_SIZE, EMBEDDING_DIM)
context = encoder(encoder_inputs)
print(f"Context size: {context.size()}")
print(f"Encoder lengths: {encoder_lengths}")
print(context)

Context size: torch.Size([1, 5, 4])
Encoder lengths: [4]
tensor([[[-1.5599, -0.0335,  0.4118,  1.1816],
         [ 1.7072, -0.5390, -0.3469, -0.8212],
         [ 1.2864, -1.4800,  0.3576, -0.1640],
         [ 0.3521, -1.2853, -0.4791,  1.4123],
         [ 0.0000,  0.0000,  0.0000,  0.0000]]], grad_fn=<AddcmulBackward>)


# DECODER

In [4]:
decoder_lengths = list(np.random.choice(range(2, MAX_LEN), BATCH_SIZE))
decoder_inputs = torch.LongTensor([sentence(VOCA_SIZE, l, MAX_LEN) for l in decoder_lengths])

print(f"Decoder inputs:")
print(decoder_inputs)

Decoder inputs:
tensor([[ 191, 5344,  606, 4918,    0]])


In [5]:
decoder = Transformer_decoder(NUM_LAYERS, NUM_HEADS, VOCA_SIZE, EMBEDDING_DIM, MAX_LEN)
outputs = decoder(decoder_inputs, context, train=True)
print(f"Decoder output size: {outputs.size()}")
print(f"Decoder lengths: {decoder_lengths}")
print(outputs)

Decoder output size: torch.Size([1, 5, 10000])
Decoder lengths: [4]
tensor([[[-0.2263,  0.3481,  0.2369,  ..., -0.9305,  0.7762, -1.4372],
         [-0.5431, -0.1412, -0.7730,  ...,  0.8486, -0.0276,  0.7954],
         [-1.0668,  0.6207,  0.6913,  ...,  0.9590,  0.7337,  0.8203],
         [-0.1631,  0.4238,  0.4986,  ..., -0.3782,  0.1453, -1.0012],
         [-0.3810,  0.1576, -0.1229,  ...,  0.1578,  0.1786, -0.1664]]],
       grad_fn=<AddBackward0>)


# Inference

In [6]:
encoder_lengths = list(np.random.choice(range(2, MAX_LEN), 1))
decoder_lengths = list(np.random.choice(range(2, MAX_LEN), 1))

encoder_inputs = torch.LongTensor([sentence(VOCA_SIZE, l, MAX_LEN) for l in encoder_lengths])
print(f"Encoder lengths: {encoder_lengths}")
print(encoder_inputs)

Encoder lengths: [3]
tensor([[1583,  674, 9172,    0,    0]])


In [7]:
context = encoder(encoder_inputs)
print(f"Context size: {context.size()}")
print(context)

Context size: torch.Size([1, 5, 4])
tensor([[[-1.6683,  0.5388,  0.9446,  0.1849],
         [ 0.3961,  0.5981, -1.7200,  0.7258],
         [ 1.0363, -1.3185,  0.8987, -0.6164],
         [ 0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000]]], grad_fn=<AddcmulBackward>)


In [8]:
decoder.eval()
sos = torch.zeros(1,MAX_LEN, dtype=torch.long)
sos[0,0] = 2
sos[0,:5] = torch.LongTensor([2,0,0,0,0])
outputs = decoder(sos, context, train=False)
print(f"outputs size: {outputs.size()}")
print(outputs[0])

outputs size: torch.Size([1, 5, 10000])
tensor([[-0.4742, -0.0823, -0.5671,  ...,  1.2221, -0.5087,  1.0663],
        [-0.7382,  0.2376,  0.0471,  ...,  1.4286, -0.2412,  1.2623],
        [-0.8098,  0.5106,  0.6302,  ...,  1.5239, -0.2501,  1.2395],
        [-0.6865,  0.8304,  1.2111,  ...,  0.0125,  0.7494, -0.4918],
        [-0.9732,  0.9132,  1.3609,  ...,  0.8023,  0.6100,  0.4235]],
       grad_fn=<SelectBackward>)
