In [3]:
import torch
from torch import nn
from torch.nn import functional as F
from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence

import sys
sys.path.append('..')

from nmt.datasets import Vocab, batch_iter
from nmt.networks import CharEmbedding, Encoder

from typing import List

In [4]:
## Setup something to work with

sentences_words = [
    ['Human:', 'What', 'do', 'we', 'want?'],
    ['Computer:', 'Natural', 'language', 'processing!'],
    ['Human:', 'When', 'do', 'we', 'want', 'it?'],
    ['Computer:', 'When', 'do', 'we', 'want', 'what?']
]

In [5]:
vocab = Vocab.build(sentences_words, sentences_words)

Initializing source vocab
Vocab Store: Tokens [size=17],                 Characters [size=97]
Initializing target vocab
Vocab Store: Tokens [size=17],                 Characters [size=97]


In [6]:
data = list(zip(sentences_words, sentences_words))
data_generator = batch_iter(
    data=data,
    batch_size=4,
    shuffle=True
)
batch_src, batch_tgt = next(data_generator)
print(batch_src)

[['Computer:', 'When', 'do', 'we', 'want', 'what?'], ['Human:', 'When', 'do', 'we', 'want', 'it?'], ['Human:', 'What', 'do', 'we', 'want?'], ['Computer:', 'Natural', 'language', 'processing!']]


In [7]:
source_length = [len(sent) for sent in batch_src]
print(source_length)

[6, 6, 5, 4]


In [8]:
char_tensors = vocab.src.to_tensor(batch_src, tokens=False)
print(f"Char Tensor size = {char_tensors.size()}")

Char Tensor size = torch.Size([6, 4, 21])


In [9]:
encoder = Encoder(input_size=300, hidden_size=1024, num_layers=2)

In [10]:
c_embedding = CharEmbedding(num_embeddings=vocab.src.length(tokens=False), char_embedding_dim=50, embedding_dim=300, char_padding_idx=vocab.src.pad_char_idx)
char_embedding = c_embedding(char_tensors)
print(char_embedding.size())

torch.Size([6, 4, 300])


In [11]:
char_enc_hidden, (char_hidden, char_cell) = encoder(char_embedding, source_length)
char_enc_hidden.shape, char_hidden.shape, char_cell.shape

(torch.Size([4, 6, 2048]), torch.Size([4, 1024]), torch.Size([4, 1024]))

In [15]:
class Attention(nn.Module):
    def forward(self, enc_hidden: torch.Tensor,
                dec_hidden_t: torch.Tensor,
                enc_masks_all: torch.Tensor = None) -> torch.Tensor:
        dec_hidden_unsqueezed_t = dec_hidden_t.unsqueeze(dim=2)
        score_t = enc_hidden.bmm(dec_hidden_unsqueezed_t)
        score_t = score_t.squeezed(dim=2)

        if enc_masks_all:
            score_t.data.masked_fill_(
                enc_masks.byte().to(torch.bool),
                -float('inf')
            )
        
        alpha_t = F.softmax(score_t, dim=1)
        alpha_t = alpha.unsqueeze(dim=1)

        attention = alpha_t.bmm(enc_hidden)
        return attention.squeeze(dim=1)

In [16]:
attention = Attention()

In [17]:
attention(char_enc_hidden, char_hidden).shape

RuntimeError: Expected tensor to have size 2048 at dimension 1, but got size 1024 for argument #2 'batch2' (while checking arguments for bmm)

In [None]:
class Decoder(nn.Module):
    def __init__(self):
        super(Decoder, self).__init__()
        