In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from models.seq2seq import Seq2seq, EncoderRNN, DecoderRNN

In [2]:
# Seed the global torch RNG so the random embedding matrix below (and every
# later torch.rand* call in this notebook) is reproducible on a fresh kernel.
SEED = 0
torch.manual_seed(SEED)

# Encoder-side hyper-parameters.
vocab_size = 100       # number of rows in the source embedding table
embedding_dim = 30     # width of each source embedding vector
hidden_size = 20       # hidden width handed to the encoder RNN
n_layers = 2           # number of stacked RNN layers
bidirectional = True   # encoder directionality flag

# Randomly initialised (vocab_size, embedding_dim) embedding table.
embedding_matrix = torch.randn((vocab_size, embedding_dim))

In [3]:
# Build the encoder from the hyper-parameters configured earlier in the notebook.
encoder = EncoderRNN(
    vocab_size=vocab_size,
    max_len=10,
    hidden_size=hidden_size,
    embedding_dim=embedding_dim,
    n_layers=n_layers,
    bidirectional=bidirectional,  # reuse the config flag instead of a hard-coded True
    rnn_cell='lstm',
    variable_lengths=False,
    embedding=embedding_matrix,
    update_embedding=True,
)

# nn.Module.modules() yields the module itself first and then every
# sub-module, so the encoder's own repr is printed before its children.
for module in encoder.modules():
    print(module)

EncoderRNN(
  (input_dropout): Dropout(p=0)
  (embedding): Embedding(100, 30)
  (rnn): LSTM(30, 20, num_layers=2, batch_first=True, bidirectional=True)
)
Dropout(p=0)
Embedding(100, 30)
LSTM(30, 20, num_layers=2, batch_first=True, bidirectional=True)


* 假设有下面的参数作为输入

In [4]:
# Dummy source batch: batch_size = 3 sequences, each of length 4.
# Token ids are drawn from [0, vocab_size) so they stay tied to the size of the
# encoder's embedding table. torch.randint already produces int64, so the
# dtype is stated once here instead of casting afterwards.
inputs_Var = torch.randint(low=0, high=vocab_size, size=(3, 4), dtype=torch.int64)


In [5]:
outputs, hidden = encoder(inputs_Var)
print(outputs.shape)     # (batch_size, seq_len, num_directions * hidden_size)

# nn.LSTM returns its final state as (h_n, c_n): hidden state first, cell state second.
# NOTE(review): assumes EncoderRNN passes the LSTM state tuple through unchanged — confirm.
hidden_state, cell_state = hidden
print(cell_state.shape)  # (n_layers * num_directions, batch_size, hidden_size)

torch.Size([3, 4, 40])
torch.Size([4, 3, 20])


In [6]:
decoder_vocab_size = 50
decoder_max_len = 10

# The decoder hidden width is kept in line with the encoder output width,
# which is hidden_size per direction of the encoder LSTM. Derive it from the
# encoder's RNN instead of hard-coding the number.
encoder_directions = 2 if encoder.rnn.bidirectional else 1
decoder_hidden_size = encoder.rnn.hidden_size * encoder_directions

decoder_embedding = 30  # width of each decoder embedding vector
decoder_embedding_matrix = torch.randn((decoder_vocab_size, decoder_embedding))

In [7]:
# Attention decoder configured from the decoder hyper-parameters.
decoder_0 = DecoderRNN(
    decoder_vocab_size,
    decoder_max_len,
    embedding_dim=decoder_embedding,
    hidden_size=decoder_hidden_size,
    sos_id=0,
    eos_id=1,
    n_layers=2,
    rnn_cell='lstm',
    # NOTE(review): presumably this flag describes the encoder's directionality;
    # confirm against DecoderRNN.
    bidirectional=True,
    use_attention=True,
    # Use the decoder's own (decoder_vocab_size, decoder_embedding) table. The
    # encoder's embedding_matrix has vocab_size rows and does not correspond to
    # the target vocabulary.
    embedding=decoder_embedding_matrix,
)

In [8]:
# Combine the encoder and the decoder into a single model, then print the
# repr of the model followed by the repr of every nested sub-module.
my_seq2seq = Seq2seq(encoder, decoder_0)
for module in my_seq2seq.modules():
    print(module)

Seq2seq(
  (encoder): EncoderRNN(
    (input_dropout): Dropout(p=0)
    (embedding): Embedding(100, 30)
    (rnn): LSTM(30, 20, num_layers=2, batch_first=True, bidirectional=True)
  )
  (decoder): DecoderRNN(
    (input_dropout): Dropout(p=0)
    (embedding): Embedding(50, 30)
    (rnn): LSTM(30, 40, num_layers=2, batch_first=True)
    (attention): Attention(
      (linear_out): Linear(in_features=80, out_features=40, bias=True)
    )
    (out): Linear(in_features=40, out_features=50, bias=True)
  )
)
EncoderRNN(
  (input_dropout): Dropout(p=0)
  (embedding): Embedding(100, 30)
  (rnn): LSTM(30, 20, num_layers=2, batch_first=True, bidirectional=True)
)
Dropout(p=0)
Embedding(100, 30)
LSTM(30, 20, num_layers=2, batch_first=True, bidirectional=True)
DecoderRNN(
  (input_dropout): Dropout(p=0)
  (embedding): Embedding(50, 30)
  (rnn): LSTM(30, 40, num_layers=2, batch_first=True)
  (attention): Attention(
    (linear_out): Linear(in_features=80, out_features=40, bias=True)
  )
  (out): Lin

In [9]:
# Dummy target batch: 3 sequences of 2 token ids each, drawn from [0, 40).
# NOTE(review): the upper bound 40 is smaller than the decoder vocabulary
# size configured earlier — confirm that this is intended.
target_variables = torch.randint(0, 40, (3, 2), dtype=torch.int64)
target_variables

tensor([[ 21,  28],
        [ 37,  32],
        [ 24,  19]])

In [10]:
# print(outputs.shape)
# print(hidden)
# print(target_variables.shape)

In [11]:
# Run the combined model on the source batch, supplying the target batch
# through the target_variable keyword.
results = my_seq2seq(
    inputs_Var,
    target_variable=target_variables,
)