# Sequence to Sequence RNN

This notebook trains a sequence-to-sequence (seq2seq) model, an encoder-decoder RNN with an attention mechanism, that maps English sentences to their French counterparts.

In [8]:
import numpy as np
import torch
from dataset.npz_dataset import NPZSequencesDataset
from models.rnn import CellType
from torch.utils.data import DataLoader
from training.seq2seq_rnn_attn import Seq2SeqAttentionRNNPredictor

# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules are picked up before each cell runs, without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
# Select the compute device: use the GPU when PyTorch reports a usable CUDA
# runtime, otherwise fall back to the CPU. `torch.cuda.is_available()` is the
# documented availability check, preferred over testing the device count.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device", device)

Device cpu


In [10]:
# --- Optimisation settings ---
n_epochs = 10      # number of training epochs
batch_size = 256   # number of sequence pairs per training batch
lr = 1e-3          # learning rate

# --- Model architecture ---
cell_type = CellType.LSTM   # recurrent cell type (LSTM | GRU | RNN)
num_layers = 2              # number of stacked RNN layers
hidden_size = 32            # hidden size of each RNN layer
embedding_dim = 32          # dimension of the embeddings
bidirectional = True        # use bidirectional RNN layers if True

# --- Data ---
max_length = 20    # maximum sequence length

In [11]:

# Load the English and French sentences. Each .npz archive is opened in a
# `with` block so its underlying file handle is closed as soon as the "data"
# array has been read into memory (a bare `np.load(...)["data"]` leaves the
# archive open).
with np.load("small_vocab_en.npz") as archive_en:
    sequences_en = archive_en["data"]
with np.load("small_vocab_fr.npz") as archive_fr:
    sequences_fr = archive_fr["data"]

# Vocabulary size is the largest value found in the data plus one
# (assumes the arrays hold non-negative integer token ids -- TODO confirm).
# The maximum is converted to a Python int *before* adding 1 so the sum cannot
# wrap around if the arrays are stored in a narrow integer dtype such as uint8.
vocab_size_en = int(sequences_en.max()) + 1
vocab_size_fr = int(sequences_fr.max()) + 1

In [12]:
# Build the dataset from the English and French .npz files, limited to
# `max_length`, then wrap it in a DataLoader that reshuffles every epoch and
# discards the final incomplete batch so every batch has `batch_size` items.
dataset = NPZSequencesDataset(
    "small_vocab_en.npz",
    "small_vocab_fr.npz",
    max_length=max_length,
)
dataloader = DataLoader(
    dataset,
    batch_size=batch_size,
    drop_last=True,
    shuffle=True,
)

In [None]:
# Instantiate the seq2seq attention predictor from the settings defined above.
predictor = Seq2SeqAttentionRNNPredictor(
    # Vocabulary sizes: English on the input side, French on the output side
    vocab_size_in=vocab_size_en,
    vocab_size_out=vocab_size_fr,
    # Recurrent architecture
    cell_type=cell_type,
    num_layers=num_layers,
    hidden_size=hidden_size,
    embedding_dim=embedding_dim,
    bidirectional=bidirectional,
    # Sequence length, batch size and compute device
    max_length=max_length,
    batch_size=batch_size,
    device=device,
)

In [None]:
# Run training on the batches from `dataloader` with the configured number of
# epochs, batch size and learning rate.
predictor.train(dataloader=dataloader, epochs=n_epochs, batch_size=batch_size, lr=lr)