# **Sequence to sequence**

In [1]:
import numpy as np
import torch
import torch.nn as nn
from torch import optim
import random
import tqdm
import sys

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [4]:
class Dictionary:
    def __init__(self, dict_name):
        self.dict_name = dict_name
        self.word_to_idx = {
            "<sos>": 0,
            "<eos>": 1
        }
        self.idx_to_word = {
            0: "<sos>",
            1: "<eos>"
        }
        self.n_words = 2

    def add_word(self, word):
        if word not in self.word_to_idx:
            self.word_to_idx[word] = self.n_words
            self.idx_to_word[self.n_words] = word
            self.n_words += 1

    def add_sentence(self, sentence):
        words = sentence.split()

        for word in words:
            self.add_word(word)

    def get_len(self):
        return self.n_words

In [5]:
class Corpus():
    def __init__(self, src_dict, target_dict, max_sentece_length):
        self.src_dict = Dictionary(src_dict)
        self.target_dict = Dictionary(target_dict)
        self.max_sentece_length = max_sentece_length

    def get_data(self, path):
        pairs = []
    
        with open(path, "r") as file:
            lines = file.read().strip().split("\n")

            for line in lines:
                src, target = line.split("; ")

                src_len = len(src.split())
                target_len = len(target.split())

                if src_len <= self.max_sentece_length and target_len <= self.max_sentece_length:
                    self.src_dict.add_sentence(src)
                    self.target_dict.add_sentence(target)
                    
                    pairs.append([src, target])

        return pairs, self.src_dict, self.target_dict

In [6]:
class Encoder(nn.Module):
    def __init__(self, input_size, hidden_size, emb_size, dropout_p):
        super(Encoder, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.emb_size = emb_size

        self.embedding = nn.Embedding(self.input_size, self.emb_size)
        self.dropout = nn.Dropout(dropout_p)
        self.GRU = nn.GRU(self.emb_size, self.hidden_size)

    def forward(self, src, hidden):
        emb_src = self.dropout(self.embedding(src))
        out, hidden = self.GRU(emb_src, hidden)
        
        return out, hidden

    def init_hidden(self):
        return torch.zeros(1, self.hidden_size, device=device)

In [7]:
class Decoder(nn.Module):
    def __init__(self, out_size, hidden_size, emb_size, dropout_p):
        super(Decoder, self).__init__()

        self.out_size = out_size
        self.hidden_size = hidden_size
        self.emb_size = emb_size

        self.embedding = nn.Embedding(self.out_size, self.emb_size)
        self.GRU = nn.GRU(self.emb_size, self.hidden_size)
        self.dropout = nn.Dropout(dropout_p)
        self.fc_layer = nn.Linear(self.hidden_size, self.out_size)
    
    def forward(self, target, hidden):
        emb_target = self.dropout(self.embedding(target))

        out, hidden = self.GRU(emb_target, hidden)

        prediction = self.fc_layer(out)

        return prediction, hidden
    
    def init_hidden(self):
        return torch.zeros(1, self.hidden_size, device=device)

In [8]:
class Seq_to_Seq(nn.Module):
    def __init__(self, encoder, decoder):
        super(Seq_to_Seq, self).__init__()

        self.encoder = encoder
        self.decoder = decoder

    def forward(self, src, target, teacher_forcing_ratio=0.5):
        encoder_hidden = self.encoder.init_hidden()
        
        src_length = src.size(0)
        target_length = len(target)

        for ei in range(src_length):
            _, encoder_hidden = self.encoder.forward(src[ei], encoder_hidden)
        
        decoder_input = target[0]
        decoder_hidden = encoder_hidden
        decoder_outputs = torch.zeros(target_length - 1, 1, self.decoder.out_size).to(device)

        use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

        for di in range(target_length - 1):
            decoder_out, decoder_hidden = self.decoder.forward(decoder_input, decoder_hidden)

            prediction = torch.tensor([decoder_out.argmax()]).to(device)
            
            decoder_outputs[di] = decoder_out

            if use_teacher_forcing:
                decoder_input = target[di + 1]
            else:
                decoder_input = prediction

        return decoder_outputs.view(target_length - 1, -1)

In [9]:
def init_weights(m):
    for name, param in m.named_parameters():
        nn.init.xavier_uniform_(param.data.unsqueeze_(0))

In [10]:
def pair_to_tensor(pair, src_dict, target_dict):
    src = pair[0]
    src_indexes = [src_dict.word_to_idx[word] for word in src.split()]
    src_tensor = torch.tensor(src_indexes, dtype=torch.long, device=device).unsqueeze(1)

    target = pair[1]
    target_indexes = [target_dict.word_to_idx[word] for word in target.split()]
    target_tensor = torch.tensor(target_indexes, dtype=torch.long, device=device).unsqueeze(1)

    return src_tensor, target_tensor

In [11]:
epochs = 15
emb_size = 256
hidden_size = 512
dropout_p = 0.5
learning_rate = 0.0007
max_sentence_length = 10

In [12]:
corpus = Corpus("en", "de", max_sentence_length)
pairs, src_dict, target_dict = corpus.get_data("/content/sample_data/text_dataset_for_seq_2_seq.txt")

In [13]:
pairs_count = len(pairs)
src_dict_size = src_dict.n_words
target_dict_size = target_dict.n_words

In [14]:
encoder = Encoder(src_dict_size, hidden_size, emb_size, dropout_p)

In [15]:
decoder = Decoder(target_dict_size, hidden_size, emb_size, dropout_p)

In [16]:
seq_to_seq = Seq_to_Seq(encoder, decoder).to(device)
seq_to_seq.apply(init_weights)

Seq_to_Seq(
  (encoder): Encoder(
    (embedding): Embedding(2971, 256)
    (dropout): Dropout(p=0.5, inplace=False)
    (GRU): GRU(256, 512)
  )
  (decoder): Decoder(
    (embedding): Embedding(3798, 256)
    (GRU): GRU(256, 512)
    (dropout): Dropout(p=0.5, inplace=False)
    (fc_layer): Linear(in_features=512, out_features=3798, bias=True)
  )
)

In [17]:
optimizer = optim.Adam(seq_to_seq.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [18]:
for epoch in range(epochs):
    epoch_loss = 0

    seq_to_seq.train()

    for pair in tqdm.tqdm(pairs, file=sys.stdout):
        src, target = pair_to_tensor(pair, src_dict, target_dict)
        optimizer.zero_grad()

        out = seq_to_seq(src, target)
        loss = criterion(out, target[1:].squeeze())

        loss.backward()
        torch.nn.utils.clip_grad_norm_(seq_to_seq.parameters(), 1)
        optimizer.step()

        epoch_loss += loss.item()

    print(f"Epoch {epoch}; loss: {epoch_loss / pairs_count}\n")

100%|██████████| 4003/4003 [01:00<00:00, 66.02it/s]
Epoch 0; loss: 0.5892894593358763

100%|██████████| 4003/4003 [01:00<00:00, 66.11it/s]
Epoch 1; loss: 0.5477690424992254

100%|██████████| 4003/4003 [01:00<00:00, 65.87it/s]
Epoch 2; loss: 0.5268576764707174

100%|██████████| 4003/4003 [00:59<00:00, 66.85it/s]
Epoch 3; loss: 0.5556061784171301

100%|██████████| 4003/4003 [01:00<00:00, 65.98it/s]
Epoch 4; loss: 0.545709761226676

100%|██████████| 4003/4003 [01:00<00:00, 66.68it/s]
Epoch 5; loss: 0.5100897748448198

100%|██████████| 4003/4003 [01:00<00:00, 66.07it/s]
Epoch 6; loss: 0.5042529957985228

100%|██████████| 4003/4003 [01:02<00:00, 64.28it/s]
Epoch 7; loss: 0.5077478530621783

100%|██████████| 4003/4003 [01:01<00:00, 64.73it/s]
Epoch 8; loss: 0.4985605996260746

100%|██████████| 4003/4003 [01:01<00:00, 64.75it/s]
Epoch 9; loss: 0.48033828865177725

100%|██████████| 4003/4003 [01:05<00:00, 61.14it/s]
Epoch 10; loss: 0.5056558893523613

100%|██████████| 4003/4003 [01:03<00:00, 6

In [19]:
with torch.no_grad():
    for i in range(5):
        random_pair = random.choice(pairs)

        src, target = pair_to_tensor(random_pair, src_dict, target_dict)

        decoded_sentence = []

        out = seq_to_seq.forward(src, target)
        
        predict = "<sos> " + " ".join([target_dict.idx_to_word[idx.item()] for idx in out.argmax(dim=1)])

        print(f"source {i}:  ", random_pair[0])
        print(f"target {i}:  ", random_pair[1])
        print(f"predict {i}: ", predict)


source 0:   <sos> a man making balloon and enjoy <eos>
target 0:   <sos> ein mann blast vergnugt einen luftballon auf <eos>
predict 0:  <sos> ein mann blast vergnugt einen luftballon auf <eos>

source 1:   <sos> a woman is selling handmade hats <eos>
target 1:   <sos> eine frau verkauft handgefertigte hute <eos>
predict 1:  <sos> eine frau verkauft handgefertigte hute <eos>

source 2:   <sos> a young girl holds a crying infant <eos>
target 2:   <sos> ein kleines madchen halt ein weinendes baby <eos>
predict 2:  <sos> ein junges madchen halt ein weinendes auf <eos>

source 3:   <sos> a flower girl is posing in a doorway <eos>
target 3:   <sos> ein blumenmadchen posiert in einer turoffnung <eos>
predict 3:  <sos> ein blumenmadchen posiert in einer turoffnung <eos>

source 4:   <sos> the baby is holding a santa figurine <eos>
target 4:   <sos> das baby halt eine weihnachtsmannpuppe <eos>
predict 4:  <sos> das junge halt eine weihnachtsmannpuppe <eos>

