In [1]:
import torch
import pandas as pd
from torch.nn.utils.rnn import pad_sequence
from torchtext.data.utils import get_tokenizer
from torch.utils.data import DataLoader
import sys
sys.path.append('./data_prep')
from sentence_dataset_class import ProcessedSentences
from sentence_processing import build_vocab,sentence_processing
sys.path.append('./transformer_testing')
from tomislav_transformer import Seq2SeqTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [3]:
# Load the train and test splits, one JSON file per split, into DataFrames.
df_train, df_test = (
    pd.read_json(json_path)
    for json_path in ('data/train_data.json', 'data/test_data.json')
)

In [4]:
# Reserved vocabulary symbols. Each *_IDX constant is the position of the
# matching entry in special_symbols, so the two definitions cannot drift apart.
special_symbols = ['<unk>', '<pad>', '<bos>', '<eos>']
UNK_IDX, PAD_IDX, BOS_IDX, EOS_IDX = range(len(special_symbols))

In [5]:
# Tokenizer shared by vocabulary building and sentence encoding below
# (torchtext's built-in 'basic_english' preset).
token_transform = get_tokenizer(tokenizer='basic_english')

In [6]:
# Build one vocabulary per column of the training frame. Only training text is
# used here; the test split is later encoded with these same vocabularies.
train_input_vocab, train_output_vocab = (
    build_vocab(df_train[column], token_transform, special_symbols)
    for column in ('input_data', 'output_data')
)

In [7]:
# Encode every training sentence with sentence_processing, handing it the
# <bos>/<eos> ids. BOS_IDX and EOS_IDX hold the same values as
# special_symbols.index('<bos>') and special_symbols.index('<eos>').
train_input_sentences = [
    sentence_processing(text, train_input_vocab, token_transform, BOS_IDX, EOS_IDX)
    for text in df_train['input_data'].values
]
train_output_sentences = [
    sentence_processing(text, train_output_vocab, token_transform, BOS_IDX, EOS_IDX)
    for text in df_train['output_data'].values
]

In [8]:
# Encode the test split with the vocabularies built from the TRAINING data, so
# train and test share one id space. BOS_IDX and EOS_IDX hold the same values as
# special_symbols.index('<bos>') and special_symbols.index('<eos>').
test_input_sentences = [
    sentence_processing(text, train_input_vocab, token_transform, BOS_IDX, EOS_IDX)
    for text in df_test['input_data'].values
]
test_output_sentences = [
    sentence_processing(text, train_output_vocab, token_transform, BOS_IDX, EOS_IDX)
    for text in df_test['output_data'].values
]

In [9]:
# Stack the training inputs into a single batch-first tensor, filling the tail
# of every shorter sequence with PAD_IDX.
train_input_sentences_padded = pad_sequence(
    train_input_sentences,
    padding_value=PAD_IDX,
    batch_first=True,
)

In [31]:
# Inspect the first padded training input (the whole row is displayed).
train_input_sentences_padded[0]

tensor([    2,  1129,    16,     9,    13,    65,   226,  2936,   276,    32,
          246,     8,   230,   231,    27, 14821,    31,    16,   308,     9,
            8,  1745,    35,  3081,   124,   901,    17,   197,     4,     3,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1, 

In [10]:
# Replace each list of variable-length sequences with one batch-first tensor,
# padded with PAD_IDX up to the longest sequence in that particular list.
# All four tensors are computed before any of the names is rebound.
(train_input_sentences,
 train_output_sentences,
 test_input_sentences,
 test_output_sentences) = (
    pad_sequence(sequences, batch_first=True, padding_value=PAD_IDX)
    for sequences in (
        train_input_sentences,
        train_output_sentences,
        test_input_sentences,
        test_output_sentences,
    )
)

In [14]:
# Wrap the padded (input, output) tensors of each split in a ProcessedSentences
# dataset: first the training pair, then the test pair.
train_dataset, test_dataset = (
    ProcessedSentences(input_data=source, output_data=target)
    for source, target in zip(
        (train_input_sentences, test_input_sentences),
        (train_output_sentences, test_output_sentences),
    )
)

In [15]:
import random

# Probability that a single train() call uses teacher forcing, i.e. feeds the
# ground-truth target token (rather than the decoder's own prediction) as the
# next decoder input. train() compares one random.random() draw against it.
teacher_forcing_ratio = 0.5


def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=10):
    """Run one optimisation step on a single (input, target) sequence pair.

    The encoder is fed ``input_tensor`` one step at a time along dim 0 and its
    per-step outputs are collected into a ``(max_length, hidden_size)`` buffer
    that is handed to the decoder. The decoder is then unrolled over the
    target, starting from a ``[[BOS_IDX]]`` input. With probability
    ``teacher_forcing_ratio`` the whole call uses teacher forcing; otherwise the
    decoder consumes its own top-1 prediction and stops early once it emits
    ``EOS_IDX``.

    Relies on the module-level names ``device``, ``BOS_IDX``, ``EOS_IDX`` and
    ``teacher_forcing_ratio``.

    Args:
        input_tensor: token ids of ONE source sequence, stepped along dim 0.
            NOTE(review): this is a single sequence, not a batch -- the decoder
            input is a 1x1 tensor and ``.item()`` is called on the prediction.
        target_tensor: token ids of the matching target sequence, stepped along
            dim 0; ``target_tensor[di]`` is passed to ``criterion`` as-is.
        encoder: module exposing ``initHidden()``, ``hidden_size`` and
            ``encoder(token, hidden) -> (output, hidden)``.
        decoder: module called as ``decoder(token, hidden, encoder_outputs)``
            and returning ``(output, hidden, attention)``.
        encoder_optimizer: optimizer for the encoder parameters.
        decoder_optimizer: optimizer for the decoder parameters.
        criterion: loss applied to ``(decoder_output, target_tensor[di])``.
        max_length: number of rows in the encoder-output buffer.
            NOTE(review): presumably has to match the length the decoder's
            attention was built for -- confirm against the decoder in use.

    Returns:
        float: summed loss of this call divided by the target length
        (``0.0`` for an empty target).

    Raises:
        IndexError: if the input has more steps than ``max_length``.
    """
    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    # Fail before any work is done: the buffer below has only max_length rows,
    # so a longer input would otherwise blow up part-way through the encoder.
    if input_length > max_length:
        raise IndexError(
            'input sequence has %d steps but max_length is %d; '
            'pass a larger max_length or shorten the input'
            % (input_length, max_length))

    # Nothing to predict: previously this reached `loss.backward()` with an
    # integer `loss` and crashed.
    if target_length == 0:
        return 0.0

    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[BOS_IDX]], device=device)
    decoder_hidden = encoder_hidden  # decoder starts from the final encoder state

    # One draw per call decides the decoding mode for the whole sequence.
    use_teacher_forcing = random.random() < teacher_forcing_ratio

    loss = 0
    for di in range(target_length):
        decoder_output, decoder_hidden, _ = decoder(
            decoder_input, decoder_hidden, encoder_outputs)
        loss += criterion(decoder_output, target_tensor[di])

        if use_teacher_forcing:
            # Teacher forcing: feed the ground-truth token as the next input.
            decoder_input = target_tensor[di]
        else:
            # Free running: feed back the decoder's own best guess, detached so
            # gradients do not flow through the sampled input.
            _, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()
            if decoder_input.item() == EOS_IDX:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [16]:
import time
import math


def asMinutes(s):
    """Format a duration given in seconds as ``'<minutes>m <seconds>s'``.

    Minutes are not folded into hours, and both parts are rendered with
    ``%d`` (so fractional seconds are truncated in the output).
    """
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """Describe elapsed time and a linear estimate of the time still to go.

    Args:
        since: start timestamp, as returned by ``time.time()``.
        percent: fraction of the work already done; must be non-zero because
            the elapsed time is divided by it.

    Returns:
        str: ``'<elapsed> (- <remaining>)'``, both parts formatted by
        ``asMinutes``.
    """
    elapsed = time.time() - since
    projected_total = elapsed / (percent)
    remaining = projected_total - elapsed
    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(remaining))

In [17]:
import matplotlib.pyplot as plt
# NOTE(review): 'agg' is a non-interactive backend, so figures created after
# this call are presumably not rendered inline in the notebook -- confirm this
# is intended (e.g. figures are meant to be saved to disk instead).
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np


def showPlot(points):
    """Draw ``points`` as a line plot with major y-axis ticks every 0.2.

    Args:
        points: sequence of values to plot against their index.

    Returns:
        None. The figure is left open on the current matplotlib backend.
    """
    # plt.subplots() creates the figure itself; the previous extra plt.figure()
    # call opened a second, empty figure on every invocation and never used it.
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.2))
    ax.plot(points)

In [18]:
def trainIters(encoder, decoder, n_epochs, print_every=1000, plot_every=100, learning_rate=0.01, max_length=None):
    """Train ``encoder``/``decoder`` with SGD for ``n_epochs`` passes over ``train_dataset``.

    Batches are drawn from the module-level ``train_dataset`` (batch size 32,
    shuffled). ``train`` works on ONE sequence pair at a time, so every batch is
    split back into individual pairs; padding tokens are removed and the
    tensors are moved to ``device`` before each call.

    Args:
        encoder: encoder module, forwarded to ``train``.
        decoder: decoder module, forwarded to ``train``.
        n_epochs: number of full passes over the training data.
        print_every: print the running average loss every this many epochs.
        plot_every: record a point for the loss plot every this many epochs.
        learning_rate: learning rate of both SGD optimizers.
        max_length: optional value forwarded to ``train(max_length=...)``.
            ``None`` (the default) keeps ``train``'s own default.

    Returns:
        None. Progress lines are printed and the recorded averages are passed
        to ``showPlot``.
    """
    start = time.time()
    plot_losses = []
    # Totals are reset every print_every / plot_every epochs. The counts track
    # how many train() calls fed each total, so the averages are per sequence
    # (dividing by the epoch interval, as before, inflated them).
    print_loss_total, print_loss_count = 0, 0
    plot_loss_total, plot_loss_count = 0, 0

    encoder_optimizer = torch.optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = torch.optim.SGD(decoder.parameters(), lr=learning_rate)
    train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)

    criterion = torch.nn.NLLLoss()

    # Only override train()'s default when the caller asked for it.
    train_kwargs = {} if max_length is None else {'max_length': max_length}

    for epoch in range(1, n_epochs + 1):

        for batch in train_dataloader:
            # batch[0] / batch[1] hold the padded inputs / targets of the batch;
            # iterating them yields one sequence per step.
            for input_row, target_row in zip(batch[0], batch[1]):
                # Drop padding and add a trailing singleton dim.
                # NOTE(review): the (seq_len, 1) layout follows the PyTorch
                # seq2seq tutorial that train() mirrors -- confirm against the
                # encoder/decoder actually passed in.
                input_tensor = input_row[input_row != PAD_IDX].unsqueeze(1).to(device)
                target_tensor = target_row[target_row != PAD_IDX].unsqueeze(1).to(device)

                loss = train(input_tensor, target_tensor, encoder,
                             decoder, encoder_optimizer, decoder_optimizer, criterion,
                             **train_kwargs)
                print_loss_total += loss
                print_loss_count += 1
                plot_loss_total += loss
                plot_loss_count += 1

        if epoch % print_every == 0:
            print_loss_avg = print_loss_total / max(print_loss_count, 1)
            print_loss_total, print_loss_count = 0, 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, epoch / n_epochs),
                                         epoch, epoch / n_epochs * 100, print_loss_avg))

        if epoch % plot_every == 0:
            plot_loss_avg = plot_loss_total / max(plot_loss_count, 1)
            plot_losses.append(plot_loss_avg)
            plot_loss_total, plot_loss_count = 0, 0
    showPlot(plot_losses)