# Imports and Drive

In [None]:
!pip install torchsummary
!pip install transformers datasets
from torchsummary import summary

Collecting transformers
  Downloading transformers-4.12.5-py3-none-any.whl (3.1 MB)
[K     |████████████████████████████████| 3.1 MB 12.2 MB/s 
[?25hCollecting datasets
  Downloading datasets-1.16.1-py3-none-any.whl (298 kB)
[K     |████████████████████████████████| 298 kB 52.5 MB/s 
Collecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 36.7 MB/s 
[?25hCollecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 48.7 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.2.1-py3-none-any.whl (61 kB)
[K     |████████████████████████████████| 61 kB 507 kB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)
[K     |████████

In [None]:
!pip install torchtext==0.6.0
import os
import json
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import itertools
import torch.nn as nn
from collections import Counter
import sys
import gym
from pylab import *
from datetime import datetime
from random import random, randrange, getrandbits, randint, seed 
import argparse
from absl import logging
import tensorflow as tf
import torch.optim as optim
import tensorflow_hub as hub
import matplotlib.pyplot as plt
from torchtext.data import Field, BucketIterator, Dataset, Example, TabularDataset
from tqdm import tqdm_notebook, tqdm

import re
import seaborn as sns

Collecting torchtext==0.6.0
  Downloading torchtext-0.6.0-py3-none-any.whl (64 kB)
[?25l[K     |█████                           | 10 kB 26.7 MB/s eta 0:00:01[K     |██████████▏                     | 20 kB 24.1 MB/s eta 0:00:01[K     |███████████████▎                | 30 kB 17.8 MB/s eta 0:00:01[K     |████████████████████▍           | 40 kB 15.6 MB/s eta 0:00:01[K     |█████████████████████████▌      | 51 kB 10.8 MB/s eta 0:00:01[K     |██████████████████████████████▋ | 61 kB 12.3 MB/s eta 0:00:01[K     |████████████████████████████████| 64 kB 2.0 MB/s 
Collecting sentencepiece
  Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[K     |████████████████████████████████| 1.2 MB 27.5 MB/s 
Installing collected packages: sentencepiece, torchtext
  Attempting uninstall: torchtext
    Found existing installation: torchtext 0.11.0
    Uninstalling torchtext-0.11.0:
      Successfully uninstalled torchtext-0.11.0
Successfully i

In [None]:
from google.colab import drive

# Paths inside the mounted Google Drive: the Drive root, the project folder,
# and the project's data folder (read by the pre-processing cells below).
MY_DRIVE = '/content/drive/MyDrive'
base_dir = "/content/drive/MyDrive/nlp_final_project/"
data_dir = os.path.join(base_dir, "data")

# Colab only: attach the user's Drive at /content/drive.
drive.mount("/content/drive")

Mounted at /content/drive


# Data Pre-processing

## Main Data Pre-processing

In [None]:
# Build the parallel corpus. `inputs` collects the lines read from the
# "shakespeare" folder and `targets` the lines read from the "modern" folder;
# the two lists are index-aligned, so inputs[i] pairs with targets[i].
inputs = []
targets = []

modern_dir = os.path.join(data_dir, "modern")
shakespeare_dir = os.path.join(data_dir, "shakespeare")

# os.listdir returns entries in arbitrary order. Sorting keeps the corpus order
# (and therefore the seeded train/validation split built from it) identical
# from run to run.
for filename in sorted(os.listdir(modern_dir)):
  # Explicit UTF-8: the text contains non-ASCII punctuation (curly quotes,
  # dashes), so the platform's default encoding must not be relied upon.
  with open(os.path.join(modern_dir, filename), "r", encoding="utf-8") as f:
    modern_lines = [line.strip() for line in f]

  # The matching original file shares the prefix before the first underscore.
  corresponding_shakespeare_file = filename.split("_")[0] + "_original.snt.aligned"
  with open(os.path.join(shakespeare_dir, corresponding_shakespeare_file), "r", encoding="utf-8") as f:
    original_lines = [line.strip() for line in f]

  lines_original = len(original_lines)
  lines_modern = len(modern_lines)
  print(lines_original, lines_modern)

  # A length mismatch would silently shift every later pair out of alignment,
  # so stop before anything from this file is added.
  if lines_original != lines_modern:
    raise ValueError(
        "Line count mismatch between %s (%d lines) and %s (%d lines)"
        % (corresponding_shakespeare_file, lines_original, filename, lines_modern))

  targets.extend(modern_lines)
  inputs.extend(original_lines)

1462 1462
755 755
1427 1427
1103 1103
1066 1066
1085 1085
1706 1706
1218 1218
1722 1722
1144 1144
1201 1201
856 856
1080 1080


In [None]:
def get_input_target_pair(i):
  """Return the i-th aligned pair as an (input line, target line) tuple.

  Both values are read from the module-level `inputs` and `targets` lists,
  using the same index `i` for each.
  """
  source_line = inputs[i]
  target_line = targets[i]
  return source_line, target_line

In [None]:
# Show two aligned pairs as a quick sanity check of the loaded corpus.
for sample_idx in (0, 7453):
  shakespeare_line, modern_line = get_input_target_pair(sample_idx)
  print("Shakespearean input:" , shakespeare_line)
  print("NAV target: ", modern_line)


Shakespearean input: Riddling confession finds but riddling shrift.
NAV target:  A jumbled confession can only receive a jumbled absolution.
Shakespearean input: Leave me, I pray, a little.
NAV target:  Just give me a moment, please—only a moment.


In [None]:
# Hold out 20% of the aligned pairs for validation. The fixed random_state
# makes the split repeatable for a given ordering of `inputs`/`targets`.
input_train, input_val, targets_train, targets_val = train_test_split(
    inputs,
    targets,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Value passed as `fix_length` to both fields below.
MAX_LEN = 50


def _make_text_field():
  """Build a torchtext Field with the settings shared by the source and target sides."""
  return Field(
      tokenizer_language = "en",
      init_token = '<sos>',
      eos_token = '<eos>',
      fix_length = MAX_LEN,
      lower = True)


# Two separate Field objects, so each side gets its own vocabulary later on.
SRC = _make_text_field()
TRG = _make_text_field()

# Column name -> field mapping, in the column order of the TSV files.
fields = [("src", SRC),("trg",TRG)]

In [None]:
def _write_pairs_tsv(path, sources, references):
  """Write one `source<TAB>reference` line per aligned pair to `path`.

  Args:
    path: Destination file; overwritten if it already exists.
    sources: Input-side sentences.
    references: Target-side sentences, aligned index-by-index with `sources`.
  """
  # Explicit UTF-8 so the bytes written do not depend on the platform's default
  # encoding (the corpus contains non-ASCII punctuation).
  with open(path, "w", encoding="utf-8") as f:
    for source_line, reference_line in zip(sources, references):
      f.write(source_line + "\t" + reference_line + "\n")


# Materialise both splits as two-column TSV files for TabularDataset to read.
_write_pairs_tsv("train.tsv", input_train, targets_train)
_write_pairs_tsv("valid.tsv", input_val, targets_val)

In [None]:
# Load the two TSV files written above from the current working directory.
# The files have no header row, and columns are bound to `fields` in order.
train_data, valid_data = TabularDataset.splits(
    path='',
    format="tsv",
    train='train.tsv',
    validation='valid.tsv',
    skip_header=False,
    fields=fields,
)

In [None]:
# Mini-batch size used when the iterators are built.
BATCH_SIZE = 128

# Report how many examples ended up in each split.
n_train_examples = len(train_data)
n_valid_examples = len(valid_data)
print("Length of train data: ", n_train_examples)
print("Length of valid data: ", n_valid_examples)

Length of train data:  12660
Length of valid data:  3165


In [None]:
# Both vocabularies are built from the training split only; min_freq = 1 keeps
# every token seen there.
SRC.build_vocab(train_data, min_freq = 1)
TRG.build_vocab(train_data, min_freq= 1)
input_size, output_size = len(SRC.vocab), len(TRG.vocab)

# Index of the padding token on the target side (ignored by the loss later on).
PAD_IDX = TRG.vocab.stoi["<pad>"]

# Prefer the GPU when one is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Batch iterators over both splits; batches are created directly on `device`.
train_loader, valid_loader = BucketIterator.splits(
    (train_data, valid_data), batch_size=BATCH_SIZE, device=device, sort=False)
print(input_size)
print(output_size)

16948
14445


# Models


## Seq2Seq Without Attention

Our implementation of a Seq2Seq model without attention layers.

### Encoder

In [None]:
class Encoder(nn.Module):
    """Single-layer LSTM encoder for the attention-free Seq2Seq model.

    Args:
        input_size: Size of the source vocabulary (rows of the embedding table).
        emb_size: Embedding dimension.
        encoder_hidden_size: Hidden size of the encoder LSTM.
        decoder_hidden_size: Size the final hidden state is projected to before
            it is handed to the decoder.
        dropout: Dropout probability applied to the embedded input.
        model_type: Label stored on the instance as `self.model_type`; it is not
            used by any computation in this class.
    """

    def __init__(self, input_size, emb_size, encoder_hidden_size, decoder_hidden_size, dropout = 0.2, model_type = "LSTM"):
        super(Encoder, self).__init__()
        self.input_size = input_size
        self.emb_size = emb_size
        self.encoder_hidden_size = encoder_hidden_size
        self.decoder_hidden_size = decoder_hidden_size
        # Previously read from an undefined global, which raised NameError on a
        # fresh kernel; it is now an optional keyword argument.
        self.model_type = model_type
        self.embedding_layer = nn.Embedding(input_size, emb_size)
        self.recurrent_layer = nn.LSTM(emb_size, encoder_hidden_size, batch_first=True)
        self.linear_1 = nn.Linear(encoder_hidden_size, encoder_hidden_size)
        self.relu_layer = nn.ReLU(inplace=False)
        self.linear_2 = nn.Linear(encoder_hidden_size, decoder_hidden_size)
        self.dropout_layer = nn.Dropout(dropout)

    def forward(self, input):
        """Encode a batch of token-index sequences.

        Args:
            input: Integer tensor of token indices, batch-first (the LSTM is
                built with batch_first=True), i.e. (batch, seq_len).

        Returns:
            A pair `(output, (h, c))`: `output` holds the LSTM outputs for every
            time step, `h` is the final hidden state passed through
            linear -> ReLU -> linear -> tanh, and `c` is the final cell state
            returned unchanged.
        """
        embedded = self.dropout_layer(self.embedding_layer(input))
        output, (final_hidden, final_cell) = self.recurrent_layer(embedded)
        # Only the hidden state is projected; the cell state keeps the encoder's size.
        h = torch.tanh(self.linear_2(self.relu_layer(self.linear_1(final_hidden))))
        return output, (h, final_cell)

### Decoder

In [None]:

class Decoder(nn.Module):
    """Single-layer LSTM decoder that predicts one target token per call.

    Args:
        emb_size: Embedding dimension.
        encoder_hidden_size: Stored on the instance; not used to size any layer.
        decoder_hidden_size: Hidden size of the decoder LSTM and input width of
            the output projection.
        output_size: Size of the target vocabulary.
        dropout: Dropout probability applied to the embedded input.
        model_type: Label stored on the instance as `self.model_type`; it is not
            used by any computation in this class.
    """

    def __init__(self, emb_size, encoder_hidden_size, decoder_hidden_size, output_size, dropout = 0.2, model_type = "LSTM"):
        super(Decoder, self).__init__()
        self.emb_size = emb_size
        self.encoder_hidden_size = encoder_hidden_size
        self.decoder_hidden_size = decoder_hidden_size
        self.output_size = output_size
        # Previously read from an undefined global, which raised NameError on a
        # fresh kernel; it is now an optional keyword argument.
        self.model_type = model_type
        self.embedding_layer = nn.Embedding(output_size, emb_size)
        # The LSTM output feeds `self.linear`, so it must be decoder_hidden_size
        # wide. (It was sized with encoder_hidden_size, which only worked when
        # both sizes were equal.)
        self.recurrent_layer = nn.LSTM(emb_size, decoder_hidden_size, batch_first=True)
        self.linear = nn.Linear(decoder_hidden_size, output_size)
        # forward() feeds a 2-D (batch, vocab) tensor, so normalise over dim 1.
        # This is the dim the implicit form picked for 2-D input, minus the warning.
        self.log_softmax = nn.LogSoftmax(dim=1)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input, hidden):
        """Run one decoding step.

        Args:
            input: Integer tensor of token indices, batch-first. Only the first
                time step of the LSTM output is used, so callers pass
                (batch, 1).
            hidden: `(h, c)` LSTM state to start from.

        Returns:
            A pair `(output, hidden)`: log-probabilities over the target
            vocabulary with shape (batch, output_size), and the updated LSTM
            state.
        """
        embedded = self.dropout(self.embedding_layer(input))
        step_output, new_hidden = self.recurrent_layer(embedded, hidden)
        log_probs = self.log_softmax(self.linear(step_output[:, 0, :]))
        return log_probs, new_hidden


### Seq2Seq

In [None]:
class Seq2Seq(nn.Module):
    """Encoder-decoder wrapper that decodes greedily, one token per step.

    Args:
        encoder: Module whose call returns `(outputs, hidden)`; only `hidden`
            is used here.
        decoder: Module called as `decoder(tokens, hidden)` that returns
            `(scores, hidden)` and exposes an `output_size` attribute.
        device: Device both sub-modules are moved to.
    """

    def __init__(self, encoder, decoder, device):
        super(Seq2Seq, self).__init__()
        self.device = device
        self.encoder = encoder.to(self.device)
        self.decoder = decoder.to(self.device)

    def forward(self, source, out_seq_len = None):
        """Produce per-step vocabulary scores for a batch of source sequences.

        Args:
            source: Batch-first tensor of source token indices, (batch, seq_len).
            out_seq_len: Number of output positions to produce. Defaults to the
                source sequence length.

        Returns:
            Tensor of shape (batch, out_seq_len, decoder.output_size). Position 0
            is all zeros (a placeholder for the start token); the remaining
            positions hold the decoder's scores.
        """
        batch_size = source.shape[0]
        # Bug fix: `seq_len` used to be assigned only when out_seq_len was None,
        # so passing an explicit length raised UnboundLocalError.
        seq_len = source.shape[1] if out_seq_len is None else out_seq_len

        sos_output = torch.zeros([batch_size, self.decoder.output_size], dtype=torch.float, device=self.device)
        outputs = [sos_output]

        hidden = self.encoder(source)[1]
        # The first decoder input is column 0 of the source batch
        # (presumably the <sos> index -- TODO confirm it matches on the target side).
        decoder_input = source[:, 0].reshape((batch_size, 1))

        for _ in range(seq_len - 1):
            output, hidden = self.decoder(decoder_input, hidden)
            outputs.append(output)
            # Greedy decoding: the most likely token becomes the next input.
            decoder_input = torch.argmax(output, dim=1).reshape((batch_size, 1))

        # Join the per-step (batch, vocab) scores into (batch, seq_len, vocab).
        return torch.cat([step.unsqueeze(1) for step in outputs], dim=1)

## Seq2Seq With Attention

Our implementation of a Seq2Seq Encoder-Decoder architecture using attention.

### Encoder

In [None]:
class EncoderAtt(nn.Module):
    """Bidirectional GRU encoder used by the attention-based Seq2Seq model.

    The constructor arguments are stored on the instance. `dropout` is passed
    to the GRU only; the embeddings are not dropped out.
    """

    def __init__(self, input_dim, emb_size, enc_hid_dim, dec_hid_dim, n_layers, dropout):
        super().__init__()
        self.input_dim, self.emb_size = input_dim, emb_size
        self.enc_hid_dim, self.dec_hid_dim = enc_hid_dim, dec_hid_dim
        self.n_layers, self.dropout = n_layers, dropout

        self.embedding = nn.Embedding(input_dim, emb_size)
        self.rnn = nn.GRU(
            input_size=emb_size,
            hidden_size=enc_hid_dim,
            num_layers=n_layers,
            dropout=dropout,
            bidirectional=True,
        )
        # Both directions are concatenated before the projection, hence 2x.
        self.fc = nn.Linear(2 * enc_hid_dim, dec_hid_dim)

    def forward(self, src_batch):
        """Encode `src_batch` and return `(outputs, hidden)`.

        `outputs` is the GRU output for every position. `hidden` is the
        concatenation of the last two final states (the two directions of the
        top layer), projected to `dec_hid_dim` and squashed with tanh.
        """
        outputs, final_states = self.rnn(self.embedding(src_batch))
        last_forward, last_backward = final_states[-2, :, :], final_states[-1, :, :]
        joined = torch.cat((last_forward, last_backward), dim=1)
        return outputs, torch.tanh(self.fc(joined))

### Attention

In [None]:
class Attention(nn.Module):
    """Additive attention over the encoder outputs.

    Scores each source position from the decoder hidden state concatenated
    with that position's encoder output, then normalises the scores with a
    softmax over the source positions.
    """

    def __init__(self, enc_hid_dim, dec_hid_dim):
        super().__init__()
        self.enc_hid_dim, self.dec_hid_dim = enc_hid_dim, dec_hid_dim
        # Input width: encoder outputs (2 * enc_hid_dim) plus the decoder state.
        self.fc1 = nn.Linear(enc_hid_dim * 2 + dec_hid_dim, dec_hid_dim)
        self.fc2 = nn.Linear(dec_hid_dim, 1, bias=False)

    def forward(self, encoder_outputs, hidden):
        """Return attention weights of shape (batch, src_len).

        `encoder_outputs` is indexed as (src_len, batch, features) and `hidden`
        as (batch, dec_hid_dim); each row of the result sums to 1.
        """
        src_len = encoder_outputs.shape[0]
        # Copy the decoder state once per source position: (batch, src_len, dec_hid_dim).
        tiled_hidden = hidden.unsqueeze(1).repeat(1, src_len, 1)
        batch_first_outputs = encoder_outputs.permute(1, 0, 2)
        energy = torch.tanh(self.fc1(torch.cat((tiled_hidden, batch_first_outputs), dim=2)))
        scores = self.fc2(energy).squeeze(dim=2)
        return torch.softmax(scores, dim=1)

### Decoder

In [None]:
class DecoderAtt(nn.Module):
    """GRU decoder that attends over the encoder outputs at every step.

    `attention` is a module called as `attention(encoder_outputs, hidden)` that
    returns one weight per source position for each batch element.
    """

    def __init__(self, output_dim, emb_dim, enc_hid_dim, dec_hid_dim, n_layers,
                 dropout, attention):
        super().__init__()
        self.output_dim, self.emb_dim = output_dim, emb_dim
        self.enc_hid_dim, self.dec_hid_dim = enc_hid_dim, dec_hid_dim
        self.n_layers, self.dropout = n_layers, dropout
        self.attention = attention
        self.embedding = nn.Embedding(output_dim, emb_dim)
        # The GRU consumes the token embedding concatenated with the context vector.
        self.rnn = nn.GRU(
            input_size=enc_hid_dim * 2 + emb_dim,
            hidden_size=dec_hid_dim,
            num_layers=n_layers,
            dropout=dropout,
        )
        self.linear = nn.Linear(dec_hid_dim, output_dim)

    def forward(self, trg, encoder_outputs, hidden):
        """Decode one step and return `(preds, hidden)`.

        `trg` holds the current input token index for each batch element,
        `encoder_outputs` is indexed as (src_len, batch, features) and `hidden`
        as (batch, dec_hid_dim). `preds` are unnormalised vocabulary scores.
        """
        weights = self.attention(encoder_outputs, hidden).unsqueeze(1)
        # Weighted sum of encoder outputs, reshaped to (1, batch, features).
        context = torch.bmm(weights, encoder_outputs.permute(1, 0, 2)).permute(1, 0, 2)
        embedded = self.embedding(trg.unsqueeze(0))
        rnn_output, new_hidden = self.rnn(torch.cat((embedded, context), dim=2), hidden.unsqueeze(0))
        return self.linear(rnn_output.squeeze(0)), new_hidden.squeeze(0)

### Seq2Seq

In [None]:
class Seq2SeqAtt(nn.Module):
    """Attention-based encoder-decoder trained with optional teacher forcing."""

    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder, self.decoder = encoder, decoder
        self.device = device

    def forward(self, src_batch, trg_batch, teacher_forcing_ratio=0.5):
        """Decode `trg_batch.shape[0]` steps and return the stacked scores.

        `trg_batch` is indexed as (max_len, batch). The result has shape
        (max_len, batch, decoder.output_dim); row 0 is left as zeros. After each
        step the next input is the ground-truth token with probability
        `teacher_forcing_ratio`, otherwise the model's own argmax prediction.
        """
        n_steps, n_examples = trg_batch.shape
        scores = torch.zeros(n_steps, n_examples, self.decoder.output_dim).to(self.device)
        encoder_outputs, state = self.encoder(src_batch)

        # Decoding starts from the first row of the target batch.
        decoder_input = trg_batch[0]
        for step in range(1, n_steps):
            step_scores, state = self.decoder(decoder_input, encoder_outputs, state)
            scores[step] = step_scores

            use_ground_truth = random() < teacher_forcing_ratio
            decoder_input = trg_batch[step] if use_ground_truth else step_scores.argmax(1)
        return scores

## Transformer

Our implementation of a transformer layer.

In [None]:
class Transformer(nn.Module):
    """Single encoder-style transformer block with two self-attention heads.

    Pipeline: token + learned positional embeddings -> two-head self-attention
    (residual + LayerNorm) -> position-wise feed-forward (residual + LayerNorm)
    -> linear projection to the output vocabulary.

    Args:
        input_size: Size of the input vocabulary.
        output_size: Size of the output vocabulary.
        device: Device that inputs are moved to and the positional table is
            created on.
        hidden_dim: Width of the embeddings and of the residual stream.
        num_heads: Used only to size the projection that follows the heads.
            Exactly two heads are built, so values other than 2 fail in forward.
        dim_feedforward: Inner width of the feed-forward sub-layer.
        dim_k, dim_v, dim_q: Per-head key, value and query widths. Queries are
            multiplied with transposed keys, so dim_q must equal dim_k.
        max_length: Number of positions in the positional embedding table;
            longer input sequences cannot be embedded.
    """

    def __init__(self, input_size, output_size, device, hidden_dim=128, num_heads=2, dim_feedforward=2048, dim_k=96, dim_v=96, dim_q=96, max_length=43):
        super(Transformer, self).__init__()
        self.num_heads = num_heads
        self.word_embedding_dim = hidden_dim
        self.hidden_dim = hidden_dim
        self.dim_feedforward = dim_feedforward
        self.max_length = max_length
        self.input_size = input_size
        self.output_size = output_size
        self.device = device
        self.dim_key = dim_k
        self.dim_value = dim_v
        self.dim_query = dim_q

        self.embedding_layer = nn.Embedding(self.input_size, self.word_embedding_dim)
        self.positional_encoding_layer = nn.Embedding(self.max_length, self.word_embedding_dim).to(device)

        # Head 1 projections.
        self.key_1 = nn.Linear(self.hidden_dim, self.dim_key)
        self.value_1 = nn.Linear(self.hidden_dim, self.dim_value)
        self.query_1 = nn.Linear(self.hidden_dim, self.dim_query)

        # Head 2 projections.
        self.key_2 = nn.Linear(self.hidden_dim, self.dim_key)
        self.value_2 = nn.Linear(self.hidden_dim, self.dim_value)
        self.query_2 = nn.Linear(self.hidden_dim, self.dim_query)
        self.softmax = nn.Softmax(dim=2)
        self.attention_head_linear = nn.Linear(self.dim_value * self.num_heads, self.hidden_dim)
        self.norm_multi_head = nn.LayerNorm(self.hidden_dim)

        self.fc1 = nn.Linear(self.hidden_dim, self.dim_feedforward)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(self.dim_feedforward, self.hidden_dim)
        self.layer_normalization = nn.LayerNorm(self.hidden_dim)

        self.fc3 = nn.Linear(self.hidden_dim, self.output_size)

    def forward(self, inputs):
        """Map (batch, seq_len) token indices to (batch, seq_len, output_size) scores."""
        embedded = self.custom_embedding_layer(inputs)
        attended = self.multi_head_attention_layer(embedded)
        transformed = self.feed_forward_layer(attended)
        return self.final_linear_layer(transformed)

    def custom_embedding_layer(self, inputs):
        """Return token embeddings plus learned positional embeddings."""
        # Use the instance's own device rather than a notebook-level global, and
        # build the position indices on the same device instead of calling
        # .cuda(), so the model also runs on CPU.
        inputs = inputs.to(self.device)
        word = self.embedding_layer(inputs)
        positions = torch.arange(inputs.shape[1], device=inputs.device)
        position = self.positional_encoding_layer(positions).unsqueeze(0)
        return word + position

    def _attention_head(self, inputs, query, key, value):
        """Scaled dot-product self-attention for a single head."""
        scores = query(inputs) @ torch.transpose(key(inputs), 1, 2) / np.sqrt(self.dim_key)
        return self.softmax(scores) @ value(inputs)

    def multi_head_attention_layer(self, inputs):
        """Apply both attention heads, project them back, then add & normalise."""
        inputs = inputs.to(self.device)
        h1 = self._attention_head(inputs, self.query_1, self.key_1, self.value_1)
        h2 = self._attention_head(inputs, self.query_2, self.key_2, self.value_2)
        # Stack then flatten the last two dims: per position this lays the two
        # head outputs side by side, i.e. [h1, h2].
        stacked = torch.stack((h1, h2), dim=2)
        stacked = stacked.reshape((stacked.size()[0], stacked.size()[1], -1))
        a_h_p = self.attention_head_linear(stacked)
        return self.norm_multi_head(inputs + a_h_p)

    def feed_forward_layer(self, inputs):
        """Position-wise feed-forward sub-layer with residual connection and LayerNorm."""
        inputs = inputs.to(self.device)
        outputs = self.fc2(self.relu(self.fc1(inputs)))
        return self.layer_normalization(outputs + inputs)

    def final_linear_layer(self, inputs):
        """Project the hidden states to unnormalised output-vocabulary scores."""
        inputs = inputs.to(self.device)
        return self.fc3(inputs)

# Training

In [None]:
# adjustable parameters
# Vocabulary sizes on the source and target side.
INPUT_DIM, OUTPUT_DIM = len(SRC.vocab), len(TRG.vocab)
# Embedding and hidden widths for the recurrent encoder/decoder models.
ENC_EMB_DIM = DEC_EMB_DIM = 256
ENC_HID_DIM = DEC_HID_DIM = 256
N_LAYERS = 1
ENC_DROPOUT = DEC_DROPOUT = 0.1
# Hidden width passed to the Transformer.
TRANS_HIDDEN_DIM = 128 #128
learning_rate = 5e-3 # 1e-2

In [None]:
model_name = "transformer" #@param ["seq2seq_att", "transformer", "seq2seq"]

# Build the selected architecture. Note that train()/evaluate() as written call
# `model(source)` on batch-first tensors, which matches "transformer" and
# "seq2seq"; "seq2seq_att" needs the commented-out call variant inside them.
if model_name == "transformer":
  transformer_model = Transformer(INPUT_DIM, OUTPUT_DIM, hidden_dim=TRANS_HIDDEN_DIM, device=device, max_length=MAX_LEN).to(device)
  model = transformer_model
elif model_name == "seq2seq_att":
  attention = Attention(ENC_HID_DIM, DEC_HID_DIM)
  encoder_att = EncoderAtt(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, N_LAYERS, ENC_DROPOUT)
  decoder_att = DecoderAtt(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, N_LAYERS, DEC_DROPOUT, attention)
  seq2seq_att_model = Seq2SeqAtt(encoder_att, decoder_att, device).to(device)
  model = seq2seq_att_model
elif model_name == "seq2seq":
  encoder = Encoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, dropout = ENC_DROPOUT)
  # The third argument is the decoder hidden size; ENC_HID_DIM was passed twice
  # before, which only worked because both constants are equal.
  decoder = Decoder(DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, OUTPUT_DIM, dropout = DEC_DROPOUT)
  seq2seq_model = Seq2Seq(encoder, decoder, device).to(device)
  model = seq2seq_model
else:
  # A typo used to fall through silently to the plain seq2seq model.
  raise ValueError(
      "Unknown model_name %r; expected 'transformer', 'seq2seq_att' or 'seq2seq'" % (model_name,))


print(device)
model = model.to(device)
print(model)

# Adam with L2 weight decay. The plateau scheduler is created here but nothing
# in the training cells visible here calls scheduler.step(), so the learning
# rate stays fixed.
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-3)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)
# Padding positions in the target do not contribute to the loss.
criterion = nn.CrossEntropyLoss(ignore_index=PAD_IDX)

cuda
Transformer(
  (embedding_layer): Embedding(16948, 128)
  (positional_encoding_layer): Embedding(50, 128)
  (key_1): Linear(in_features=128, out_features=96, bias=True)
  (value_1): Linear(in_features=128, out_features=96, bias=True)
  (query_1): Linear(in_features=128, out_features=96, bias=True)
  (key_2): Linear(in_features=128, out_features=96, bias=True)
  (value_2): Linear(in_features=128, out_features=96, bias=True)
  (query_2): Linear(in_features=128, out_features=96, bias=True)
  (softmax): Softmax(dim=2)
  (attention_head_linear): Linear(in_features=192, out_features=128, bias=True)
  (norm_multi_head): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
  (fc1): Linear(in_features=128, out_features=2048, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=2048, out_features=128, bias=True)
  (layer_normalization): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
  (fc3): Linear(in_features=128, out_features=14445, bias=True)
)


## Train/Eval Functions

In [None]:
def train(model, dataloader, optimizer, criterion, scheduler = None):
    """Train `model` for one pass over `dataloader`.

    Args:
        model: Network called as `model(source)` on batch-first token indices.
        dataloader: Iterable of batches exposing `.src` and `.trg`, which are
            transposed with `transpose(1, 0)` before use.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as `criterion(scores, targets)` with
            (N, vocab) scores and (N,) target indices.
        scheduler: Accepted for interface compatibility; not used here.

    Returns:
        `(total_loss, mean_loss)` as Python floats: the sum of the per-batch
        losses and that sum divided by the number of batches.
    """
    model.train()

    total_loss = 0.0

    progress_bar = tqdm_notebook(dataloader, ascii = True)
    for batch_idx, data in enumerate(progress_bar):

        ### Transformer / Seq2Seq without attention: batch-first source only ###
        source = data.src.transpose(1,0)
        target = data.trg.transpose(1,0)
        translation = model(source)

        ### Seq2Seq with attention: use these three lines instead ###
        # source = data.src
        # target = data.trg
        # translation = model(source, target)

        # Flatten to (N, vocab) scores and (N,) targets for the loss.
        translation = translation.reshape(-1, translation.shape[-1])
        target = target.reshape(-1)

        optimizer.zero_grad()
        loss = criterion(translation, target)
        loss.backward()
        # Clip the global gradient norm to keep updates bounded.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()

        # Accumulate a plain float: adding the loss tensor itself would keep a
        # tensor (with autograd history) alive across the whole epoch.
        batch_loss = loss.item()
        total_loss += batch_loss
        progress_bar.set_description_str("Batch: %d, Loss: %.4f" % ((batch_idx+1), batch_loss))

    return total_loss, total_loss / len(dataloader)



def evaluate(model, dataloader, criterion):
    """Compute the loss of `model` over `dataloader` without updating it.

    Args:
        model: Network called as `model(source)` on batch-first token indices.
        dataloader: Iterable of batches exposing `.src` and `.trg`, which are
            transposed with `transpose(1, 0)` before use.
        criterion: Loss called as `criterion(scores, targets)` with
            (N, vocab) scores and (N,) target indices.

    Returns:
        `(total_loss, mean_loss)` as Python floats: the sum of the per-batch
        losses and that sum divided by the number of batches.
    """
    # eval() switches layers such as dropout to inference behaviour; it is
    # torch.no_grad() below that disables gradient tracking.
    model.eval()
    total_loss = 0.
    with torch.no_grad():
        progress_bar = tqdm(dataloader, ascii = True)
        for batch_idx, data in enumerate(progress_bar):
            ### Transformer / Seq2Seq without attention: batch-first source only ###
            source = data.src.transpose(1,0)
            target = data.trg.transpose(1,0)
            translation = model(source)

            ### Seq2Seq with attention: use these three lines instead ###
            # source = data.src
            # target = data.trg
            # translation = model(source, target)

            # Flatten to (N, vocab) scores and (N,) targets for the loss.
            translation = translation.reshape(-1, translation.shape[-1])
            target = target.reshape(-1)

            # Accumulate a plain float so the result is a number, not a
            # (possibly GPU-resident) tensor.
            batch_loss = criterion(translation, target).item()
            total_loss += batch_loss
            progress_bar.set_description_str("Batch: %d, Loss: %.4f" % ((batch_idx+1), batch_loss))

    avg_loss = total_loss / len(dataloader)
    return total_loss, avg_loss

def epoch_time(start_time, end_time):
    """Split the span between two timestamps (in seconds) into whole minutes and seconds.

    Returns a `(minutes, seconds)` tuple of ints; fractional seconds are truncated.
    """
    duration = end_time - start_time
    whole_minutes = int(duration / 60)
    leftover_seconds = int(duration - (whole_minutes * 60))
    return whole_minutes, leftover_seconds

# Training the Model

In [None]:
# `time` and `math` are used below but are not imported explicitly anywhere
# earlier in the notebook; import them here rather than relying on whatever an
# earlier star-import happened to leave in the namespace.
import math
import time

N_EPOCHS = 10
best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    start_time = time.time()
    # Index [1] selects the mean per-batch loss from (total, mean).
    train_loss = train(model, train_loader, optimizer, criterion)[1]
    valid_loss = evaluate(model, valid_loader, criterion)[1]
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    # Keep only the checkpoint with the best validation loss seen so far.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut2-model.pt')

    print("\n")
    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    # Perplexity is exp(mean cross-entropy loss).
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  if __name__ == '__main__':


  0%|          | 0/99 [00:00<?, ?it/s]

Batch: 25, Loss: 6.7779: 100%|##########| 25/25 [00:02<00:00,  8.73it/s]




Epoch: 01 | Time: 0m 35s
	Train Loss: 7.150 | Train PPL: 1273.831
	 Val. Loss: 6.938 |  Val. PPL: 1030.686


  0%|          | 0/99 [00:00<?, ?it/s]

Batch: 25, Loss: 6.6496: 100%|##########| 25/25 [00:02<00:00,  8.76it/s]




Epoch: 02 | Time: 0m 35s
	Train Loss: 6.792 | Train PPL: 890.510
	 Val. Loss: 6.784 |  Val. PPL: 883.628


  0%|          | 0/99 [00:00<?, ?it/s]

Batch: 25, Loss: 6.5808: 100%|##########| 25/25 [00:02<00:00,  8.83it/s]




Epoch: 03 | Time: 0m 35s
	Train Loss: 6.675 | Train PPL: 792.105
	 Val. Loss: 6.722 |  Val. PPL: 830.439


  0%|          | 0/99 [00:00<?, ?it/s]

Batch: 25, Loss: 6.5194: 100%|##########| 25/25 [00:02<00:00,  8.81it/s]




Epoch: 04 | Time: 0m 35s
	Train Loss: 6.593 | Train PPL: 729.893
	 Val. Loss: 6.661 |  Val. PPL: 781.295


  0%|          | 0/99 [00:00<?, ?it/s]

Batch: 25, Loss: 6.4857: 100%|##########| 25/25 [00:02<00:00,  8.78it/s]




Epoch: 05 | Time: 0m 35s
	Train Loss: 6.521 | Train PPL: 679.141
	 Val. Loss: 6.623 |  Val. PPL: 752.339


  0%|          | 0/99 [00:00<?, ?it/s]

Batch: 25, Loss: 6.4661: 100%|##########| 25/25 [00:02<00:00,  8.86it/s]




Epoch: 06 | Time: 0m 35s
	Train Loss: 6.462 | Train PPL: 640.404
	 Val. Loss: 6.590 |  Val. PPL: 727.627


  0%|          | 0/99 [00:00<?, ?it/s]

Batch: 25, Loss: 6.4415: 100%|##########| 25/25 [00:02<00:00,  8.67it/s]




Epoch: 07 | Time: 0m 35s
	Train Loss: 6.413 | Train PPL: 610.010
	 Val. Loss: 6.568 |  Val. PPL: 711.803


  0%|          | 0/99 [00:00<?, ?it/s]

Batch: 25, Loss: 6.4238: 100%|##########| 25/25 [00:02<00:00,  8.82it/s]




Epoch: 08 | Time: 0m 35s
	Train Loss: 6.374 | Train PPL: 586.683
	 Val. Loss: 6.565 |  Val. PPL: 709.902


  0%|          | 0/99 [00:00<?, ?it/s]

Batch: 25, Loss: 6.4157: 100%|##########| 25/25 [00:02<00:00,  8.77it/s]




Epoch: 09 | Time: 0m 35s
	Train Loss: 6.341 | Train PPL: 567.609
	 Val. Loss: 6.554 |  Val. PPL: 701.999


  0%|          | 0/99 [00:00<?, ?it/s]

Batch: 25, Loss: 6.3922: 100%|##########| 25/25 [00:02<00:00,  8.88it/s]




Epoch: 10 | Time: 0m 35s
	Train Loss: 6.308 | Train PPL: 548.981
	 Val. Loss: 6.527 |  Val. PPL: 683.516


# Evaluating Outputs

In [None]:
def convert(model, dataloader):
    """Run `model` on the first batch of `dataloader` and return that batch with its output.

    Prints the model, switches it to eval mode and disables gradient tracking.
    Returns `(source, target, translation)`, where source and target are the
    batch's `.src`/`.trg` transposed with `transpose(1, 0)`. If the dataloader
    yields nothing, the function falls through and returns None.
    """
    print(model)
    model.eval()
    with torch.no_grad():
        for batch in tqdm(dataloader, ascii = True):
            source, target = batch.src.transpose(1,0), batch.trg.transpose(1,0)

            # translation = model(source, target)
            # Returning inside the loop means only the first batch is processed.
            return source, target, model(source)

In [None]:
# Translate the first validation batch and take the highest-scoring token at
# every position of the vocabulary axis (dim 2).
source, target, translation = convert(model, valid_loader)
a_tl = translation.argmax(dim=2)
translation.size()

Seq2Seq(
  (encoder): Encoder(
    (embedding_layer): Embedding(16948, 256)
    (recurrent_layer): LSTM(256, 256, batch_first=True)
    (linear_1): Linear(in_features=256, out_features=256, bias=True)
    (relu_layer): ReLU()
    (linear_2): Linear(in_features=256, out_features=256, bias=True)
    (dropout_layer): Dropout(p=0.1, inplace=False)
  )
  (decoder): Decoder(
    (embedding_layer): Embedding(14445, 256)
    (recurrent_layer): LSTM(256, 256, batch_first=True)
    (linear): Linear(in_features=256, out_features=14445, bias=True)
    (log_softmax): LogSoftmax(dim=None)
    (dropout): Dropout(p=0.1, inplace=False)
  )
)


  0%|          | 0/25 [00:00<?, ?it/s]


torch.Size([128, 50, 14445])

In [None]:
# Map every source index back to its token string, one row per example.
src_out = np.array([[SRC.vocab.itos[token_id] for token_id in source[row]]
                    for row in range(source.shape[0])])
print(src_out.tolist())

[['<sos>', 'o', 'eastern', '<unk>', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>'], ['<sos>', 'he', 'is', 'not', 'the', 'flower', 'of', 'courtesy,', 'but,', 'i’ll', 'warrant', 'him,', 'as', 'gentle', 'as', 'a', '<unk>', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>'], ['<sos>', 'let', '<unk>', 'appear.', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>

In [None]:
# Map every reference index back to its token string, one row per example.
target_out = np.array([[TRG.vocab.itos[token_id] for token_id in target[row]]
                       for row in range(target.shape[0])])
print(target_out.tolist())

[['<sos>', 'oh,', '<unk>', '<unk>', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>'], ['<sos>', 'he’s', 'not', 'the', 'most', 'polite', 'man', 'in', 'the', 'world,', 'but,', 'believe', 'me,', 'he’s', 'gentle', 'as', 'a', '<unk>', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>'], ['<sos>', 'be', '<unk>', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '

In [None]:
# Map every predicted index back to its token string, one row per example.
tl = np.array([[TRG.vocab.itos[token_id] for token_id in a_tl[row]]
               for row in range(a_tl.shape[0])])
print(tl.tolist())

[['<unk>', 'oh,', 'is', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>'], ['<unk>', 'he', 'he', 'he', 'a', 'a', 'and', 'and', 'and', 'he', 'and', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>'], ['<unk>', 'let’s', 'the', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<e

In [None]:
# Special tokens dropped before the sentences are displayed and scored.
remove_strs = ['<pad>', '<sos>', "<eos>"]


def _strip_special_tokens(rows):
  """Return a copy of each token row without the entries listed in `remove_strs`."""
  return [[token for token in row if token not in remove_strs] for row in rows]


src_list = _strip_special_tokens(src_out.tolist())
target_list = _strip_special_tokens(target_out.tolist())
tl_list = _strip_special_tokens(tl.tolist())

# Re-join the remaining tokens into plain strings for display.
src_strs = [' '.join(tokens) for tokens in src_list]
target_strs = [' '.join(tokens) for tokens in target_list]
tl_strs = [' '.join(tokens) for tokens in tl_list]

# Show every fifth example of the batch.
for i in range(0, len(src_strs), 5):
  print("Input: ", src_strs[i])
  print("Target: ", target_strs[i])
  print("Outputs: ", tl_strs[i])
  print("\n")

[['oh,', '<unk>', '<unk>'], ['he’s', 'not', 'the', 'most', 'polite', 'man', 'in', 'the', 'world,', 'but,', 'believe', 'me,', 'he’s', 'gentle', 'as', 'a', '<unk>'], ['be', '<unk>'], ['you’ll', 'discover', 'that', 'his', '<unk>', '<unk>', 'were', 'like', 'the', '<unk>', 'that', 'well,', 'you’re', 'wrong,', 'my', 'lord', 'high', 'constable.'], ['what', 'do', 'you', 'say', 'now?'], ['the', 'old', 'priest', 'at', 'saint', 'luke’s', 'church', 'is', 'at', 'your', 'command', 'at', 'all', 'hours.'], ['the', '<unk>', '<unk>'], ['<unk>', 'hello.'], ['you', 'speak', 'well.'], ['so', 'the', 'cooks', 'who', 'can’t', '<unk>', 'their', 'fingers', 'aren’t', '<unk>'], ['i’ve', 'been', '<unk>', 'i', 'arranged', 'for', 'a', 'friend', 'of', 'mine', 'to', 'marry', 'a', 'common', 'whore.'], ['yes,', 'sir,', 'but', 'not', 'when', 'i’m', '<unk>'], ['oh,', 'madam,', 'my', 'old', 'heart', 'is', '<unk>', 'broken.'], ['so,', 'what’s', 'the', 'news', 'on', 'the', '<unk>'], ['it’s', 'going', 'to', 'be', 'fun,', 'i',

In [None]:
# `math` is not imported explicitly earlier in the notebook.
import math

# Reload the best checkpoint saved during training. map_location lets a
# checkpoint written on a GPU runtime load on whatever device this session uses.
model.load_state_dict(torch.load('tut2-model.pt', map_location=device))

# NOTE: this scores the validation iterator again; no separate test split is
# used here, even though the printed label says "Test".
test_loss = evaluate(model, valid_loader, criterion)[1]
print(f'| Test Loss: {test_loss:.3f} | Test PPL: {math.exp(test_loss):7.3f} |')

Batch: 25, Loss: 5.1993: 100%|##########| 25/25 [00:01<00:00, 19.01it/s]

| Test Loss: 5.310 | Test PPL: 202.275 |





In [None]:
# Pick one training example and show its tokenised source and target sides.
example_idx = 8999
example = train_data.examples[example_idx]
for label, tokens in (('source sentence: ', example.src), ('target sentence: ', example.trg)):
  print(label, ' '.join(tokens))

source sentence:  fare you well.
target sentence:  goodbye.


In [None]:
# Field.process returns sequence-first tensors, here (MAX_LEN, 1).
src_tensor = SRC.process([example.src]).to(device)
trg_tensor = TRG.process([example.trg]).to(device)
print(trg_tensor.shape)

model.eval()
with torch.no_grad():
    # Attention model (takes sequence-first input plus the target):
    # outputs = model(src_tensor, trg_tensor, teacher_forcing_ratio=0)
    #
    # Transformer / Seq2Seq without attention expect batch-first input, as in
    # train()/evaluate(). Feeding the sequence-first tensor directly made the
    # model see MAX_LEN separate one-token sentences. Transpose on the way in,
    # and back on the way out so `outputs` stays (seq_len, batch, vocab).
    outputs = model(src_tensor.transpose(1, 0)).transpose(1, 0)

outputs.shape

torch.Size([50, 1])


torch.Size([50, 1, 14445])

In [None]:
# Skip position 0, drop the batch axis, and take the best token per position.
output_idx = outputs[1:].squeeze(1).argmax(dim=1)
' '.join(TRG.vocab.itos[idx] for idx in output_idx)

'the you of <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos>'

# Metrics

In [None]:
!pip install -U nltk
import nltk
nltk.download('wordnet')

from nltk.translate.bleu_score import corpus_bleu, sentence_bleu
from nltk.translate.meteor_score import meteor_score

# corpus_bleu takes a list of references per hypothesis, so every tokenised
# target sentence is wrapped in its own single-element list.
target_list_corpus = [[reference] for reference in target_list]
print(target_list_corpus)

def bleu(target, prediction):
  """Corpus-level BLEU (micro-averaged precision) with custom n-gram weights.

  Args:
    target: list of reference lists, one entry per hypothesis.
    prediction: list of tokenised hypotheses.

  Returns:
    The value returned by nltk's corpus_bleu using weights 0.4 (unigram)
    and 0.6 (bigram).
  """
  ngram_weights = (0.4, 0.6)
  return corpus_bleu(target, prediction, weights=ngram_weights)

def s_bleu(target, prediction):
  """Sentence-level BLEU with the same n-gram weights as bleu().

  Args:
    target: list of tokenised reference sentences for one hypothesis.
    prediction: the tokenised hypothesis.

  Returns:
    The value returned by nltk's sentence_bleu using weights (0.4, 0.6).
  """
  ngram_weights = (0.4, 0.6)
  return sentence_bleu(target, prediction, weights = ngram_weights)

def meteor(target, prediction):
  """METEOR score of one hypothesis against its references.

  Callers in this notebook average the per-sentence results themselves
  (macro average).

  Args:
    target: list of references for the hypothesis.
    prediction: the hypothesis.

  Returns:
    The value returned by nltk's meteor_score.
  """
  score = meteor_score(target, prediction)
  return score

# Corpus-level BLEU over the whole evaluation set.
print(bleu(target_list_corpus, tl_list))

# Per-sentence scores, averaged over the set below. b_score was previously
# printed without ever being defined; it now accumulates the sentence-level
# BLEU from s_bleu() alongside METEOR.
m_score = 0
b_score = 0
for i in range(len(target_list)):
  m_score += meteor([target_list[i]], tl_list[i])
  b_score += s_bleu([target_list[i]], tl_list[i])
print("METEOR: ")
print(m_score/len(target_list))
print("BLEU: ")
print(b_score/len(target_list))

[[['oh,', '<unk>', '<unk>']], [['he’s', 'not', 'the', 'most', 'polite', 'man', 'in', 'the', 'world,', 'but,', 'believe', 'me,', 'he’s', 'gentle', 'as', 'a', '<unk>']], [['be', '<unk>']], [['you’ll', 'discover', 'that', 'his', '<unk>', '<unk>', 'were', 'like', 'the', '<unk>', 'that', 'well,', 'you’re', 'wrong,', 'my', 'lord', 'high', 'constable.']], [['what', 'do', 'you', 'say', 'now?']], [['the', 'old', 'priest', 'at', 'saint', 'luke’s', 'church', 'is', 'at', 'your', 'command', 'at', 'all', 'hours.']], [['the', '<unk>', '<unk>']], [['<unk>', 'hello.']], [['you', 'speak', 'well.']], [['so', 'the', 'cooks', 'who', 'can’t', '<unk>', 'their', 'fingers', 'aren’t', '<unk>']], [['i’ve', 'been', '<unk>', 'i', 'arranged', 'for', 'a', 'friend', 'of', 'mine', 'to', 'marry', 'a', 'common', 'whore.']], [['yes,', 'sir,', 'but', 'not', 'when', 'i’m', '<unk>']], [['oh,', 'madam,', 'my', 'old', 'heart', 'is', '<unk>', 'broken.']], [['so,', 'what’s', 'the', 'news', 'on', 'the', '<unk>']], [['it’s', 'goi

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


# Sonnets Test Set

In [None]:
# Read every file under <data_dir>/sonnets and collect its stripped lines.
sonnet_lines = []
sonnets_dir = data_dir + "/sonnets"
# os.listdir returns entries in arbitrary order; sorting keeps the line order
# (and everything derived from it) reproducible between runs.
for filename in sorted(os.listdir(sonnets_dir)):
  with open(os.path.join(sonnets_dir, filename), "r") as f:
    sonnet_lines.extend(line.strip() for line in f)

# Total number of lines across ALL files. The counter used to be reset for each
# file, so only the last file's count was reported (and it was undefined when
# the directory was empty).
num_lines = len(sonnet_lines)
print(num_lines)

255


In [None]:
# Field describing the raw sonnet lines used as model input.
TST_SRC = Field(
            tokenizer_language = "en",
            init_token = '<sos>',
            eos_token = '<eos>',
            fix_length = MAX_LEN,
            lower = True)

# Column layout of test.tsv: the sonnet line, then an (empty) target column.
fields = [("tst", TST_SRC),("trg",TRG)]

# Write one row per non-empty sonnet line. The emptiness check compares by
# value: `is not ""` tests object identity, which is implementation-dependent
# and raises a SyntaxWarning on Python 3.8+.
with open("test.tsv", "w") as p:
  for inp in sonnet_lines:
    if inp != "":
      trg = ""
      p.write(inp + "\t" + trg + "\n")

# Only a test split is requested here.
test_data = TabularDataset.splits(
    path='',
    test="test.tsv",
    format="tsv",
    fields=fields,
    skip_header=False
)

# NOTE: `(test_data)` is just `test_data` (parentheses alone do not build a
# tuple). The result is later indexed with [0] by test(), so splits() is
# presumably handed the tuple returned above — verify if this is changed.
test_loader = BucketIterator.splits(
    (test_data),
    batch_size=BATCH_SIZE,
    device=device,
    sort=False
)

In [None]:
def word2index(sentence):
  """Convert a tokenised sentence into a tensor of SRC vocabulary indices.

  The lookup uses ``SRC.vocab.stoi`` (token -> index). The previous version
  read ``SRC.vocab.freqs``, which holds token *frequencies*, so the model was
  fed occurrence counts instead of vocabulary ids.

  Args:
    sentence: iterable of token strings.

  Returns:
    A tensor of shape (1, len(sentence)) holding one index per token.
    Handling of out-of-vocabulary tokens is whatever ``stoi`` does for a
    missing key (presumably the <unk> index — confirm against the vocab).
  """
  stoi = SRC.vocab.stoi
  return torch.tensor([[stoi[word] for word in sentence]])

def test(model, dataloader):
    """Run the model on every example of the first loader's dataset.

    Args:
        model: callable network; switched to eval mode before use.
        dataloader: indexable collection whose first element exposes
            ``.dataset.examples``; each example provides a ``tst`` token list.

    Returns:
        (sources, translations): the ``tst`` token list of every example and
        the raw model output for it, in dataset order.
    """
    model.eval()
    sources, translations = [], []
    examples = dataloader[0].dataset.examples
    with torch.no_grad():
        # Examples are fed one at a time, so no batching is involved.
        for example in tqdm(examples, ascii = True):
            translations.append(model(word2index(example.tst)))
            sources.append(example.tst)
    return sources, translations
sources, translations = test(model, test_loader)
print("\n")

# For each raw model output: argmax over axis 2, take row 0 of the result and
# map every index back to its target-vocabulary token.
translations_conv = [
  [TRG.vocab.itos[token_idx] for token_idx in torch.argmax(model_output, dim=2)[0]]
  for model_output in translations
]

print(len(sources), len(translations_conv))

100%|##########| 225/225 [00:00<00:00, 462.57it/s]




225 225


In [None]:
# Special tokens that should not appear in the printed sentences.
remove_strs = ['<pad>', '<sos>', "<eos>"]

# Build filtered copies instead of calling list.remove() on the originals:
#  * the second loop used to range over len(tl_list), an unrelated list from
#    another section, while indexing translations_list;
#  * `sources` holds the dataset examples' own token lists, so removing items
#    in place silently altered the dataset and made the cell non-idempotent.
src_list = [
  [token for token in sentence if token not in remove_strs]
  for sentence in sources
]
translations_list = [
  [token for token in sentence if token not in remove_strs]
  for sentence in translations_conv
]

src_strs = [' '.join(x) for x in src_list]
translations_list_strs = [' '.join(x) for x in translations_list]

# Show every 20th input/output pair as a spot check.
for i in range(0, len(translations_list_strs), 20):
  print("Input: ", src_strs[i])
  print("Outputs: ", translations_list_strs[i])
  print("\n")

225
225
[['oh,', '<unk>', '<unk>'], ['he’s', 'not', 'the', 'most', 'polite', 'man', 'in', 'the', 'world,', 'but,', 'believe', 'me,', 'he’s', 'gentle', 'as', 'a', '<unk>'], ['be', '<unk>'], ['you’ll', 'discover', 'that', 'his', '<unk>', '<unk>', 'were', 'like', 'the', '<unk>', 'that', 'well,', 'you’re', 'wrong,', 'my', 'lord', 'high', 'constable.'], ['what', 'do', 'you', 'say', 'now?'], ['the', 'old', 'priest', 'at', 'saint', 'luke’s', 'church', 'is', 'at', 'your', 'command', 'at', 'all', 'hours.'], ['the', '<unk>', '<unk>'], ['<unk>', 'hello.'], ['you', 'speak', 'well.'], ['so', 'the', 'cooks', 'who', 'can’t', '<unk>', 'their', 'fingers', 'aren’t', '<unk>'], ['i’ve', 'been', '<unk>', 'i', 'arranged', 'for', 'a', 'friend', 'of', 'mine', 'to', 'marry', 'a', 'common', 'whore.'], ['yes,', 'sir,', 'but', 'not', 'when', 'i’m', '<unk>'], ['oh,', 'madam,', 'my', 'old', 'heart', 'is', '<unk>', 'broken.'], ['so,', 'what’s', 'the', 'news', 'on', 'the', '<unk>'], ['it’s', 'going', 'to', 'be', 'fun

# Pre-Trained Model Attempts

In [None]:
!pip install sentencepiece
import transformers
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch
import torch.nn.functional as F
from torch import cuda
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler

# Pair each source sentence with its target in a two-column frame.
df = pd.DataFrame(
    list(zip(inputs, targets)),
    columns=['source_text', 'target_text'],
)

# Use the GPU when one is available, otherwise fall back to the CPU.
if cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

df.head()




Unnamed: 0,source_text,target_text
0,Riddling confession finds but riddling shrift.,A jumbled confession can only receive a jumble...
1,Then plainly know my heart’s dear love is set ...,I love rich Capulet’s daughter.
2,"As mine on hers, so hers is set on mine, And a...",We’re bound to each other in every possible wa...
3,"When and where and how We met, we wooed and ma...",I’ll tell you more later about when and where ...
4,"Holy Saint Francis, what a change is here!","Holy Saint Francis, this is a drastic change!"


In [None]:
class StyleDataset(Dataset):
  """Dataset of (source, target) text pairs tokenised on access.

  Args:
    dataframe: table holding both text columns.
    tokenizer: object exposing ``batch_encode_plus``.
    source_len: fixed length the source is padded/truncated to.
    target_len: fixed length the target is padded/truncated to.
    source_text: name of the source column in ``dataframe``.
    target_text: name of the target column in ``dataframe``.
  """

  def __init__(self, dataframe, tokenizer, source_len, target_len, source_text, target_text):
    self.tokenizer = tokenizer
    self.data = dataframe
    self.source_len = source_len
    self.target_len = target_len
    self.target_text = self.data[target_text]
    self.source_text = self.data[source_text]

  def __len__(self):
    """Number of text pairs."""
    return len(self.target_text)

  def _encode(self, text, max_len):
    """Collapse whitespace, tokenise one text, return (input_ids, attention_mask)."""
    text = " ".join(str(text).split())
    # padding="max_length" already requests fixed-length padding, so the
    # deprecated pad_to_max_length flag is not passed any more.
    encoded = self.tokenizer.batch_encode_plus(
      [text],
      max_length=max_len,
      truncation=True,
      padding="max_length",
      return_tensors="pt",
    )
    # squeeze(0) drops only the batch axis, even if max_len happens to be 1.
    return encoded["input_ids"].squeeze(0), encoded["attention_mask"].squeeze(0)

  def __getitem__(self, index):
    """Return the tokenised pair at *position* ``index``.

    Positional access (``iloc``) is used because a DataLoader hands out
    positions 0..len-1; label-based ``series[index]`` fails or picks the wrong
    row whenever the frame's index is not the default RangeIndex.

    Returns:
      dict with long tensors ``source_ids``, ``source_mask``, ``target_ids``
      and ``target_ids_y`` (the latter two hold the same ids).
    """
    source_ids, source_mask = self._encode(self.source_text.iloc[index], self.source_len)
    target_ids, _ = self._encode(self.target_text.iloc[index], self.target_len)

    return {
      "source_ids": source_ids.to(dtype=torch.long),
      "source_mask": source_mask.to(dtype=torch.long),
      "target_ids": target_ids.to(dtype=torch.long),
      "target_ids_y": target_ids.to(dtype=torch.long),
      }

In [None]:
def train(epoch, tokenizer, model, device, loader, optimizer):
  """Run one training epoch over ``loader``.

  Args:
    epoch: epoch number, used only in the progress message.
    tokenizer: provides ``pad_token_id`` for masking the loss.
    model: seq2seq model called with input_ids / attention_mask / labels;
      element [0] of its output is taken as the loss.
    device: device the batch tensors are moved to.
    loader: iterable of batches with keys "source_ids", "source_mask" and
      "target_ids".
    optimizer: optimizer stepped once per batch.
  """
  model.train()
  for step, data in enumerate(loader, 0):
    ids = data["source_ids"].to(device, dtype=torch.long)
    mask = data["source_mask"].to(device, dtype=torch.long)
    y = data["target_ids"].to(device, dtype=torch.long)

    # Pass the full target as labels and let the model derive the decoder
    # inputs by shifting them right. The previous manual split
    # (decoder_input_ids=y[:, :-1], labels=y[:, 1:]) never supervised the first
    # target token and fed the decoder a start sequence that differs from the
    # one generate() uses at inference time.
    lm_labels = y.clone()
    # -100 marks padding positions to be ignored by the loss.
    lm_labels[y == tokenizer.pad_token_id] = -100

    outputs = model(
      input_ids=ids,
      attention_mask=mask,
      labels=lm_labels,
    )
    loss = outputs[0]

    if step % 200 == 0:
      print(f'Epoch {epoch}:, Loss: {loss}, Perplexity: {torch.exp(loss)}')

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()



In [None]:
def validate(epoch, tokenizer, model, device, loader):
  """Generate predictions for every batch in ``loader`` and report the loss.

  Args:
    epoch: epoch number, used only in the progress message.
    tokenizer: provides ``decode`` and ``pad_token_id``.
    model: seq2seq model exposing ``generate`` and a forward call whose
      output element [0] is the loss.
    device: device the batch tensors are moved to.
    loader: iterable of batches with keys "source_ids", "source_mask" and
      "target_ids".

  Returns:
    (predictions, targets): decoded generated texts and decoded reference
    texts, in loader order.
  """
  model.eval()
  predictions = []
  targets = []
  with torch.no_grad():
    for step, data in enumerate(loader, 0):
      y = data['target_ids'].to(device, dtype = torch.long)
      ids = data['source_ids'].to(device, dtype = torch.long)
      mask = data['source_mask'].to(device, dtype = torch.long)

      output = model.generate(
        input_ids = ids,
        attention_mask = mask,
        max_length=150,
        num_beams=2,
        repetition_penalty=2.5,
        length_penalty=1.0,
        early_stopping=True
        )

      # Loss on the full target; the model builds the decoder inputs itself by
      # shifting the labels right, matching how generate() starts decoding.
      lm_labels = y.clone()
      # -100 marks padding positions to be ignored by the loss.
      lm_labels[y == tokenizer.pad_token_id] = -100
      outputs = model(
        input_ids=ids,
        attention_mask=mask,
        labels=lm_labels,
      )
      loss = outputs[0]

      predictions.extend(
        tokenizer.decode(o, skip_special_tokens=True, clean_up_tokenization_spaces=True)
        for o in output
      )
      targets.extend(
        tokenizer.decode(y_hat, skip_special_tokens=True, clean_up_tokenization_spaces=True)
        for y_hat in y
      )
      if step % 50 == 0:
        print(f'Epoch {epoch}:, Loss: {loss}, Perplexity: {torch.exp(loss)}')
  # The references are collected in `targets`; returning the undefined name
  # `actuals` raised NameError at the end of every validation run.
  return predictions, targets



In [None]:
def T5Trainer(dataframe, source_text, target_text, model_params, output_dir="./outputs/"):
  """Fine-tune a T5 checkpoint on a text-pair frame, save it, and write predictions.

  Args:
    dataframe: table containing the two text columns.
    source_text: name of the input-text column.
    target_text: name of the target-text column.
    model_params: dict with keys "MODEL", "MAX_SOURCE_TEXT_LENGTH",
      "MAX_TARGET_TEXT_LENGTH", "TRAIN_BATCH_SIZE", "VALID_BATCH_SIZE",
      "TRAIN_EPOCHS", "VAL_EPOCHS" and "LEARNING_RATE".
    output_dir: directory receiving "model_files/" and "predictions.csv".

  The model is moved to the notebook-level ``device``.
  """
  # Inner subscripts use single quotes: reusing the enclosing double quote
  # inside an f-string is a SyntaxError before Python 3.12.
  print(f"Loading {model_params['MODEL']}\n")

  tokenizer = T5Tokenizer.from_pretrained(model_params["MODEL"])
  model = T5ForConditionalGeneration.from_pretrained(model_params["MODEL"])
  model = model.to(device)

  # 80/20 random split; both parts get a fresh 0..n-1 index.
  dataframe = dataframe[[source_text, target_text]]
  train_size = 0.8
  train_dataset = dataframe.sample(frac=train_size)
  val_dataset = dataframe.drop(train_dataset.index).reset_index(drop=True)
  train_dataset = train_dataset.reset_index(drop=True)

  print(f"Train Dataset: {train_dataset.shape}, Test Dataset: {val_dataset.shape}\n")
  training_set = StyleDataset(
    train_dataset,
    tokenizer,
    model_params["MAX_SOURCE_TEXT_LENGTH"],
    model_params["MAX_TARGET_TEXT_LENGTH"],
    source_text,
    target_text,
  )
  val_set = StyleDataset(
    val_dataset,
    tokenizer,
    model_params["MAX_SOURCE_TEXT_LENGTH"],
    model_params["MAX_TARGET_TEXT_LENGTH"],
    source_text,
    target_text,
  )

  train_params = {
    "batch_size": model_params["TRAIN_BATCH_SIZE"],
    "shuffle": True,
    "num_workers": 0,
  }
  val_params = {
    "batch_size": model_params["VALID_BATCH_SIZE"],
    "shuffle": False,
    "num_workers": 0,
  }

  train_loader = DataLoader(training_set, **train_params)
  val_loader = DataLoader(val_set, **val_params)

  optimizer = torch.optim.Adam(params=model.parameters(), lr=model_params["LEARNING_RATE"])

  print(f"Training\n")
  for epoch in range(model_params["TRAIN_EPOCHS"]):
    train(epoch, tokenizer, model, device, train_loader, optimizer)

  # Persist model and tokenizer first, then announce it.
  path = os.path.join(output_dir, "model_files")
  model.save_pretrained(path)
  tokenizer.save_pretrained(path)
  print(f"Model Saved\n")

  print(f"Validation\n")
  for epoch in range(model_params["VAL_EPOCHS"]):
    predictions, actuals = validate(epoch, tokenizer, model, device, val_loader)
    final_df = pd.DataFrame({"Generated Text": predictions, "Actual Text": actuals})
    # The file is rewritten on every validation epoch; the last one wins.
    final_df.to_csv(os.path.join(output_dir, "predictions.csv"))

  print(f"Model saved at {path}\n")
  print(f"Test outputs saved at {os.path.join(output_dir,'predictions.csv')}\n")


In [None]:
# Hyper-parameters read by T5Trainer.
model_params = dict(
  MODEL="t5-small",             # checkpoint name passed to from_pretrained
  MAX_SOURCE_TEXT_LENGTH=50,    # source padding/truncation length
  MAX_TARGET_TEXT_LENGTH=50,    # target padding/truncation length
  TRAIN_BATCH_SIZE=8,
  VALID_BATCH_SIZE=8,
  TRAIN_EPOCHS=3,
  VAL_EPOCHS=1,
  LEARNING_RATE=1e-4,
)


In [None]:
# Prepend the task prefix to every source sentence. Rows that already carry it
# are left alone, so re-running this cell does not stack
# "translate: translate: ..." onto the inputs.
TASK_PREFIX = "translate: "
already_prefixed = df['source_text'].str.startswith(TASK_PREFIX, na=False)
df['source_text'] = df['source_text'].where(already_prefixed, TASK_PREFIX + df['source_text'])

T5Trainer(
  dataframe=df,
  source_text="source_text",
  target_text="target_text",
  model_params=model_params,
  output_dir=base_dir,
)



[Model]: Loading t5-small...

[Data]: Reading data...

FULL Dataset: (15825, 2)
TRAIN Dataset: (12660, 2)
TEST Dataset: (3165, 2)

[Initiating Fine Tuning]...

Epoch 0: , Loss: 4.89830207824707 , Perplexity: 134.06195068359375
Epoch 0: , Loss: 1.7635443210601807 , Perplexity: 5.833075523376465
Epoch 0: , Loss: 1.8753163814544678 , Perplexity: 6.522882461547852
Epoch 0: , Loss: 2.4307737350463867 , Perplexity: 11.367673873901367
Epoch 0: , Loss: 2.1593363285064697 , Perplexity: 8.665384292602539
Epoch 0: , Loss: 1.8743089437484741 , Perplexity: 6.516314506530762
Epoch 0: , Loss: 1.7388269901275635 , Perplexity: 5.690664291381836
Epoch 0: , Loss: 2.0697734355926514 , Perplexity: 7.923027992248535
Epoch 1: , Loss: 1.9992225170135498 , Perplexity: 7.3833136558532715
Epoch 1: , Loss: 1.3037452697753906 , Perplexity: 3.6830649375915527
Epoch 1: , Loss: 3.2679738998413086 , Perplexity: 26.258085250854492
Epoch 1: , Loss: 1.607812762260437 , Perplexity: 4.991880893707275
Epoch 1: , Loss: 2.142

In [None]:
# Load the predictions file written by T5Trainer into a frame for scoring.
predictions_path = os.path.join(base_dir, 'predictions.csv')
new_df = pd.read_csv(predictions_path)
new_df

Unnamed: 0.1,Unnamed: 0,Generated Text,Actual Text
0,0,Thosen you know my heart’s dear love is set on...,I love rich Capulet’s daughter.
1,1,"Holy Saint Francis, what a change is here!","Holy Saint Francis, this is a drastic change!"
2,2,"Is Rosaline, whom you love so dearly, so soon ...","Have you given up so quickly on Rosaline, whom..."
3,3,"Unless you were yourself, and these woes are y...","If you were ever yourself, and this sadness wa..."
4,4,Pronounce this sentence then.,Then repeat this after me: you can’t expect wo...
...,...,...,...
3160,3160,"It’s my fashion, when I see a crab.","That’s my way, when I see a crab-apple."
3161,3161,"Unless I had a glass, I would.","I would, if I had a mirror."
3162,3162,"Nevertheless, you’re withered.","Maybe, but you’re wrinkled all the same."
3163,3163,A witty mother!,It’s born of my A witty mother!


In [None]:
# Tokenise references and generated sentences on single spaces.
target_list = [sentence.split(" ") for sentence in new_df['Actual Text']]
tl_list = [sentence.split(" ") for sentence in new_df['Generated Text']]
print(tl_list)

# corpus_bleu takes a list of references per hypothesis.
target_list_corpus = [[reference] for reference in target_list]
print(target_list_corpus)

print(bleu(target_list_corpus, tl_list))

# METEOR is computed per sentence and averaged over the set.
m_score = sum(
  meteor([reference], hypothesis)
  for reference, hypothesis in zip(target_list, tl_list)
)
print("METEOR: ")
print(m_score/len(target_list))

[['Thosen', 'you', 'know', 'my', 'heart’s', 'dear', 'love', 'is', 'set', 'on', 'the', 'fair', 'daughter', 'of', 'rich', 'Capulet.'], ['Holy', 'Saint', 'Francis,', 'what', 'a', 'change', 'is', 'here!'], ['Is', 'Rosaline,', 'whom', 'you', 'love', 'so', 'dearly,', 'so', 'soon', 'forsaken?'], ['Unless', 'you', 'were', 'yourself,', 'and', 'these', 'woes', 'are', 'yours,', 'you', 'and', 'Rosaline', 'were', 'all', 'for', 'you.'], ['Pronounce', 'this', 'sentence', 'then.'], ['I’m', 'a', 'pupil', 'for', 'doingting,', 'not', 'for', 'loving.'], ['I’ll', 'beg', 'you,', 'don’t', 'chide', 'me.'], ['I’m', 'ready', 'for', 'a', 'sudden', 'grab.'], ['Ohne', 'his', 'roe,', 'like', 'a', 'dried', 'herring.'], ['Switch', 'and', 'spurs,', 'switch', 'and', 'spurs,', 'switch', 'and', 'spurs,', 'or', 'I’ll', 'cry.'], ['Nay,', 'if', 'our', 'wits', 'run', 'the', 'wild-goose', 'chase,', 'I’m', 'done.'], ['Delicate', 'wit', 'is', 'a', 'very', 'bitter', 'sweeting.'], ['I’m', 'referring', 'to', 'the', 'word', '“broad

NameError: ignored