In [353]:
%load_ext autoreload
%autoreload 2

import torch
import torch.nn as nn

from torchtext.data import Field, BucketIterator, TabularDataset
from torchtext.data.metrics import bleu_score

import spacy
import string

from utils import init_random_seed, train_eval_loop_lstm, \
    get_params_number, train_eval_loop_attention
from modelLSTM import Encoder, Decoder, Seq2Seq
from modelAttention import Transformer

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [354]:
init_random_seed(1)

In [355]:
path_dataset = './Data/multi30k'

spacy_ger = spacy.load('de_core_news_sm')
spacy_eng = spacy.load('en_core_web_sm')

In [356]:
def tokenizer_ger(text):
    return [tok.text for tok in spacy_ger.tokenizer(text) if tok.text not in string.punctuation]

In [357]:
def tokenizer_eng(text):
    return [tok.text for tok in  spacy_eng.tokenizer(text) if tok.text not in string.punctuation]

In [358]:
german = Field(tokenize=tokenizer_ger, 
               lower=True,
               init_token='<sos>',
               eos_token='<eos>')

english = Field(tokenize=tokenizer_eng,
               lower=True,
               init_token='<sos>',
               eos_token='<eos>')

fields = [('english', english), ('german', german)]

In [359]:
train_data, valid_data, test_data = TabularDataset.splits(
    path=path_dataset,
    train='train.csv',
    validation='valid.csv',
    test='test.csv',
    format='csv',
    fields=fields,
    skip_header=True
)

In [360]:
len(train_data), len(valid_data), len(test_data)

(28000, 1000, 1000)

In [361]:
german.build_vocab(train_data, max_size=10000, min_freq=2)
english.build_vocab(train_data, max_size=10000, min_freq=2)

In [362]:
len(german.vocab), len(english.vocab)

(7665, 5757)

In [363]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
load_model = True

In [364]:
# save_path_model = '../../Pre-trained Models/Transformer(LSTM) For Translation.pth'
# 
# num_epochs = 25
# lr = 1.0e-3
# batch_size = 128
# early_stopping_patience = 5
# scheduler_patience = 3
# teacher_force_ratio = 0.3
# 
# input_size_encoder = len(german.vocab)
# input_size_decoder = len(english.vocab)
# output_size = len(english.vocab)
# encoder_embedding_size = 100
# decoder_embedding_size = 100
# hidden_size = 512
# num_layers = 2
# enc_dropout = 0.3
# dec_dropout = 0.3

In [365]:
# encoder_net = Encoder(input_size_encoder,
#                       encoder_embedding_size,
#                       hidden_size,
#                       num_layers,
#                       enc_dropout).to(device)
# 
# decoder_net = Decoder(input_size_decoder,
#                       decoder_embedding_size,
#                       hidden_size,
#                       output_size,
#                       num_layers,
#                       dec_dropout).to(device)
# 
# model = Seq2Seq(encoder_net, decoder_net, device, len(english.vocab)).to(device)

In [366]:
save_path_model = './Pre-trained/Transformer(Attention) For Translation.pth'

num_epochs = 15
lr = 1.0e-3
batch_size = 2
early_stopping_patience = 3
scheduler_patience = 2

src_vocab_size = len(german.vocab)
trg_vocab_size = len(english.vocab)
embedding_size = 128
num_heads = 8
num_encoder_layers = 3
num_decoder_layers = 3
dropout = 0.1
max_len = 40
forward_expansion = 2048
src_pad_idx = english.vocab.stoi['<pad>']

In [367]:
model = Transformer(
    embedding_size,
    src_vocab_size,
    trg_vocab_size,
    src_pad_idx,
    num_heads,
    num_encoder_layers,
    num_decoder_layers,
    forward_expansion,
    dropout,
    max_len,
    device
).to(device)

In [368]:
if load_model:
    model.load_state_dict(torch.load(save_path_model, map_location=device))

RuntimeError: Error(s) in loading state_dict for Transformer:
	size mismatch for src_position_embedding.weight: copying a param with shape torch.Size([100, 128]) from checkpoint, the shape in current model is torch.Size([40, 128]).
	size mismatch for trg_position_embedding.weight: copying a param with shape torch.Size([100, 128]) from checkpoint, the shape in current model is torch.Size([40, 128]).

In [None]:
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [None]:
pad_idx = english.vocab.stoi['<pad>']
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)

In [None]:
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                       patience=scheduler_patience,
                                                       verbose=True)

In [None]:
train_iterator, valid_iterator = BucketIterator.splits(
    (train_data, valid_data),
    batch_size=batch_size,
    shuffle=True,
    sort=False,
    device=device
)

In [None]:
get_params_number(model)

In [380]:
best_model = train_eval_loop_attention(model,
                                       train_iterator, valid_iterator,
                                       optimizer, criterion,
                                       num_epochs,
                                       early_stopping_patience,
                                       scheduler)

Epoch [0 / 15]


  0%|          | 0/14000 [00:00<?, ?it/s]

torch.Size([16, 2])
tensor([[   2,    2],
        [   4,    4],
        [   8,    8],
        [ 435, 1884],
        [   4,    4],
        [1082,   67],
        [   8, 4807],
        [  66, 1974],
        [   4,   12],
        [2089,   49],
        [  26,  828],
        [  80,    7],
        [   8,    4],
        [ 140,  304],
        [   6,    3],
        [3413,    1]], device='cuda:0')
tensor([[0., -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf],
        [0., 0., -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf],
        [0., 0., 0., -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf],
        [0., 0., 0., 0., -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf],
        [0., 0., 0., 0., 0., -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf],
        [0., 0., 0., 0., 0., 0., -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf],
        [0., 0.,




In [116]:
# best_model = train_eval_loop_lstm(model,
#                                   train_iterator, valid_iterator,
#                                   optimizer, criterion,
#                                   num_epochs,
#                                   early_stopping_patience,
#                                   scheduler,
#                                   teacher_force_ratio)

In [117]:
# torch.save(best_model.state_dict(), save_path_model)

In [118]:
# best_model = train_eval_loop_attention(model,
#                                        train_iterator, valid_iterator,
#                                        optimizer, criterion,
#                                        num_epochs,
#                                        early_stopping_patience,
#                                        scheduler)

In [119]:
# torch.save(best_model.state_dict(), save_path_model)

In [120]:
# def translate_sentence(model, sentence, german, english, device, max_length=50):
#     
#     # Create tokens using spacy and everything in lower case (which is what our vocab is)
#     if type(sentence) == str:
#         tokens = [tok.text.lower() for tok in spacy_ger.tokenizer(sentence) if tok.text not in string.punctuation]
#     else:
#         tokens = [token.lower() for token in sentence]
# 
#     # Add <SOS> and <EOS> in beginning and end respectively
#     tokens.insert(0, german.init_token)
#     tokens.append(german.eos_token)
# 
#     # Go through each german token and convert to an index
#     text_to_indices = [german.vocab.stoi[token] for token in tokens]
# 
#     # Convert to Tensor
#     sentence_tensor = torch.LongTensor(text_to_indices).unsqueeze(1).to(device)
# 
#     # Build encoder hidden, cell state
#     model.eval()
#     with torch.no_grad():
#         hidden, cell = model.encoder(sentence_tensor)
#     
#     outputs = [english.vocab.stoi['<sos>']]
#     
#     for _ in range(max_length):
#         previous_word = torch.LongTensor([outputs[-1]]).to(device)
#     
#         with torch.no_grad():
#             output, hidden, cell = model.decoder(previous_word, hidden, cell)
#     
#             best_guess = output.argmax(1).item()
#     
#         outputs.append(best_guess)
#     
#         # Model predicts it's the end of the sentence
#         if outputs[-1] == english.vocab.stoi['<eos>']:
#             break
#     
#     translated_sentence = [english.vocab.itos[idx] for idx in outputs]
#     
#     return translated_sentence[1:-1]

In [167]:
def translate_sentence(model, sentence, german, english, device, max_length=50):

    # Create tokens using spacy and everything in lower case (which is what our vocab is)
    if type(sentence) == str:
        tokens = [tok.text.lower() for tok in spacy_ger.tokenizer(sentence) if tok.text not in string.punctuation]
    else:
        tokens = [token.lower() for token in sentence]

    # Add <SOS> and <EOS> in beginning and end respectively
    tokens.insert(0, german.init_token)
    tokens.append(german.eos_token)

    # Go through each german token and convert to an index
    text_to_indices = [german.vocab.stoi[token] for token in tokens]

    # Convert to Tensor
    sentence_tensor = torch.LongTensor(text_to_indices).unsqueeze(1).to(device)
    outputs = [english.vocab.stoi['<sos>']]
    
    model.eval()
    
    for _ in range(max_length):
        trg_tensor = torch.LongTensor(outputs).unsqueeze(1).to(device)
        
        with torch.no_grad():
            output = model(sentence_tensor, trg_tensor)
        
        best_guess = output.argmax(2)[-1, :].item()
        
        outputs.append(best_guess)

        # Model predicts it's the end of the sentence
        if outputs[-1] == english.vocab.stoi['<eos>']:
            break

    translated_sentence = [english.vocab.itos[idx] for idx in outputs]

    return translated_sentence[1:-1]

In [168]:
sentence = 'Ein junges Mädchen schwimmt in einem Pool'

trans_sent = translate_sentence(model, sentence,
                                german, english,
                                device, max_length=100)

In [169]:
print(' '.join(trans_sent))

a young girl swimming in a pool


In [130]:
def bleu(data, model, german, english, device):
    targets = []
    outputs = []
    orig = []
    
    for example in data:
        src = example.german
        trg = example.english
        
        prediction = translate_sentence(model, src, 
                                        german, english, 
                                        device,
                                        max_length=50)
        
        orig.append(src)
        targets.append([trg])
        outputs.append(prediction)
        
    return bleu_score(outputs, targets), outputs, targets, orig

In [131]:
bleu_num, outputs, targets, orig = bleu(test_data, model, german, english, device)

In [132]:
print(f'Blue Score: {bleu_num*100:.2f}')

Blue Score: 35.25


In [133]:
num_example = 5

print("\tGerman text:")
for i in range(num_example):
    print(' '.join(orig[-i]))
    
print()
print("\tEnglish text:")
for i in range(num_example):
    print(' '.join(targets[-i][0]))
    
print()
print("\tNeural text:")
for i in range(num_example):
    print(' '.join(outputs[-i]))

	German text:
ein mann mit einem orangefarbenen hut der etwas anstarrt
ein mädchen an einer küste mit einem berg im hintergrund
ein älterer mann spielt ein videospiel
ein paar kinder sind im freien und spielen auf dem boden bei zwei bäumen
asiatische frau trägt einen sonnenhut beim fahrradfahren

	English text:
a man in an orange hat starring at something
a girl at the shore of a beach with a mountain in the distance
an older man is playing a video arcade game
some children are outside playing in the dirt where two trees are
asian woman wearing a sunhat while riding a bike

	Neural text:
a man in an orange hat <unk> something
a girl on a shore with a mountain in the background
an older man is playing a video game
a few children are outside playing and playing on the floor near two trees
asian woman carrying a sunhat around her fishing pole
