In [15]:
%run LanguageModel.py
%run DataLoader.py
%run rnn_utils.py
%run encoder.py
%run decoder.py
%run seq2seq.py
%run model_config.py
%run metrics.py
%run ScorePrinter.py
import numpy as np
import math
import json

In [16]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MAX_LENGTH = 50

In [17]:
train_dl = DataLoader('train', ('de', 'en'), max_length = MAX_LENGTH, filter_token = 10, device = device)
val_dl = DataLoader('dev', ('de', 'en'), languageModels = train_dl.languageModels, max_length = MAX_LENGTH, device = device)

In [18]:
lm1 = train_dl.languageModels[train_dl.languages[0]]
lm2 = train_dl.languageModels[train_dl.languages[1]]
model_config = ModelConfig(input_size = lm1.n_tokens, 
                           beam_width = 3, 
                           hidden_size = 50, 
                           output_size = lm2.n_tokens, 
                           rnn_type='gru', 
                           bidirectional = False,
                           bidirectional_type='concat', 
                           attention = False, 
                           score = 'dot', 
                           learning_rate = 3 * 10**(-4),
                           max_length=MAX_LENGTH+2) # +2 for SOS and EOS
#checkpoint = torch.load("./state_dict.tar")
s2s = seq2seq(model_config=model_config, state_dict = None, device = device)

In [19]:
len(train_dl)

67051

In [20]:
len(lm1.index_token_map)

7565

In [21]:
max(list(lm1.index_token_map.keys()))

7564

In [22]:
lm1.n_tokens

7565

In [23]:
len(lm1.token_index_map)

7562

In [24]:
max(list(lm1.token_index_map.values()))

7564

In [25]:
def train_epochs(epochs, print_every=1000):
    n_iters = len(train_dl)
    score_printer = ScorePrinter("Training", [('NLL / len', loss_metric), ('NLL', nll_loss), ('Perplexity', perplexity)]) # ('BLEU', bleu)

    for epoch in range(1, epochs+1):
        score_printer.startEpoch(epoch)
        idx_permutation = np.random.permutation(len(train_dl))[:100]

        for i, index in enumerate(idx_permutation):
            input_tensor, target_tensor = train_dl.tensorsFromPos(index)

            loss, output_sentence = s2s.train(input_tensor, target_tensor)
            real_target_sentence, estimated_target_sentence = train_dl.real_estimated_sentence(target_tensor, output_sentence)
            score_printer.update(nll = loss, target_length = target_tensor.size(0), real_target_sentence=real_target_sentence, estimated_target_sentence=estimated_target_sentence)
            
            if (i + 1) % print_every == 0:
                score_printer.printAvg(print_every, last = 200)
        
        #score_printer.printAvg(len(train_dl))
        val_avg_score = validate(100)
        train_avg_score = score_printer.getAvgScores()
        with open('./Validation_scores' + str(epoch) + '.txt', 'a') as validation_scores, open('./Training_scores' + str(epoch) + '.txt', 'a') as training_scores:
            validation_scores.write(json.dumps(val_avg_score) + '\n')
            training_scores.write(json.dumps(train_avg_score) + '\n')
        #print(f"Val_avg_score: {val_avg_score}, train_avg_score: {train_avg_score}")
        score_printer.endEpoch(epoch)
        
        torch.save(s2s.state_dict(),"./state_dict_"+str(epoch)+".tar")

In [26]:
def validate(n = None):
    score_printer = ScorePrinter("Validation", [('NLL normalized', loss_metric),('Perplexity', perplexity)]) #('BLEU', bleu)
    n = n or len(val_dl)
    idx_permutation_val = np.random.permutation(len(val_dl))[:n]
    score_printer.beginMeasurements()
    for j, val_index in enumerate(idx_permutation_val):
        input_tensor_val, target_tensor_val = val_dl.tensorsFromPos(val_index)
        loss, output_sentence = s2s.evaluate(input_tensor_val, target_tensor_val)
        real_target_sentence, estimated_target_sentence = val_dl.real_estimated_sentence(target_tensor_val, output_sentence)
        print(f"real : {real_target_sentence}, est : {estimated_target_sentence}")
        score_printer.update(nll = loss, target_length = target_tensor_val.size(0), real_target_sentence  = real_target_sentence, estimated_target_sentence = estimated_target_sentence)
    score_printer.printAvg(showCount = False)
    return score_printer.getAvgScores()

In [27]:
train_epochs(20, print_every=15)



Epoch 1 started
 
[Training] 15 examples /  NLL / len: 8.54 NLL: 111.92 Perplexity: 6107.55  
[Training] 30 examples /  NLL / len: 8.65 NLL: 121.55 Perplexity: 6272.94  
[Training] 45 examples /  NLL / len: 8.64 NLL: 115.65 Perplexity: 6110.99  
[Training] 60 examples /  NLL / len: 8.60 NLL: 114.01 Perplexity: 5966.81  
[Training] 75 examples /  NLL / len: 8.54 NLL: 117.28 Perplexity: 5924.39  
[Training] 90 examples /  NLL / len: 8.55 NLL: 117.61 Perplexity: 5875.03 real : so to him , they were just blocks . EOS, est : these , know . EOS
real : it is interesting . EOS, est : know these know know know promise EOS
real : well , how do we UNK up a chromosome ? how do we activate this ? EOS, est : these , abstract present EOS
real : there are effects that you can control live , like UNK and filter . EOS, est : these know know big know know motor EOS
real : but we are beginning to see a sea change . EOS, est : know these know know know these big know EOS
real : and if we do , can we UNK 

In [14]:
from IPython.display import display, Markdown
def print_validation(position):
    input_sentence = val_dl.sentenceFromTensor('de', val_dl.tensorsFromPos(position)[0])
    display(Markdown('**Eingabe**'))
    display(Markdown(' '.join(input_sentence)))
    prediction = s2s.predict(val_dl.tensorsFromPos(position)[0])
    output_sentence = val_dl.sentenceFromTensor('en', prediction[0])
    display(Markdown('**Ausgabe**'))
    display(Markdown(' '.join(output_sentence)))
    attentions = torch.stack([tensor.squeeze() for tensor in prediction[2]])
    attentions = attentions.numpy()[:len(output_sentence)-1,:len(input_sentence)]
    display(Markdown('**Attention**'))
    show_attention(input_sentence, output_sentence, attentions)

In [17]:
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker

def show_attention(input_sentence, output_sentence, attentions):
    # Set up figure with colorbar
    fig = plt.figure(figsize=(16, 14), dpi= 80)
    ax = fig.add_subplot(111)
    cax = ax.matshow(attentions, cmap='bone')
    fig.colorbar(cax)
    
    ax.set_xticks(np.arange(len(input_sentence)))
    ax.set_xticklabels(input_sentence, rotation=90)
    ax.set_yticks(np.arange(len(output_sentence[1:])))
    ax.set_yticklabels(output_sentence[1:]) # ignore SOS Token

    plt.show()

In [19]:
print_validation(1)

**Eingabe**

2 EOS

**Ausgabe**

SOS EOS

AttributeError: 'NoneType' object has no attribute 'squeeze'