# GPU utilization
TODO: not yet measured (???/epoch)

In [1]:
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from data_prepare import TextPairs
import numpy as np
import pickle as pkl
from torch.utils.data import DataLoader
from RNN import RNN_encoder, RNN_decoder
from Transformer import Transformer_encoder, Transformer_decoder
from datetime import datetime
from math import sin, cos
from nltk.translate.bleu_score import corpus_bleu

%load_ext autoreload
%autoreload 2

# ---- Training schedule ------------------------------------------------------
NUM_EPOCHS = 5
BATCH_SIZE = 400

# ---- Model / vocabulary sizes -----------------------------------------------
# VOCA_SIZE must be smaller than len(text_pairs.voca['en']) and
# len(text_pairs.voca['de']): use 4000 for the toy set, 15000 for the real set.
VOCA_SIZE = 15000
NUM_LAYERS = 2
HIDDEN_DIM = 256
EMBEDDING_DIM = 200

# ---- Data loading -----------------------------------------------------------
# toy: 10,000 sentences / real: 176,692 sentences
# 4473 ,6706 / 15151, 32829
LOADER_WORKERS = 4

start = datetime.now()
train_pairs = TextPairs(VOCA_SIZE, train=True, toy=False)
val_pairs = TextPairs(VOCA_SIZE, train=False, toy=False)

# Report the size of the English and German vocabularies, in that order.
for lang in ('en', 'de'):
    print(len(train_pairs.voca[lang]))

trainLoader = DataLoader(
    train_pairs,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=LOADER_WORKERS,
)
# No batch_size is given here, so DataLoader's default applies; the validation
# loop further down only ever reads element 0 of each batch.
valLoader = DataLoader(val_pairs, num_workers=LOADER_WORKERS)

# Positions (batch indices in valLoader) whose prediction/target pair is
# printed during validation.
SAMPLE = [15, 5015, 10015]
MAX_LEN = train_pairs.max_len

elapsed = datetime.now() - start
print(f'\nElapsed time: {elapsed}')
print(f'\nData_length: {len(train_pairs)}')
print(f'VOCA_SIZE: {VOCA_SIZE}')

15000
15000

Elapsed time: 0:02:20.049706

Data_length: 159022
VOCA_SIZE: 15000


In [2]:
# Load the pre-pickled GloVe lookup (word -> vector).
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only point this at a file you produced or otherwise trust.
with open('Data/Glove/glove.6B.200d.pkl', 'rb') as glove_file:
    glove = pkl.load(glove_file)

# One row per English vocabulary id; rows start out as zeros.
embedding_matrix = torch.zeros(VOCA_SIZE, EMBEDDING_DIM)

en_word2id = train_pairs.word2id['en']
for word in train_pairs.voca['en']:
    vector = glove.get(word)
    if vector is not None:
        # Word is covered by GloVe: copy its pretrained vector into the row.
        row = torch.from_numpy(vector)
    else:
        # Word is missing from GloVe: the row is (re)set to an all-zero vector.
        row = torch.zeros(EMBEDDING_DIM)
    embedding_matrix[en_word2id[word]] = row

In [3]:
class RNN_MODEL(nn.Module):
    """Encoder/decoder pair wrapped as a single module.

    The encoder is an ``RNN_encoder`` and the decoder an ``RNN_decoder``; both
    receive the same layer count, hidden size, vocabulary size and embedding
    size.
    """

    def __init__(self, NUM_LAYERS, HIDDEN_DIM, VOCA_SIZE, EMBEDDING_DIM, embedding_matrix, MAX_LEN):
        """Create the encoder and the decoder.

        ``embedding_matrix`` is handed to the encoder only and ``MAX_LEN`` to
        the decoder only; the four size arguments go to both.
        """
        super().__init__()
        shared_args = (NUM_LAYERS, HIDDEN_DIM, VOCA_SIZE, EMBEDDING_DIM)
        self.encoder = RNN_encoder(*shared_args, embedding_matrix)
        self.decoder = RNN_decoder(*shared_args, MAX_LEN)

    def forward(self, encoder_inputs, decoder_inputs, train):
        """Encode ``encoder_inputs`` and decode against the resulting context.

        ``train`` is forwarded to the decoder unchanged. Returns whatever the
        decoder returns.
        NOTE(review): callers reshape the result to (-1, VOCA_SIZE), so the
        last axis is presumably vocabulary-sized -- confirm in RNN_decoder.
        """
        return self.decoder(decoder_inputs, self.encoder(encoder_inputs), train)


model = RNN_MODEL(NUM_LAYERS, HIDDEN_DIM, VOCA_SIZE, EMBEDDING_DIM, embedding_matrix, MAX_LEN)

# Device handles used by the training cell: `gpu` for training, `cpu` for
# validation. GPU index 2 is still preferred, but the notebook no longer
# crashes on machines that expose fewer than three CUDA devices: it falls back
# to the first GPU, and to the CPU when CUDA is unavailable.
cpu = torch.device('cpu')
if torch.cuda.device_count() > 2:
    gpu = torch.device('cuda:2')
elif torch.cuda.is_available():
    gpu = torch.device('cuda:0')
else:
    gpu = cpu
model = model.to(gpu)

# Count every element of every registered parameter tensor.
num_params = sum(p.numel() for p in model.parameters())
print(f'Total number of parameters: {num_params}')

Total number of parameters: 12881889


In [4]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Run validation (and report the averaged training loss) every EVAL_EVERY
# training batches.
EVAL_EVERY = 150  # len(trainLoader) = 398
# Token id fed to the decoder as its only input when generating.
SOS_ID = 2
# Token ids removed when converting ids back to words.
# NOTE(review): presumably padding / start / end markers -- confirm in TextPairs.
SKIP_IDS = [0, 2, 3]

print(datetime.now())

train_losses = []
val_losses = []
BLEUS = []

# Make sure the module starts in training mode even if this cell is re-run
# after an interrupted validation pass.
model.train()

for epoch in range(NUM_EPOCHS):
    # The running loss restarts every epoch, so batches after the last full
    # EVAL_EVERY window of an epoch are trained on but never reported.
    train_loss = 0
    print("\n\n")
    print(f'Epoch: {epoch+1}\t', datetime.now())

    for i, data in enumerate(trainLoader):
        en_text, de_text = data['en'], data['de']

        # The decoder receives the German sequence without its last token and
        # is scored against the same sequence shifted left by one position.
        encoder_inputs, decoder_inputs, targets = en_text, de_text[:, :-1], de_text[:, 1:]
        encoder_inputs = encoder_inputs.to(gpu)
        decoder_inputs = decoder_inputs.to(gpu)
        targets = targets.to(gpu)

        preds = model(encoder_inputs, decoder_inputs, train=True)
        loss = criterion(preds.view(-1, VOCA_SIZE), targets.contiguous().view(-1))

        optimizer.zero_grad()
        loss.backward()
        # Accumulate the mean loss over the current EVAL_EVERY-batch window.
        train_loss += loss.item() / EVAL_EVERY
        optimizer.step()

        if (i + 1) % EVAL_EVERY == 0:
            train_losses.append(train_loss)
            references, hypotheses = [], []
            val_loss = 0

            # Validation runs on the CPU, one sentence pair per batch.
            model = model.to(cpu)
            # Switch mode-dependent layers (e.g. dropout) to inference
            # behaviour; previously validation ran in training mode.
            model.eval()

            with torch.no_grad():
                for j, val_data in enumerate(valLoader):
                    val_en, val_de = val_data['en'], val_data['de']

                    # Generate from a lone start token instead of the target.
                    sos = torch.tensor([[SOS_ID]])
                    val_preds = model(val_en, sos, train=False)

                    # Copy the generated rows into a MAX_LEN x VOCA_SIZE buffer
                    # so it lines up with the target; rows past the generated
                    # length stay all-zero.
                    # NOTE(review): the criterion has no ignore_index, so those
                    # zero rows and every target position count toward the loss.
                    preds_loss = val_preds.new_zeros(MAX_LEN, VOCA_SIZE)
                    preds_loss[:len(val_preds[0])] = val_preds[0]
                    val_targets = val_de[:, 1:]
                    val_batch_loss = criterion(preds_loss, val_targets.contiguous().view(-1))
                    val_loss += val_batch_loss.item() / len(valLoader)

                    # Greedy decoding: most likely token at each step, with the
                    # SKIP_IDS tokens removed.
                    tokens = torch.argmax(val_preds[0], dim=-1)
                    hypothesis = [val_pairs.voca['de'][t] for t in tokens if t not in SKIP_IDS]

                    # References are looked up by the space-joined English
                    # source words (same SKIP_IDS filter).
                    source_key = ' '.join(
                        [val_pairs.voca['en'][t] for t in val_en[0] if t not in SKIP_IDS]
                    )
                    reference = val_pairs.hyp2ref[source_key]

                    references.append(reference)
                    hypotheses.append(hypothesis)

                    if j in SAMPLE:
                        print('Pred:\t', hypothesis)
                        print('Target:\t', reference[0])

            val_losses.append(val_loss)
            BLEUS.append(corpus_bleu(references, hypotheses))
            print(f'Train loss:\t{train_losses[-1]:.3f}')
            print(f'Val loss:\t{val_losses[-1]:.3f}')
            print('BLEU score:\t', BLEUS[-1])

            # Start a fresh averaging window and resume training on the GPU.
            train_loss = 0
            model.train()
            model = model.to(gpu)


2019-04-12 09:50:37.630591



Epoch: 1	 2019-04-12 09:50:37.631212
Pred:	 ['ich', 'ist', ',', 'ich', 'ist', ',', 'ich', 'ist', ',', 'dass', 'ich', 'ist', ',', 'dass', 'ich', ',', 'dass', 'ich', ',', 'dass', 'ich', ',', 'dass', 'ich', ',', 'dass', 'tom']
Target:	 ['ich', 'bin', 'am', 'verhungern', 'los', ',', 'gib', 'mir', 'schnell', 'etwas', 'zu', 'essen']
Pred:	 ['ich', 'ist', ',', 'ich', 'ist', ',', 'ich', 'ist', ',', 'dass', 'ich', ',', 'dass', 'ich', ',', 'dass', 'ich', ',', 'dass', 'ich', ',', 'dass', 'ich', ',', 'dass', 'tom']
Target:	 ['eine', 'person', 'kann', 'eine', 'andere', 'person', 'nicht', 'vollkommen', 'verstehen']
Pred:	 ['ich', 'ist', ',', 'ich', 'ist', ',', 'ich', 'ist', ',', 'ich', 'ist', ',', 'dass', 'ich', 'ist', ',', 'dass', 'ich', ',', 'dass', 'ich', ',', 'dass', 'ich', ',', 'dass', 'tom']
Target:	 ['tom', 'und', 'maria', 'hatten', 'beide', 'keine', 'ahnung', ',', 'wovon', 'johannes', 'redete']
Train loss:	8.943
Val loss:	8.045
BLEU score:	 0.0023199470306732033