In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import numpy as np
from batchify import Corpus
import random
import torch.optim as optim
from torch.autograd import Variable
from model import Encoder, Decoder

In [2]:
# Build the project's Corpus object; later cells draw training and validation
# batches from it and read its `w2i` mapping.
corpus = Corpus()

In [3]:
# Prefer the GPU when one is visible, otherwise fall back to the CPU so the
# notebook still runs (slowly) on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [4]:
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, corpus):
    """Run one optimisation step on a single minibatch and return its loss.

    Args:
        input_tensor: source token ids handed to ``encoder`` (cast to int64).
        target_tensor: target token ids, indexed here as ``[batch, step]``
            (cast to int64).
        encoder: callable returning ``(encoded, hidden)``.
        decoder: callable invoked as
            ``decoder(encoded, hidden, decoder_input, dec_len, target_tensor)``
            and returning ``(decoded, hidden, outputs)``; ``outputs`` is
            indexed as ``[batch, step, :]`` and fed to ``criterion``.
        encoder_optimizer: optimiser over the encoder parameters.
        decoder_optimizer: optimiser over the decoder parameters.
        criterion: per-step loss taking ``(outputs[:, t, :], target[:, t])``.
        corpus: object exposing a ``w2i`` mapping that contains ``'<sos>'``.

    Returns:
        float: the summed per-step loss divided by ``target_tensor.size(1)``.
        A plain Python number is returned (not a tensor) so that callers can
        keep a loss history without holding on to device memory or the
        autograd graph.
    """
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    # Same int64 casts that validation() applies; class-index targets must be
    # int64 for the loss, and a no-op if the tensors already are.
    input_tensor = input_tensor.long()
    target_tensor = target_tensor.long()

    encoded, hidden = encoder(input_tensor)

    batch_size, dec_len = target_tensor.size(0), target_tensor.size(1)
    # One <sos> id per sequence, created directly on the targets' device
    # instead of going through NumPy and a hard-coded .cuda().
    decoder_input = torch.full(
        (batch_size, 1),
        int(corpus.w2i['<sos>']),
        dtype=torch.long,
        device=target_tensor.device,
    )

    _, hidden, outputs = decoder(encoded, hidden, decoder_input, dec_len, target_tensor)

    # Accumulate the loss one decoding step at a time.
    loss = 0
    for step in range(outputs.size(1)):
        loss += criterion(outputs[:, step, :], target_tensor[:, step])

    loss.backward()
    # Clip each network's gradient norm to 2 before the update.
    torch.nn.utils.clip_grad_norm_(encoder.parameters(), 2)
    torch.nn.utils.clip_grad_norm_(decoder.parameters(), 2)

    encoder_optimizer.step()
    decoder_optimizer.step()

    return float(loss) / dec_len

In [5]:
def validation(input_tensor, target_tensor, encoder, decoder, criterion, corpus):
    """Compute the loss on one validation batch without updating any weights.

    Args:
        input_tensor: source token ids handed to ``encoder`` (cast to int64).
        target_tensor: target token ids, indexed here as ``[batch, step]``
            (cast to int64).
        encoder: callable returning ``(encoded, hidden)``.
        decoder: callable invoked with the same positional arguments as in
            training plus ``val=True``; returns ``(decoded, hidden, outputs)``.
        criterion: per-step loss taking ``(outputs[:, t, :], target[:, t])``.
        corpus: object exposing a ``w2i`` mapping that contains ``'<sos>'``.

    Returns:
        float: the summed per-step loss divided by ``target_tensor.size(1)``.
        A plain Python number is returned (not a tensor) so that the value can
        be logged, compared and checkpointed without holding device memory.
    """
    # NOTE(review): the modules are not switched to eval() here; only the
    # decoder's own `val` flag is set. Confirm that is sufficient if either
    # network contains dropout or similar train-only layers.
    with torch.no_grad():
        input_tensor = input_tensor.long()
        target_tensor = target_tensor.long()

        encoded, hidden = encoder(input_tensor)

        batch_size, dec_len = target_tensor.size(0), target_tensor.size(1)
        # One <sos> id per sequence, created directly on the targets' device
        # instead of going through NumPy and a hard-coded .cuda().
        decoder_input = torch.full(
            (batch_size, 1),
            int(corpus.w2i['<sos>']),
            dtype=torch.long,
            device=target_tensor.device,
        )

        _, hidden, outputs = decoder(
            encoded, hidden, decoder_input, dec_len, target_tensor, val=True
        )

        # Accumulate the loss one decoding step at a time.
        loss = 0
        for step in range(outputs.size(1)):
            loss += criterion(outputs[:, step, :], target_tensor[:, step])

    return float(loss) / dec_len

In [6]:
# Instantiate both halves of the seq2seq model and place them on `device`
# in a single expression each.
encoder = Encoder(vocab_size=50000, embedding_size=128, hidden_size=256).to(device)
decoder = Decoder(vocab_size=50000, embedding_dim=128, hidden_dim=256).to(device)

In [7]:
# Alternative optimiser configuration (Adagrad), currently disabled:
#encoder_optimizer = optim.Adagrad(encoder.parameters(), lr=0.15, initial_accumulator_value=0.1)
#decoder_optimizer = optim.Adagrad(decoder.parameters(), lr=0.15, initial_accumulator_value=0.1)

# One Adam optimiser per network, both with the library's default settings.
encoder_optimizer, decoder_optimizer = (
    optim.Adam(net.parameters()) for net in (encoder, decoder)
)

# Negative log-likelihood loss, applied per decoding step by train()/validation().
# NOTE(review): NLLLoss expects log-probabilities — confirm the decoder's
# `outputs` are log-softmaxed.
criterion = nn.NLLLoss()

In [8]:
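# Optional: resume from a previously saved checkpoint ('model.pth').
# Uncomment the lines in this cell to restore the models, optimisers and loss history.
#checkpoint = torch.load('model.pth')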
#encoder.load_state_dict(checkpoint['encoder_state_dict'])
#decoder.load_state_dict(checkpoint['decoder_state_dict'])
#encoder_optimizer.load_state_dict(checkpoint['encoder_optimizer_state_dict'])
#decoder_optimizer.load_state_dict(checkpoint['decoder_optimizer_state_dict'])
#step = checkpoint['step']
#tl = checkpoint['training_loss']
#vl = checkpoint['validation_loss']

#encoder.train()
#decoder.train()

Decoder(
  (attn): Linear(in_features=500, out_features=400, bias=True)
  (attn_combine): Linear(in_features=500, out_features=250, bias=True)
  (linear1): Linear(in_features=500, out_features=250, bias=True)
  (linear2): Linear(in_features=500, out_features=250, bias=True)
  (embedding): Embedding(50000, 250)
  (gru): GRU(250, 250, batch_first=True)
  (linear): Linear(in_features=250, out_features=50000, bias=True)
)

In [8]:
# Manually override the corpus's `counter` attribute before training starts.
# NOTE(review): 47820 is a hand-entered value, presumably a position to resume
# reading data from — confirm against the Corpus implementation.
corpus.counter = 47820
#print(corpus.counter,step)

In [None]:
# Loss histories, one entry per step. Values are stored as plain Python floats
# so the lists (and the checkpoint that embeds them) never hold device tensors
# or autograd graphs.
tl = []
vl = []
num_steps = 10000
# Best (lowest) validation loss seen so far; the initial value acts as a
# ceiling that must be reached before the first checkpoint is written.
val_loss_benchmark = 10

for i in range(num_steps):
    input_tensor_train, target_tensor_train = corpus.get_train_minibatch()
    input_tensor_val, target_tensor_val = corpus.get_validation_batch()

    # Move the batches to the configured device. The deprecated `Variable`
    # wrapper is not needed: plain tensors already participate in autograd.
    input_tensor_train = input_tensor_train.to(device)
    target_tensor_train = target_tensor_train.to(device)
    input_tensor_val = input_tensor_val.to(device)
    target_tensor_val = target_tensor_val.to(device)

    # float() accepts both a Python number and a one-element tensor, so this
    # works regardless of which of the two train()/validation() return.
    train_loss = float(train(input_tensor_train, target_tensor_train, encoder, decoder,
                             encoder_optimizer, decoder_optimizer, criterion, corpus))
    val_loss = float(validation(input_tensor_val, target_tensor_val, encoder, decoder,
                                criterion, corpus))
    tl.append(train_loss)
    vl.append(val_loss)
    print ('Step: {}/{} | Training Loss: {} | Validation Loss: {}'.format(i+1,num_steps,train_loss,val_loss))

    # Skip the first few steps, then checkpoint whenever the validation loss
    # matches or beats the best value seen so far.
    if i > 10 and val_loss <= val_loss_benchmark:
        print ('%---Saving the model---%')
        torch.save({
            'step': i + 1,
            'encoder_state_dict': encoder.state_dict(),
            'decoder_state_dict': decoder.state_dict(),
            'encoder_optimizer_state_dict': encoder_optimizer.state_dict(),
            'decoder_optimizer_state_dict': decoder_optimizer.state_dict(),
            'training_loss': tl,
            'validation_loss': vl,
            'counter': corpus.counter,
        }, 'model.pth')
        val_loss_benchmark = val_loss

Step: 1/10000 | Training Loss: 10.82170581817627 | Validation Loss: 10.730833053588867
Step: 2/10000 | Training Loss: 10.699522972106934 | Validation Loss: 10.529882431030273
Step: 3/10000 | Training Loss: 10.493638038635254 | Validation Loss: 10.283129692077637
Step: 4/10000 | Training Loss: 10.087101936340332 | Validation Loss: 9.895716667175293
Step: 5/10000 | Training Loss: 9.86425495147705 | Validation Loss: 9.364771842956543
Step: 6/10000 | Training Loss: 9.708233833312988 | Validation Loss: 8.783259391784668
Step: 7/10000 | Training Loss: 9.028059005737305 | Validation Loss: 8.28476333618164
Step: 8/10000 | Training Loss: 8.010141372680664 | Validation Loss: 7.925635814666748
Step: 9/10000 | Training Loss: 7.960357666015625 | Validation Loss: 7.657452583312988
Step: 10/10000 | Training Loss: 7.234936237335205 | Validation Loss: 7.4276323318481445
Step: 11/10000 | Training Loss: 7.411562442779541 | Validation Loss: 7.225571155548096
Step: 12/10000 | Training Loss: 6.0483646392822

In [17]:
# Display the corpus's current `counter` value (the same attribute the training
# loop stores in each checkpoint under the 'counter' key).
corpus.counter

15160