In [1]:
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
from masked_cross_entropy import *


In [2]:
def random_sequences(length_from, length_to,
                     vocab_lower, vocab_upper,
                     batch_size):
    """Yield an endless stream of zero-padded batches of random integer sequences.

    Every sequence in a batch gets a length drawn uniformly from
    [length_from, length_to] (both ends inclusive) and tokens drawn uniformly
    from [vocab_lower, vocab_upper). The upper vocabulary bound is EXCLUSIVE
    because it is forwarded as ``high`` to ``np.random.randint``.

    Args:
        length_from: minimum sequence length (inclusive).
        length_to: maximum sequence length (inclusive); also the padded width.
        vocab_lower: smallest token value (inclusive).
        vocab_upper: upper token bound (exclusive).
        batch_size: number of sequences per batch.

    Yields:
        A ``(padded, seq_lengths)`` tuple:
          * ``padded``      -- float64 array of shape (batch_size, length_to);
                               each row holds one sequence right-padded with 0.
          * ``seq_lengths`` -- integer array of shape (batch_size,) with the
                               unpadded length of each row.

    Raises:
        ValueError: if ``length_from > length_to``. Because this is a generator
            function, the error surfaces on the first ``next()`` call.
    """
    if length_from > length_to:
        raise ValueError('length_from > length_to')

    def random_length():
        # Skip the RNG entirely when the length is fixed.
        if length_from == length_to:
            return length_from
        return np.random.randint(length_from, length_to + 1)

    while True:
        # Fresh arrays per batch so previously yielded batches are never mutated.
        padded = np.zeros([batch_size, length_to])
        seq_lengths = np.zeros([batch_size], dtype=int)
        for row in range(batch_size):
            tokens = np.random.randint(low=vocab_lower,
                                       high=vocab_upper,
                                       size=random_length())
            seq_lengths[row] = len(tokens)
            padded[row, :len(tokens)] = tokens

        yield padded, seq_lengths



In [3]:
device = torch.device('cpu')


class EncoderRNN(nn.Module):
    """Bidirectional multi-layer GRU encoder over padded scalar sequences.

    The raw values of each sequence are fed to the GRU directly (no embedding):
    ``forward`` appends a trailing feature axis of size 1, so ``input_size``
    must be 1 for the GRU to accept the input.

    Attributes:
        hidden: the GRU hidden state returned by the most recent ``forward``
            call. It is passed back into the GRU on the next call, so callers
            that want independent batches must reset it to ``None`` themselves.
    """

    def __init__(self, input_size, hidden_size, n_layers=3, dropout=0):
        """
        Args:
            input_size: number of features per time step fed to the GRU.
            hidden_size: GRU hidden width per direction.
            n_layers: number of stacked GRU layers.
            dropout: dropout probability forwarded to ``nn.GRU``.
        """
        super(EncoderRNN, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.dropout = dropout

        self.gru = nn.GRU(input_size, hidden_size, n_layers, dropout=self.dropout, bidirectional=True)
        self.hidden = None

    def forward(self, input_seqs, input_lengths):
        """Encode a padded batch.

        Args:
            input_seqs: tensor of shape (seq_len, batch) holding padded values
                (time-major, matching the GRU's default ``batch_first=False``).
            input_lengths: per-sequence unpadded lengths, shape (batch,).

        Returns:
            Tensor of shape (max_len, batch, hidden_size): the forward and
            backward GRU outputs summed element-wise.
        """
        # Add the feature axis and convert to float without forcing a CPU tensor type.
        input_seqs = input_seqs.unsqueeze(-1).float()
        # enforce_sorted=False lets callers pass batches whose lengths are not
        # sorted in decreasing order; the original order is restored on unpack.
        packed = torch.nn.utils.rnn.pack_padded_sequence(
            input_seqs, input_lengths, enforce_sorted=False)
        outputs, self.hidden = self.gru(packed, self.hidden)
        outputs, _ = torch.nn.utils.rnn.pad_packed_sequence(outputs)  # unpack (back to padded)
        # The last axis is [forward | backward]; sum the two halves.
        outputs = outputs[:, :, :self.hidden_size] + outputs[:, :, self.hidden_size:]

        return outputs
    
    
class DecoderRNN(nn.Module):
    """Bidirectional GRU decoder that emits one output step per ``forward`` call.

    Each call feeds the encoder's last time step into the GRU, projects the
    concatenated forward/backward output down to ``hidden_size`` and then to
    ``output_size``.

    Attributes:
        hidden: the GRU hidden state returned by the most recent ``forward``
            call. It is passed back into the GRU on the next call, so repeated
            calls produce successive steps; reset it to ``None`` to start over.
        max_seq_length: optional value stored for callers; not read by this class.
    """

    def __init__(self, output_size, hidden_size, enc_hidden_size, n_layers=3,
                 max_seq_length=None):
        """
        Args:
            output_size: number of features produced per step.
            hidden_size: GRU hidden width per direction.
            enc_hidden_size: feature width of the encoder outputs fed to the GRU.
            n_layers: number of stacked GRU layers.
            max_seq_length: optional maximum output length, stored as-is.
                Previously read from an undeclared global, which raised
                NameError when that global had not been defined yet.
        """
        super(DecoderRNN, self).__init__()
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.max_seq_length = max_seq_length
        self.enc_hidden_size = enc_hidden_size

        self.gru = nn.GRU(enc_hidden_size, hidden_size, n_layers, bidirectional=True)
        self.hidden = None

        # The bidirectional GRU output is 2 * hidden_size wide.
        self.concat = nn.Linear(hidden_size * 2, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, encoder_outputs):
        """Produce the next output step.

        Args:
            encoder_outputs: tensor indexed time-first; only the last time step
                is used, so its shape is (seq_len, batch, enc_hidden_size).

        Returns:
            Tensor of shape (1, batch, output_size).
        """
        # Use the encoder's final time step as a length-1 input sequence.
        rnn_in = encoder_outputs[-1].unsqueeze(0)

        outputs, self.hidden = self.gru(rnn_in, self.hidden)
        outputs = self.concat(outputs)
        out = self.out(outputs)

        return out
    


In [None]:
# Configuration
max_seq_length = 3
batch_size = 25
input_size = 1
hidden_size = 10

# Seed both RNGs so a fresh "Restart & Run All" reproduces the same run.
np.random.seed(0)
torch.manual_seed(0)

enc_model = EncoderRNN(input_size, hidden_size)
dec_model = DecoderRNN(input_size, hidden_size, hidden_size)

enc_optimizer = torch.optim.Adam(enc_model.parameters(), lr=0.001)
dec_optimizer = torch.optim.Adam(dec_model.parameters(), lr=0.001)
criterion = torch.nn.L1Loss()

# Build the (infinite) batch generator once instead of once per iteration.
# Fixed-length sequences with tokens drawn from 1..8.
batches = random_sequences(max_seq_length, max_seq_length,
                           1, 9,
                           batch_size)

for i in range(10000):
    enc_optimizer.zero_grad()
    dec_optimizer.zero_grad()

    d, s = next(batches)

    # The encoder expects time-major input, hence the transpose.
    enc_out = enc_model(torch.from_numpy(d.T), torch.from_numpy(s))

    # The decoder keeps its hidden state between calls, so calling it
    # max_seq_length times yields the successive output steps.
    # Resulting shape: (max_seq_length, batch_size, input_size).
    all_out = torch.cat([dec_model(enc_out) for _ in range(max_seq_length)], dim=0)

    # Target in the same time-major layout and dtype as the predictions.
    target = torch.from_numpy(d).unsqueeze(-1).transpose(0, 1).type_as(all_out)

    loss = criterion(all_out, target)

    loss.backward()
    enc_optimizer.step()
    dec_optimizer.step()

    # Drop the stored hidden states so the next batch starts fresh and does
    # not hold on to this iteration's autograd graph.
    enc_model.hidden = None
    dec_model.hidden = None

    if i % 100 == 0:
        # Show the first sequence of the batch next to its reconstruction.
        real = target[:, 0].squeeze(-1).numpy()
        pred = all_out[:, 0].squeeze(-1).detach().numpy()
        print("Loss : {}  Real : {}  Pred : {}".format(loss.item(), real, pred))

Loss : 4.6999797098835305  Real : [2. 3. 7.]  Pred : [0.00351813 0.02101257 0.03675535]
Loss : 2.173477840423584  Real : [1. 8. 2.]  Pred : [4.1021719  5.03765106 5.20327425]
Loss : 1.7978454717000325  Real : [1. 6. 2.]  Pred : [4.0892868  4.73458338 4.83386135]
Loss : 1.918986333211263  Real : [6. 2. 3.]  Pred : [3.99772978 4.51270819 4.5884552 ]
Loss : 1.4174486176172891  Real : [5. 3. 8.]  Pred : [4.95175219 5.45887089 5.27206278]
Loss : 0.9013707717259725  Real : [8. 2. 4.]  Pred : [7.41629553 5.73517466 3.810179  ]
Loss : 0.9992927519480387  Real : [4. 6. 2.]  Pred : [4.12141323 3.6221261  2.34722829]
Loss : 0.8726032161712647  Real : [5. 4. 7.]  Pred : [5.09262323 6.56126022 7.09937906]
Loss : 0.8284240237871806  Real : [1. 8. 8.]  Pred : [1.76905215 5.91340923 8.15221214]
Loss : 0.339986310005188  Real : [6. 6. 7.]  Pred : [5.68950462 5.82704687 6.72315502]
Loss : 0.1674202076594035  Real : [1. 7. 2.]  Pred : [1.15956974 6.4580965  2.22168398]
Loss : 0.13267957290013632  Real : 