In [1]:
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
from masked_cross_entropy import *


In [2]:
def random_sequences(length_from, length_to,
                     vocab_lower, vocab_upper,
                     batch_size):
    """Endlessly yield batches of zero-padded random integer sequences.

    Sequence lengths are drawn uniformly from [length_from, length_to]
    (both ends inclusive). Token values are drawn uniformly from the
    half-open range [vocab_lower, vocab_upper): `np.random.randint`
    excludes `high`, so `vocab_upper` itself is never produced.

    Args:
        length_from: minimum sequence length (inclusive).
        length_to: maximum sequence length (inclusive); also the padded width.
        vocab_lower: smallest token value (inclusive).
        vocab_upper: upper bound on token values (exclusive).
        batch_size: number of sequences per batch.

    Yields:
        (padded, seq_lengths) where `padded` is a float64 array of shape
        (batch_size, length_to) with each sequence left-aligned and padded
        with zeros on the right, and `seq_lengths` is an integer array of
        shape (batch_size,) holding the true length of every row.

    Raises:
        ValueError: if length_from > length_to. Because this is a generator,
            the check runs when the first batch is requested.
    """
    if length_from > length_to:
        raise ValueError('length_from > length_to')

    def random_length():
        # No RNG draw is needed (or consumed) when the length is fixed.
        if length_from == length_to:
            return length_from
        return np.random.randint(length_from, length_to + 1)

    while True:
        padded = np.zeros([batch_size, length_to])
        seq_lengths = np.zeros([batch_size], dtype=int)
        for i in range(batch_size):
            # Draw the length first and the tokens second so the sequence of
            # RNG calls (and therefore seeded output) stays stable.
            length = random_length()
            padded[i, :length] = np.random.randint(low=vocab_lower,
                                                   high=vocab_upper,
                                                   size=length)
            seq_lengths[i] = length

        yield padded, seq_lengths



In [3]:
# Compute device handle, pinned to the CPU.
# NOTE(review): nothing in the visible cells reads `device`; confirm whether a
# later cell uses it before relying on it or removing it.
device = torch.device('cpu')
class EncoderRNN(nn.Module):
    """Bidirectional GRU encoder that consumes raw scalar sequences.

    There is no embedding layer: `forward` appends a trailing feature axis of
    size 1 to its input, so the GRU must be built with `input_size == 1` for
    the shapes to line up.

    The GRU hidden state is kept on `self.hidden` and fed back in on the next
    `forward` call. Reset `hidden` to None before encoding an unrelated batch.

    Args:
        input_size: number of features per time step expected by the GRU.
        hidden_size: GRU hidden size per direction.
        n_layers: number of stacked GRU layers.
        dropout: dropout probability passed to `nn.GRU`.
    """

    def __init__(self, input_size, hidden_size, n_layers=1, dropout=0):
        super(EncoderRNN, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.dropout = dropout

        self.gru = nn.GRU(input_size, hidden_size, n_layers, dropout=self.dropout, bidirectional=True)
        # Carried across forward() calls; None means "start from zeros".
        self.hidden = None

    def forward(self, input_seqs, input_lengths):
        """Encode a padded, time-major batch.

        Args:
            input_seqs: tensor of shape (seq_len, batch) holding one scalar
                per time step; any numeric dtype (it is cast to float32).
            input_lengths: true length of every batch column, in any order.

        Returns:
            Tensor of shape (max(input_lengths), batch, hidden_size): the
            forward and backward GRU outputs summed. Padded positions are 0.
        """
        # (seq_len, batch) -> (seq_len, batch, 1). `.float()` keeps the
        # tensor on its current device, unlike `.type(torch.FloatTensor)`.
        input_seqs = input_seqs.unsqueeze(-1).float()
        # enforce_sorted=False lets callers pass lengths in any order; the
        # default (True) raises unless lengths are sorted in decreasing order.
        packed = torch.nn.utils.rnn.pack_padded_sequence(
            input_seqs, input_lengths, enforce_sorted=False)
        outputs, self.hidden = self.gru(packed, self.hidden)
        outputs, _ = torch.nn.utils.rnn.pad_packed_sequence(outputs)  # unpack (back to padded)
        # The last axis is [forward | backward]; sum the two halves.
        outputs = outputs[:, :, :self.hidden_size] + outputs[:, :, self.hidden_size:]

        return outputs
    
    
class DecoderRNN(nn.Module):
    """One-step bidirectional GRU decoder followed by two linear layers.

    Each `forward` call pushes the final time step of the encoder output
    through the GRU once. The resulting hidden state is kept on `self.hidden`
    and reused by the next call, so repeated calls produce successive steps.
    Assign None to `hidden` to start over.
    """

    def __init__(self,output_size,hidden_size,max_seq_length,enc_hidden_size,n_layers=1):
        super().__init__()

        # Hyper-parameters recorded on the module (max_seq_length is stored
        # but not read anywhere inside this class).
        self.output_size, self.hidden_size = output_size, hidden_size
        self.max_seq_length, self.enc_hidden_size = max_seq_length, enc_hidden_size

        # Recurrent core: consumes encoder features, emits 2 * hidden_size
        # features per step because it is bidirectional.
        self.gru = nn.GRU(
            input_size=enc_hidden_size,
            hidden_size=hidden_size,
            num_layers=n_layers,
            bidirectional=True,
        )
        self.hidden = None

        # Head: fold both directions back to hidden_size, then project to
        # the output width.
        self.concat = nn.Linear(2 * hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, encoder_outputs):
        """Run a single decoding step.

        Takes the last entry along the first axis of `encoder_outputs`,
        treats it as a length-1 sequence, and returns the projected GRU
        output for that step. Updates `self.hidden` as a side effect.
        """
        last_step = encoder_outputs[-1]
        step_input = torch.unsqueeze(last_step, 0)

        gru_out, next_hidden = self.gru(step_input, self.hidden)
        self.hidden = next_hidden

        return self.out(self.concat(gru_out))
    


In [4]:
# --- Configuration -----------------------------------------------------------
SEED = 0                 # seeds both NumPy (batches) and PyTorch (weight init)
N_ITERATIONS = 10000     # number of optimisation steps
LOG_EVERY = 100          # print one sample every LOG_EVERY steps
LEARNING_RATE = 0.001
VOCAB_LOWER = 1          # smallest value that can appear in a sequence
VOCAB_UPPER = 9          # passed to random_sequences as its upper vocabulary bound

max_seq_length = 3
batch_size = 25
input_size = 1
hidden_size = 10

# Seed before the models are built so initial weights and sampled batches are
# the same on every Restart & Run All.
np.random.seed(SEED)
torch.manual_seed(SEED)

# --- Models and optimisers ---------------------------------------------------
enc_model = EncoderRNN(input_size, hidden_size)
dec_model = DecoderRNN(input_size, hidden_size, max_seq_length, hidden_size)

enc_optimizer = torch.optim.Adam(enc_model.parameters(), lr=LEARNING_RATE)
dec_optimizer = torch.optim.Adam(dec_model.parameters(), lr=LEARNING_RATE)

criterion = nn.L1Loss()

# Build the batch generator once and pull from it, instead of constructing a
# fresh generator on every iteration.
batches = random_sequences(max_seq_length, max_seq_length,
                           VOCAB_LOWER, VOCAB_UPPER,
                           batch_size)

# --- Training: reproduce the input sequence from the encoder output ----------
for i in range(N_ITERATIONS):
    enc_optimizer.zero_grad()
    dec_optimizer.zero_grad()

    # d: (batch_size, max_seq_length) values, s: (batch_size,) lengths.
    d, s = next(batches)
    seqs = torch.from_numpy(d.T)  # time-major: (max_seq_length, batch_size)

    enc_out = enc_model(seqs, torch.from_numpy(s))

    # Each decoder call emits one step of shape (1, batch_size, input_size)
    # and advances the decoder's stored hidden state; stack the steps along
    # the time axis rather than writing in place into a float64 buffer.
    all_out = torch.cat([dec_model(enc_out) for _ in range(max_seq_length)], dim=0)

    # Target has the same layout and dtype as the prediction.
    target = seqs.unsqueeze(-1).to(all_out.dtype)

    loss = criterion(all_out, target)

    loss.backward()
    enc_optimizer.step()
    dec_optimizer.step()

    # Both models carry their hidden state between calls; clear it so the
    # next batch starts fresh and does not reference this step's graph.
    enc_model.hidden = None
    dec_model.hidden = None

    if i % LOG_EVERY == 0:
        # Show the first sequence of the batch next to its reconstruction.
        real = d[0]
        pred = all_out[:, 0].squeeze(-1).detach().numpy()
        print("Loss : {}  Real : {}  Pred : {}".format(loss.item(), real, pred))

Loss : 4.190133734941482  Real : [1. 1. 5.]  Pred : [0.16370735 0.15192667 0.15083903]
Loss : 1.9762819989522298  Real : [1. 8. 5.]  Pred : [3.77183771 4.92165184 5.20417118]
Loss : 1.8544980382919312  Real : [7. 5. 1.]  Pred : [2.46427679 2.66099334 2.5252831 ]
Loss : 1.1701394279797872  Real : [8. 7. 8.]  Pred : [6.17251587 7.22404528 7.39565897]
Loss : 1.577463877995809  Real : [8. 5. 8.]  Pred : [5.82043695 7.03967285 7.2842598 ]
Loss : 0.9567384751637776  Real : [4. 1. 2.]  Pred : [2.88248682 2.86798477 1.93761647]
Loss : 0.994472173055013  Real : [8. 3. 7.]  Pred : [7.14508724 7.1848278  6.50663805]
Loss : 0.8063480011622111  Real : [1. 6. 8.]  Pred : [2.16676426 4.9029665  7.44788265]
Loss : 0.7817811330159505  Real : [4. 2. 3.]  Pred : [3.70629096 3.52619076 2.85771084]
Loss : 0.8273346964518229  Real : [8. 2. 2.]  Pred : [7.67947817 4.16674995 1.7464664 ]
Loss : 0.8382234287261963  Real : [3. 2. 8.]  Pred : [2.95862103 5.84107733 7.94989634]
Loss : 0.673198504447937  Real : [4

KeyboardInterrupt: 