In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.autograd import Variable
from torch.nn.parameter import Parameter

import numpy as np

In [31]:
def LSTMCell(input, hidden, weight_input, weight_hidden, bias_input=None, bias_hidden=None):
    """Compute one LSTM time step.

    Args:
        input: 2-D tensor ``(batch, input_size)``.
        hidden: previous state. Either a ``(hidden_state, cell_state)`` pair of
            ``(batch, hidden_size)`` tensors, or a single tensor that is then
            used as both the hidden and the cell state (legacy behaviour, kept
            for backward compatibility).
        weight_input: ``(4 * hidden_size, input_size)`` weights applied to ``input``.
        weight_hidden: ``(4 * hidden_size, hidden_size)`` weights applied to the
            previous hidden state.
        bias_input: optional bias forwarded to the input ``F.linear`` call.
        bias_hidden: optional bias forwarded to the hidden ``F.linear`` call.

    Returns:
        ``(cell_state, hidden_state)`` -- note that the cell state comes first.

    The gate rows are laid out as forget, input, cell, output.
    """
    # A real LSTM carries two tensors between steps; unpack them when given.
    if isinstance(hidden, (tuple, list)):
        prev_hidden, prev_cell = hidden
    else:
        prev_hidden = prev_cell = hidden

    gates = F.linear(input, weight_input, bias_input) + \
        F.linear(prev_hidden, weight_hidden, bias_hidden)

    # Split the stacked pre-activations along the feature axis, one chunk per gate.
    forget_gate, input_gate, cell_gate, output_gate = gates.chunk(4, 1)

    # torch.sigmoid / torch.tanh replace the deprecated F.sigmoid / F.tanh.
    forget_gate = torch.sigmoid(forget_gate)
    input_gate = torch.sigmoid(input_gate)
    cell_gate = torch.tanh(cell_gate)
    output_gate = torch.sigmoid(output_gate)

    # Keep part of the old cell state and add the gated candidate.
    cell_state = prev_cell * forget_gate + (input_gate * cell_gate)

    hidden_state = output_gate * torch.tanh(cell_state)

    return cell_state, hidden_state
    

In [33]:
class GRUCell(nn.Module):
    """Single-step gated recurrent unit.

    Parameters:
        W: ``(3 * hidden_size, input_size)`` input weights; the rows are
            stacked as reset, update, candidate.
        U: ``(3 * hidden_size, hidden_size)`` hidden weights, same row layout.
        bias_input / bias_hidden: ``(3 * hidden_size,)`` biases, or ``None``
            when the cell is built with ``bias=False``.
    """

    def __init__(self, input_size, hidden_size, bias=True):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size

        self.W = Parameter(torch.empty(3 * hidden_size, input_size))
        self.U = Parameter(torch.empty(3 * hidden_size, hidden_size))

        if bias:
            # A linear bias is one value per output feature, i.e. 1-D.
            self.bias_input = Parameter(torch.empty(3 * hidden_size))
            self.bias_hidden = Parameter(torch.empty(3 * hidden_size))
        else:
            self.register_parameter('bias_input', None)
            self.register_parameter('bias_hidden', None)

        self.reset_parameters()

    def reset_parameters(self):
        """Fill every parameter from U(-1/sqrt(hidden_size), 1/sqrt(hidden_size))."""
        bound = 1.0 / (self.hidden_size ** 0.5) if self.hidden_size > 0 else 0.0
        for param in self.parameters():
            nn.init.uniform_(param, -bound, bound)

    def forward(self, input, hidden):
        """Advance the cell by one step.

        Args:
            input: ``(batch, input_size)`` tensor.
            hidden: ``(batch, hidden_size)`` previous hidden state.

        Returns:
            The new ``(batch, hidden_size)`` hidden state.
        """
        tmp_input = F.linear(input, self.W, self.bias_input)
        tmp_hidden = F.linear(hidden, self.U, self.bias_hidden)

        reset_input, update_input, output_input = tmp_input.chunk(3, 1)
        reset_hidden, update_hidden, output_hidden = tmp_hidden.chunk(3, 1)

        reset_gate = torch.sigmoid(reset_input + reset_hidden)
        update_gate = torch.sigmoid(update_input + update_hidden)
        # The reset gate scales how much of the previous state feeds the
        # candidate; it must multiply the hidden term, not be added to it.
        candidate = torch.tanh(output_input + reset_gate * output_hidden)

        # Blend the old state and the candidate using the update gate.
        new_hidden = (1 - update_gate) * hidden + update_gate * candidate
        return new_hidden
        

In [40]:
class LSTM(nn.Module):
    '''
    Recurrent wrapper that, despite its name, advances a single GRU cell by
    one time step per call.

    Args:
        input_size: number of input features.
        hidden_size: number of hidden features.
        bias: forwarded to the underlying GRUCell.
        num_layers: stored only; stacking layers is not implemented here.
        batch_first: stored only; ``forward`` handles one step, so there is no
            sequence axis to reorder.

    The wrapper owns no parameters of its own: all weights and biases live on
    ``self.gru``.
    '''
    def __init__(self, input_size, hidden_size, bias=True,
                num_layers=1, batch_first=False):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias
        self.num_layers = num_layers
        self.batch_first = batch_first

        # The cell creates its own biases, so the flag is simply passed through
        # instead of registering a second, unused pair on the wrapper.
        self.gru = GRUCell(self.input_size, self.hidden_size, bias=bias)

    def forward(self, input, prev_state):
        '''Return the next hidden state for ``input`` given ``prev_state``.'''
        # Call the module rather than .forward so registered hooks still run.
        return self.gru(input, prev_state)
        

The input weight matrix stacks the weights for all gates, so its shape is (3 * hidden_size, input_size).

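Question: is `F.linear(x, W_ih) + F.linear(h, W_hh)` equal to a single `F.linear` applied to the concatenated input `torch.cat([x, h], 1)` and the concatenated weight `torch.cat([W_ih, W_hh], 1)`?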

In [29]:
# Experiment: two separate linear maps summed together should equal one linear
# map over the concatenated features.
# NOTE: `input` shadows the Python builtin; the name is kept because a later
# cell reads `input` and `hidden`.
input = torch.ones(5,2)
hidden = torch.ones(5,2)

w_ih = torch.ones(5,2)
w_hh = torch.ones(5,2)

lin1 = F.linear(input, w_ih, None)
lin2 = F.linear(hidden, w_hh, None)
res1 = lin1 + lin2

# Concatenate along the feature axis (dim=1). The default dim=0 stacks rows
# instead, producing a 10x10 result that cannot match the 5x5 sum.
concat_input = torch.cat([input, hidden], 1)
concat_weight = torch.cat([w_ih, w_hh], 1)

res2 = F.linear(concat_input, concat_weight)

# Fails loudly if the two formulations ever disagree.
assert torch.allclose(res1, res2)
input.transpose(0, 1)


 1  1  1  1  1
 1  1  1  1  1
[torch.FloatTensor of size 2x5]

In [50]:
# Collect the per-step tensors and stack them on a new leading axis, giving
# (num_tensors, *tensor_shape). torch.stack takes the leading size from the
# number of tensors; the previous cat + view used input.size(1) for it, which
# only worked because the feature width happened to equal the list length.
states = [input, hidden]
output = torch.stack(states, 0)
output


(0 ,.,.) = 
  1  1
  1  1
  1  1
  1  1
  1  1

(1 ,.,.) = 
  1  1
  1  1
  1  1
  1  1
  1  1
[torch.FloatTensor of size 2x5x2]

In [82]:
import rnn
import rnn_cells

In [83]:
# Smallest useful configuration: two input features, two hidden units.
input_size, hidden_size = 2, 2

# Build the project's GRU cell, then hand it to the project's RNN wrapper.
gru_cell = rnn_cells.GRUCell(input_size, hidden_size)
gru = rnn.RNN(input_size, hidden_size, gru_cell)

AttributeError: 'torch.FloatTensor' object has no attribute 'uniform'

In [70]:
# In-place tensor initialisers end with an underscore: `uniform_`, not `uniform`.
probe = Parameter(torch.Tensor(5))
probe.data.uniform_(-1, 1)


-0.9608
 0.6994
-0.4091
-0.9141
-0.5123
[torch.FloatTensor of size 5]