In [7]:
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable

token = {
    '\0' : 0,
    '.' : 1, 
    ',' : 2, 
    '[' : 3, 
    ']' : 4, 
    '<' : 5, 
    '>' : 6, 
    '+' : 7, 
    '-' : 8,
    "START" : 9
    }

char = {
    0 : '\0',
    1 : '.',
    2 : ',', 
    3 : '[', 
    4 : ']',  
    5 : '<',  
    6 : '>', 
    7 : '+',  
    8 : '-'
    # no START on purpose
    }

class BFgen(nn.Module):
    def __init__(self, input_size, embedding_dim, hidden_size, output_size, n_layers=2, batch_size=1):
        super(BFgen, self).__init__()
        self.input_size = input_size
        self.embedding_dim = embedding_dim
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.batch_size = batch_size
        
        self.encoder = nn.Embedding(input_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_size, n_layers)
        self.decoder = nn.Linear(hidden_size, output_size)
        self.softmax = nn.functional.log_softmax
        

    """
    forward
    Takes input_token and hidden memory state <- input to recursive layer
    returns output token and changed hidden memory state.
    """
    def forward(self, input_token, hidden):
        embeds = self.encoder(input_token)
        output, hidden = self.lstm(
            embeds.view(len(input_token), self.batch_size, -1), hidden)
        output = self.decoder(output.view(self.batch_size, -1))
        output = self.softmax(output) # in paper its multinomial distribution
        return output, hidden
    
    def init_hidden_zero(self):
        self.hidden = (Variable(torch.zeros(self.n_layers, self.batch_size, self.hidden_size)),
                      Variable(torch.zeros(self.n_layers, self.batch_size, self.hidden_size)),)
    
    def init_hidden_normal(self):
        means = torch.zeros(self.n_layers, self.batch_size, self.hidden_size)
        std = torch.Tensor([0.001]*self.hidden_size*self.n_layers*self.batch_size).unsqueeze(0)
        self.hidden = (Variable(torch.normal(means, std)), Variable(torch.normal(means, std)))

    def evaluate(self, predict_len=100):
        input_token = token["START"]
        hidden = self.init_hidden_zero
        prediction = ""

        for i in range(predict_len):
            output_token, hidden = self.forward(input_token, hidden)
            input_token = output_token

            prediction += char[output_token]
            if output_token == '\0':
                break

        return prediction



In [18]:
embedding_size = 10
hidden_size = 35
output_size = 9
n_layers = 2

token_num = len(token.keys())

In [19]:
model = BFgen(10, embedding_size, hidden_size, output_size, n_layers, 64)

In [20]:
model.init_hidden_normal()

In [17]:
print(model.hidden)

(Variable containing:
(0 ,.,.) = 
1.00000e-03 *
 -1.5843  0.1523  0.7197  ...   1.1720 -1.2670 -0.7931
 -0.7721 -0.0372  0.7097  ...   0.8015  0.6435 -0.1788
  0.9605  0.5317  1.0641  ...   0.7549  0.5456  0.4341
           ...             ⋱             ...          
  0.2711  0.2142 -0.5712  ...   0.3432  0.6767 -0.3111
  0.1518 -0.3111 -1.1244  ...   0.9975  1.4528  0.3571
 -0.4659  1.0484  0.4992  ...  -1.2222 -0.3975 -0.7004

(1 ,.,.) = 
1.00000e-03 *
  0.0843 -0.6299 -0.3144  ...  -0.1246  1.3298  0.6861
  0.0007 -1.5905 -0.3623  ...   0.2020 -2.0490 -1.2851
 -0.2309  0.7680 -0.1753  ...  -1.6299  0.8580 -2.0703
           ...             ⋱             ...          
  2.3861  0.3732 -0.0975  ...   1.1056 -0.6599 -0.2478
  1.0782  0.1525 -0.6702  ...   0.6983  0.2251 -1.2922
  0.4027 -1.1268 -2.6957  ...   0.7005  1.3965  0.4474
[torch.FloatTensor of size 2x64x35]
, Variable containing:
(0 ,.,.) = 
1.00000e-03 *
  1.9834 -0.8784 -1.4083  ...  -1.7819 -0.9550 -1.4679
 -0.4587 -0.309

In [26]:
def token_to_tensor(input_token):
    tensor = torch.zeros(1, token_num).long()
    tensor[0][token[input_token]] = 1
    return tensor

In [27]:
input_sample = token_to_tensor(">")
print input_sample


    0     0     0     0     0     0     1     0     0     0
[torch.LongTensor of size 1x10]



In [28]:
model.forward(Variable(input_sample), model.hidden)

RuntimeError: invalid argument 2: size '[1 x 64 x -1]' is invalid for input with 100 elements at /Users/soumith/code/builder/wheel/pytorch-src/torch/lib/TH/THStorage.c:37