In [1]:
import numpy as np

class Tensor (object):
    
    def __init__(self,data,
                 autograd=False,
                 creators=None,
                 creation_op=None,
                 id=None):
        
        self.data = np.array(data)
        self.autograd = autograd
        self.grad = None
        if(id is None):
            self.id = np.random.randint(0,100000)
        else:
            self.id = id
        
        self.creators = creators
        self.creation_op = creation_op
        self.children = {}
        
        if(creators is not None):
            for c in creators:
                if(self.id not in c.children):
                    c.children[self.id] = 1
                else:
                    c.children[self.id] += 1

    def all_children_grads_accounted_for(self):
        for id,cnt in self.children.items():
            if(cnt != 0):
                return False
        return True 
        
    def backward(self,grad=None, grad_origin=None):
        if(self.autograd):
 
            if(grad is None):
                grad = Tensor(np.ones_like(self.data))

            if(grad_origin is not None):
                if(self.children[grad_origin.id] == 0):
                    raise Exception("cannot backprop more than once")
                else:
                    self.children[grad_origin.id] -= 1

            if(self.grad is None):
                self.grad = grad
            else:
                self.grad += grad
            
            assert grad.autograd == False
            
            if(self.creators is not None and 
               (self.all_children_grads_accounted_for() or 
                grad_origin is None)):

                if(self.creation_op == "add"):
                    self.creators[0].backward(self.grad, self)
                    self.creators[1].backward(self.grad, self)
                    
                if(self.creation_op == "sub"):
                    self.creators[0].backward(Tensor(self.grad.data), self)
                    self.creators[1].backward(Tensor(self.grad.__neg__().data), self)

                if(self.creation_op == "mul"):
                    new = self.grad * self.creators[1]
                    self.creators[0].backward(new , self)
                    new = self.grad * self.creators[0]
                    self.creators[1].backward(new, self)                    
                    
                if(self.creation_op == "mm"):
                    c0 = self.creators[0]
                    c1 = self.creators[1]
                    new = self.grad.mm(c1.transpose())
                    c0.backward(new)
                    new = self.grad.transpose().mm(c0).transpose()
                    c1.backward(new)
                    
                if(self.creation_op == "transpose"):
                    self.creators[0].backward(self.grad.transpose())

                if("sum" in self.creation_op):
                    dim = int(self.creation_op.split("_")[1])
                    self.creators[0].backward(self.grad.expand(dim,
                                                               self.creators[0].data.shape[dim]))

                if("expand" in self.creation_op):
                    dim = int(self.creation_op.split("_")[1])
                    self.creators[0].backward(self.grad.sum(dim))
                    
                if(self.creation_op == "neg"):
                    self.creators[0].backward(self.grad.__neg__())
                    
                if(self.creation_op == "sigmoid"):
                    ones = Tensor(np.ones_like(self.grad.data))
                    self.creators[0].backward(self.grad * (self * (ones - self)))
                
                if(self.creation_op == "tanh"):
                    ones = Tensor(np.ones_like(self.grad.data))
                    self.creators[0].backward(self.grad * (ones - (self * self)))
                
                if(self.creation_op == "index_select"):
                    new_grad = np.zeros_like(self.creators[0].data)
                    indices_ = self.index_select_indices.data.flatten()
                    grad_ = grad.data.reshape(len(indices_), -1)
                    for i in range(len(indices_)):
                        new_grad[indices_[i]] += grad_[i]
                    self.creators[0].backward(Tensor(new_grad))
                    
                if(self.creation_op == "cross_entropy"):
                    dx = self.softmax_output - self.target_dist
                    self.creators[0].backward(Tensor(dx))
                    
    def __add__(self, other):
        if(self.autograd and other.autograd):
            return Tensor(self.data + other.data,
                          autograd=True,
                          creators=[self,other],
                          creation_op="add")
        return Tensor(self.data + other.data)

    def __neg__(self):
        if(self.autograd):
            return Tensor(self.data * -1,
                          autograd=True,
                          creators=[self],
                          creation_op="neg")
        return Tensor(self.data * -1)
    
    def __sub__(self, other):
        if(self.autograd and other.autograd):
            return Tensor(self.data - other.data,
                          autograd=True,
                          creators=[self,other],
                          creation_op="sub")
        return Tensor(self.data - other.data)
    
    def __mul__(self, other):
        if(self.autograd and other.autograd):
            return Tensor(self.data * other.data,
                          autograd=True,
                          creators=[self,other],
                          creation_op="mul")
        return Tensor(self.data * other.data)    

    def sum(self, dim):
        if(self.autograd):
            return Tensor(self.data.sum(dim),
                          autograd=True,
                          creators=[self],
                          creation_op="sum_"+str(dim))
        return Tensor(self.data.sum(dim))
    
    def expand(self, dim,copies):

        trans_cmd = list(range(0,len(self.data.shape)))
        trans_cmd.insert(dim,len(self.data.shape))
        new_data = self.data.repeat(copies).reshape(list(self.data.shape) + [copies]).transpose(trans_cmd)
        
        if(self.autograd):
            return Tensor(new_data,
                          autograd=True,
                          creators=[self],
                          creation_op="expand_"+str(dim))
        return Tensor(new_data)
    
    def transpose(self):
        if(self.autograd):
            return Tensor(self.data.transpose(),
                          autograd=True,
                          creators=[self],
                          creation_op="transpose")
        
        return Tensor(self.data.transpose())
    
    def mm(self, x):
        if(self.autograd):
            return Tensor(self.data.dot(x.data),
                          autograd=True,
                          creators=[self,x],
                          creation_op="mm")
        return Tensor(self.data.dot(x.data))
    
    def sigmoid(self):
        if(self.autograd):
            return Tensor(1 / (1 + np.exp(-self.data)),
                          autograd=True,
                          creators=[self],
                          creation_op="sigmoid")
        return Tensor(1 / (1 + np.exp(-self.data)))

    def tanh(self):
        if(self.autograd):
            return Tensor(np.tanh(self.data),
                          autograd=True,
                          creators=[self],
                          creation_op="tanh")
        return Tensor(np.tanh(self.data))
    
    def index_select(self, indices):
        if(self.autograd):
            new = Tensor(self.data[indices.data], autograd = True,
                        creators = [self],
                        creation_op = "index_select")
            new.index_select_indices = indices
            return new
        return Tensor(self.data[indices.data])
    
    def cross_entropy(self, target_indices):

        temp = np.exp(self.data)
        softmax_output = temp / np.sum(temp,
                                       axis=len(self.data.shape)-1,
                                       keepdims=True)
        
        t = target_indices.data.flatten()
        p = softmax_output.reshape(len(t),-1)
        target_dist = np.eye(p.shape[1])[t]
        loss = -(np.log(p) * (target_dist)).sum(1).mean()
    
        if(self.autograd):
            out = Tensor(loss,
                         autograd=True,
                         creators=[self],
                         creation_op="cross_entropy")
            out.softmax_output = softmax_output
            out.target_dist = target_dist
            return out
        return Tensor(loss)
        
    
    def __repr__(self):
        return str(self.data.__repr__())
    
    def __str__(self):
        return str(self.data.__str__())  
    
class SGD(object):
    def __init__(self, parameters, alpha = 0.1):
        self.parameters = parameters
        self.alpha = alpha
    
    def zero(self):
        for p in self.parameters:
            p.grad.data *= 0
            
    def step(self, zero = True):
        for p in self.parameters:
            p.data -= p.grad.data * self.alpha
            if(zero):
                p.grad.data *= 0
                
class Layer(object):
    def __init__(self):
        self.parameters = list()
        
    def get_parameters(self):
        return self.parameters
    
class Linear(Layer):
    def __init__(self, n_inputs, n_outputs):
        super().__init__()
        w = np.random.randn(n_inputs, n_outputs) * np.sqrt(2/(n_inputs))
        self.weight = Tensor(w, autograd = True)
        self.bias = Tensor(np.zeros(n_outputs), autograd = True)    
        self.parameters.append(self.weight)
        self.parameters.append(self.bias)
            
    def forward(self, input):
        return input.mm(self.weight) + self.bias.expand(0, len(input.data))
    
class Sequential(Layer):
    def __init__(self, layers = list()):
        super().__init__()
        self.layers = layers
            
    def add(self, layer):
        self.layers.append(layer)
            
    def forward(self, input):
        for layer in self.layers:
            input = layer.forward(input)
        return input
        
    def get_parameters(self):
        params = list()
        for l in self.layers:
            params += l.get_parameters()
        return params
        
class MSELoss(Layer):
    def __init__(self):
        super().__init__()
    
    def forward(self, pred, target):
        return ((pred - target)*(pred - target)).sum(0)
    
class Tanh(Layer):
    def __init__(self):
        super().__init__()
    
    def forward(self, input):
        return input.tanh()
    
class Sigmoid(Layer):
    def __init__(self):
        super().__init__()
        
    def forward(self, input):
        return input.sigmoid()
    
class Embedding(Layer):
    def __init__(self, vocab_size, dim):
        super().__init__()
        
        self.vocab_size = vocab_size
        self.dim = dim
        
        weight = (np.random.rand(vocab_size, dim) - 0.5) / dim
        self.weight = Tensor(weight, autograd = True)
        
        self.parameters.append(self.weight)
    
    def forward(self, input):
        return self.weight.index_select(input)
    
class CrossEntropyLoss(object):
    def __init__sef():
        super().__init__()
        
    def forward(self, input, target):
        return input.cross_entropy(target)
    
class RNNCell(Layer):
    def __init__(self, n_inputs, n_hidden, n_output, activation = "sigmoid"):
        super().__init__()
        self.n_inputs = n_inputs
        self.n_hidden = n_hidden
        self.n_output = n_output
        
        if(activation == "sigmoid"):
            self.activation = Sigmoid()
        elif(activation == "tanh"):
            self.activation = Tanh()
        else:
            raise Exception("Non-linearity not found")
            
        self.w_ih = Linear(n_inputs, n_hidden)
        self.w_hh = Linear(n_hidden, n_hidden)
        self.w_ho = Linear(n_hidden, n_output)
        
        self.parameters += self.w_ih.get_parameters()
        self.parameters += self.w_hh.get_parameters()
        self.parameters += self.w_ho.get_parameters()
        
    def forward(self, input, hidden):
        from_prev_hidden = self.w_hh.forward(hidden)
        combined = self.w_ih.forward(input) + from_prev_hidden
        new_hidden = self.activation.forward(combined)
        output = self.w_ho.forward(new_hidden)
        return output, new_hidden
    
    def init_hidden(self, batch_size = 1):
        return Tensor(np.zeros((batch_size, self.n_hidden)), autograd =True)

In [2]:
a = Tensor([1,2,3,4,5], autograd = True)
b = Tensor([2,2,2,2,2], autograd = True)
c = Tensor([5,4,3,2,1], autograd = True)

d = a + (-b)
e = (-b) + c
f = d + e

f.backward(Tensor(np.array([1,1,1,1,1])))

print(a.grad.data)
print(b.grad.data)
print(c.grad.data)
print(d.grad.data)
print(e.grad.data)
print(f.grad.data)

[1 1 1 1 1]
[-2 -2 -2 -2 -2]
[1 1 1 1 1]
[1 1 1 1 1]
[1 1 1 1 1]
[1 1 1 1 1]


In [3]:
np.random.seed(1)

data = Tensor(np.array([[0,0],[0,1],[1,0],[1,1]]), autograd=True)
target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)

weights = list()
weights.append(Tensor(np.random.rand(2,3), autograd=True))
weights.append(Tensor(np.random.rand(3,1), autograd=True))

for i in range(10):
    pred = data.mm(weights[0]).mm(weights[1])
    loss = ((pred - target)*(pred - target)).sum(0)
    loss.backward(Tensor(np.ones_like(loss.data)))
    
    for w in weights:
        w.data -= w.grad.data * 0.1
        w.grad.data *= 0

    print(loss)

[1.12427324]
[0.64112616]
[0.44318917]
[0.31387083]
[0.20897697]
[0.12969368]
[0.07493533]
[0.04045744]
[0.02057729]
[0.00996295]


In [4]:
np.random.seed(1)

data = Tensor(np.array([[0,0],[0,1],[1,0],[1,1]]), autograd=True)
target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)

weights = list()
weights.append(Tensor(np.random.rand(2,3), autograd=True))
weights.append(Tensor(np.random.rand(3,1), autograd=True))

optim = SGD(parameters=weights, alpha=0.1)

for i in range(10):
    pred = data.mm(weights[0]).mm(weights[1])
    loss = ((pred - target)*(pred - target)).sum(0)
    loss.backward(Tensor(np.ones_like(loss.data)))
    optim.step()

    print(loss)

[1.12427324]
[0.64112616]
[0.44318917]
[0.31387083]
[0.20897697]
[0.12969368]
[0.07493533]
[0.04045744]
[0.02057729]
[0.00996295]


In [5]:
data = Tensor(np.array([[0,0], [0,1], [1,0], [1,1]]), autograd = True)
target = Tensor(np.array([[0], [1], [0], [1]]), autograd = True)

model = Sequential([Linear(2,3), Linear(3,1)])
optim = SGD(parameters = model.get_parameters(), alpha = 0.05)

for i in range(10):
    pred = model.forward(data)
    loss = ((pred - target) * (pred - target)).sum(0)
    loss.backward(Tensor(np.ones_like(loss.data)))
    optim.step()
    print(loss)

[0.58851814]
[0.53258841]
[4.65643674]
[35.63491234]
[36.18671332]
[99.85775706]
[39.46443576]
[10060.88863906]
[2.96794571e+09]
[1.06242054e+26]


In [6]:
data = Tensor(np.array([[0,0], [0,1], [1,0], [1,1]]), autograd = True)
target = Tensor(np.array([[0], [1], [0], [1]]), autograd = True)

model = Sequential([Linear(2,3), Linear(3,1)])
optim = SGD(parameters = model.get_parameters(), alpha = 0.05)
criterion = MSELoss()

for i in range(10):
    pred = model.forward(data)
    loss = criterion.forward(pred, target)
    loss.backward(Tensor(np.ones_like(loss.data)))
    optim.step()
    print(loss)

[2.32867133]
[0.38326249]
[0.17697789]
[0.12255879]
[0.09078094]
[0.06911117]
[0.05373838]
[0.04245233]
[0.03390907]
[0.02728052]


In [7]:
data = Tensor(np.array([[0,0],[0,1],[1,0],[1,1]]), autograd=True)
target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)

model = Sequential([Linear(2,3), Tanh(), Linear(3,1), Sigmoid()])
criterion = MSELoss()

optim = SGD(parameters=model.get_parameters(), alpha=1)

for i in range(10):
    pred = model.forward(data)
    loss = criterion.forward(pred, target)
    loss.backward(Tensor(np.ones_like(loss.data)))
    optim.step()
    print(loss)

[1.18121562]
[0.77282441]
[0.54610058]
[0.35495787]
[0.23000602]
[0.15698963]
[0.11390411]
[0.08712864]
[0.06943514]
[0.0570973]


In [8]:
data = Tensor(np.array([1, 2, 1, 2]), autograd=True)
target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)

model = Sequential([Embedding(5,3), Tanh(), Linear(3,1), Sigmoid()])
criterion = MSELoss()

optim = SGD(parameters=model.get_parameters(), alpha=1)

for i in range(10):
    pred = model.forward(data)
    loss = criterion.forward(pred, target)
    loss.backward(Tensor(np.ones_like(loss.data)))
    optim.step()
    print(loss)

[1.03597284]
[0.6053573]
[0.34029019]
[0.18851853]
[0.11855053]
[0.08308112]
[0.06262093]
[0.04960446]
[0.0407139]
[0.03431255]


In [9]:
data = Tensor(np.array([1,2,1,2]), autograd=True)
target = Tensor(np.array([0,1,0,1]), autograd=True)

model = Sequential([Embedding(3,3), Tanh(), Linear(3,4)])
criterion = CrossEntropyLoss()

optim = SGD(parameters=model.get_parameters(), alpha=0.1)

for i in range(10):
    pred = model.forward(data)
    loss = criterion.forward(pred, target)
    loss.backward(Tensor(np.ones_like(loss.data)))
    optim.step()
    print(loss)

1.2772799770415606
0.9316691523502416
0.6991689385224904
0.5393340453712206
0.42686450937652054
0.34601834775353
0.28665853773543665
0.24212316273825107
0.2079896503488622
0.18129261801406055


In [10]:
import numpy as np

f = open('qa1_single-supporting-fact_train.txt', 'r')
raw = f.readlines()
f.close()

tokens = list()
for line in raw[0:1000]:
    tokens.append(line.lower().replace("\n", "").split(" ")[1:])
    
new_tokens = list()
for line in tokens:
    new_tokens.append(['-'] * (6 - len(line)) + line)
tokens = new_tokens

vocab = set()
for sent in tokens:
    for word in sent:
        vocab.add(word)
vocab = list(vocab)

word2index = {}
for i, word in enumerate(vocab):
    word2index[word] = i
    
def words2indices(sentence):
    idx = list()
    for word in sentence:
        idx.append(word2index[word])
    return idx

indices = list()
for line in tokens:
    idx = list()
    for w in line:
        idx.append(word2index[w])
    indices.append(idx)
    
data = np.array(indices)

In [11]:
embed = Embedding(vocab_size = len(vocab), dim = 16)
model = RNNCell(n_inputs = 16, n_hidden = 16, n_output = len(vocab))

criterion = CrossEntropyLoss()
params = model.get_parameters() + embed.get_parameters()

optim = SGD(parameters = params, alpha = 0.05)

for j in range(1000):
    batch_size = 100
    total_loss = 0
    
    hidden = model.init_hidden(batch_size = batch_size)
    
    for t in range(5):
        input = Tensor(data[0:batch_size, t], autograd = True)
        rnn_input = embed.forward(input = input)
        output, hidden = model.forward(input = rnn_input, hidden = hidden)
        
    target = Tensor(data[0:batch_size, t+1], autograd = True)
    loss = criterion.forward(output, target)
    loss.backward()
    optim.step()
    total_loss += loss.data
    
    p_correct = (target.data == np.argmax(output.data,axis=1)).mean()        
    print_loss = total_loss / (len(data)/batch_size)        
    print("Loss:",print_loss,"% Correct:",p_correct)

Loss: 0.49188299424959636 % Correct: 0.0
Loss: 0.33242056426645106 % Correct: 0.14
Loss: 0.3168957052040789 % Correct: 0.13
Loss: 0.2987106296326819 % Correct: 0.14
Loss: 0.3124638876950327 % Correct: 0.13
Loss: 0.30309274689537696 % Correct: 0.14
Loss: 0.29548654097474825 % Correct: 0.13
Loss: 0.2700991418503088 % Correct: 0.14
Loss: 0.28209515745283287 % Correct: 0.13
Loss: 0.28740359686218253 % Correct: 0.18
Loss: 0.28193003962166846 % Correct: 0.18
Loss: 0.24595148124373106 % Correct: 0.2
Loss: 0.24739037248928444 % Correct: 0.18
Loss: 0.281169684691183 % Correct: 0.2
Loss: 0.2754616957672565 % Correct: 0.19
Loss: 0.26780440781830855 % Correct: 0.18
Loss: 0.2421571388141658 % Correct: 0.2
Loss: 0.2493762035107659 % Correct: 0.19
Loss: 0.25108643819860643 % Correct: 0.2
Loss: 0.2425524553278145 % Correct: 0.18
Loss: 0.22606436902978352 % Correct: 0.2
Loss: 0.22700453659789085 % Correct: 0.19
Loss: 0.23586348760497677 % Correct: 0.2
Loss: 0.2508643546049519 % Correct: 0.19
Loss: 0.23

Loss: 0.18078034776902394 % Correct: 0.22
Loss: 0.17722383539051112 % Correct: 0.23
Loss: 0.18070555088661308 % Correct: 0.22
Loss: 0.177131008190999 % Correct: 0.23
Loss: 0.18060996574792537 % Correct: 0.22
Loss: 0.1770398019625641 % Correct: 0.23
Loss: 0.18048413642947816 % Correct: 0.22
Loss: 0.17694346889983276 % Correct: 0.23
Loss: 0.18033377347223958 % Correct: 0.22
Loss: 0.1768361394516175 % Correct: 0.23
Loss: 0.18015643267424503 % Correct: 0.22
Loss: 0.17671959233575732 % Correct: 0.23
Loss: 0.1799615371289767 % Correct: 0.22
Loss: 0.17659368752999466 % Correct: 0.23
Loss: 0.17974638639749912 % Correct: 0.22
Loss: 0.17646205435655876 % Correct: 0.23
Loss: 0.1795181786363616 % Correct: 0.22
Loss: 0.17632471923615373 % Correct: 0.23
Loss: 0.17927466555448116 % Correct: 0.22
Loss: 0.17618405982437052 % Correct: 0.23
Loss: 0.17902211423905398 % Correct: 0.22
Loss: 0.1760402445515503 % Correct: 0.23
Loss: 0.17876182766401003 % Correct: 0.22
Loss: 0.17589492272044077 % Correct: 0.23

Loss: 0.16443116253974716 % Correct: 0.3
Loss: 0.16408356988019396 % Correct: 0.28
Loss: 0.16404755365219162 % Correct: 0.28
Loss: 0.16395829213059157 % Correct: 0.28
Loss: 0.16380441820444647 % Correct: 0.28
Loss: 0.16390700556849233 % Correct: 0.28
Loss: 0.16360005453676266 % Correct: 0.28
Loss: 0.16387279630454368 % Correct: 0.26
Loss: 0.16341041640838483 % Correct: 0.29
Loss: 0.16384153073874808 % Correct: 0.26
Loss: 0.1632654308657104 % Correct: 0.28
Loss: 0.16381072393575752 % Correct: 0.26
Loss: 0.16322716331400167 % Correct: 0.28
Loss: 0.163746409730807 % Correct: 0.26
Loss: 0.1633505549841071 % Correct: 0.29
Loss: 0.1635526738863189 % Correct: 0.26
Loss: 0.16360413288875658 % Correct: 0.29
Loss: 0.16312599753919352 % Correct: 0.26
Loss: 0.16382526603910508 % Correct: 0.29
Loss: 0.16249896898547694 % Correct: 0.28
Loss: 0.16386442568147125 % Correct: 0.27
Loss: 0.1618597791353975 % Correct: 0.27
Loss: 0.16375470602402392 % Correct: 0.29
Loss: 0.16143553697797136 % Correct: 0.28

Loss: 0.15488362416177157 % Correct: 0.34
Loss: 0.15353071515353992 % Correct: 0.28
Loss: 0.15428598990778358 % Correct: 0.34
Loss: 0.15341018718813546 % Correct: 0.28
Loss: 0.1538757293789456 % Correct: 0.34
Loss: 0.1533553653078939 % Correct: 0.29
Loss: 0.15350967665971366 % Correct: 0.34
Loss: 0.15346682957099275 % Correct: 0.29
Loss: 0.15308958063406017 % Correct: 0.33
Loss: 0.15360315927511126 % Correct: 0.29
Loss: 0.15265657340926697 % Correct: 0.33
Loss: 0.15354287450268564 % Correct: 0.3
Loss: 0.15234133495880187 % Correct: 0.32
Loss: 0.15354500200473517 % Correct: 0.3
Loss: 0.15219931125497188 % Correct: 0.32
Loss: 0.15379956998822958 % Correct: 0.31
Loss: 0.15234196772443329 % Correct: 0.32
Loss: 0.15433304760156377 % Correct: 0.33
Loss: 0.15302153932570292 % Correct: 0.32
Loss: 0.15512873215243603 % Correct: 0.31
Loss: 0.15482739938867657 % Correct: 0.35
Loss: 0.1563019127611212 % Correct: 0.29
Loss: 0.15504628752989497 % Correct: 0.35
Loss: 0.1544290608181932 % Correct: 0.2

Loss: 0.1465039575322506 % Correct: 0.34
Loss: 0.14449620381945244 % Correct: 0.35
Loss: 0.1448399144772567 % Correct: 0.34
Loss: 0.14345049566419 % Correct: 0.35
Loss: 0.14372660351610314 % Correct: 0.35
Loss: 0.14274679169862098 % Correct: 0.35
Loss: 0.1431503588848199 % Correct: 0.35
Loss: 0.14235837846568894 % Correct: 0.35
Loss: 0.14289495315316542 % Correct: 0.36
Loss: 0.14218862080825678 % Correct: 0.35
Loss: 0.1428295572990615 % Correct: 0.37
Loss: 0.14212380308428446 % Correct: 0.35
Loss: 0.14294680866943016 % Correct: 0.37
Loss: 0.14205931333723543 % Correct: 0.35
Loss: 0.14286188279172643 % Correct: 0.36
Loss: 0.14217977638484264 % Correct: 0.35
Loss: 0.14292996793015705 % Correct: 0.36
Loss: 0.14219285429642364 % Correct: 0.36
Loss: 0.1427300964625828 % Correct: 0.36
Loss: 0.14208710412606015 % Correct: 0.36
Loss: 0.14293409586533767 % Correct: 0.35
Loss: 0.14235368201266538 % Correct: 0.36
Loss: 0.14319709226034744 % Correct: 0.35
Loss: 0.14271506856052657 % Correct: 0.36


Loss: 0.14359010998925026 % Correct: 0.36
Loss: 0.14075991650006264 % Correct: 0.36
Loss: 0.14164996569290017 % Correct: 0.36
Loss: 0.13828673369417627 % Correct: 0.37
Loss: 0.13800479649724356 % Correct: 0.36


In [13]:
batch_size = 1
hidden = model.init_hidden(batch_size = batch_size)
for t in range(5):
    input = Tensor(data[0:batch_size, t], autograd = True)
    rnn_input = embed.forward(input = input)
    output, hidden = model.forward(input = rnn_input, hidden = hidden)
    
target = Tensor(data[0:batch_size, t+1], autograd = True)
loss = criterion.forward(output, target)

ctx = "" 
for idx in data[0:batch_size][0][0:-1]:    
    ctx += vocab[idx] + " " 
    print("Context:",ctx) 
    print("Pred:", vocab[output.data.argmax()])

Context: - 
Pred: office.
Context: - mary 
Pred: office.
Context: - mary moved 
Pred: office.
Context: - mary moved to 
Pred: office.
Context: - mary moved to the 
Pred: office.
