In [2]:
import torch
import numpy as np
from models import BidirectionalLSTM

import pickle
import spacy
import math
import random

SEED = 1234

torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [3]:
with open("vocab.pickle", "rb") as vocabf:
    vocab = pickle.load(vocabf)

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
INPUT_DIM = len(vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1
N_LAYERS = 2
BIDIRECTIONAL = True
DROPOUT = 0.5
model = BidirectionalLSTM(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, 
                          N_LAYERS, BIDIRECTIONAL, DROPOUT).to(device)

In [6]:
model.load_state_dict(torch.load("Bidirectional.pth"))

In [7]:
d = model.state_dict()
d

OrderedDict([('embedding.weight',
              tensor([[-0.0181,  0.0165, -0.0204,  ...,  0.0213,  0.0406, -0.0102],
                      [ 0.0134,  0.0107, -0.0403,  ..., -0.0323,  0.0394,  0.0371],
                      [-0.0885, -0.3074,  0.7475,  ..., -0.0910,  0.8222,  0.2536],
                      ...,
                      [ 0.0820, -0.0867, -0.1013,  ..., -0.0584,  0.0725,  0.0959],
                      [-0.0711,  0.0348,  0.0509,  ...,  0.0532, -0.0346, -0.0408],
                      [ 0.0094, -0.0319, -0.0387,  ..., -0.0087,  0.0027, -0.0095]],
                     device='cuda:0')),
             ('rnn.weight_ih_l0',
              tensor([[ 0.0248, -0.0046, -0.0756,  ...,  0.0145, -0.1147, -0.0421],
                      [ 0.0216,  0.0036, -0.0365,  ...,  0.0704, -0.0165, -0.0655],
                      [ 0.0091, -0.0258, -0.0006,  ...,  0.1090, -0.0931, -0.0603],
                      ...,
                      [-0.1282, -0.0360, -0.0879,  ...,  0.0311, -0.0809, -0.0810

In [8]:
def sentence_to_tensor(sentence, vocab):
    nlp = spacy.load("en")
    tokenized = [tok.text for tok in nlp.tokenizer(sentence)]
    indexed = [vocab[t] for t in tokenized]

    tensor = torch.LongTensor(indexed).to(device)
    tensor = tensor.unsqueeze(1)
    return tensor

In [9]:
input = sentence_to_tensor("good movie, but it has some flaws", vocab)

In [10]:
#transform input tensor (nwords x 1) into embeddings tensor (nwords x embedding_dim)
embeddings = model.dropout(model.embedding(input)) #squeeze removes second dimension
embeddings.shape

torch.Size([8, 1, 100])

In [11]:
def extract_params(n_layers, pytorch_dict):
    result = {}
    gate_ids = set(zip(range(4), ["i", "f", "g", "o"]))
    tensor_ids = ["i", "h"]
    params = ["weight", "bias"]
    
    for rev in range(2):
        revstr = ("_reverse" * rev)
        for param in params:
            for l in range(n_layers):
                for tensor_id in tensor_ids:
                    tensor = f"rnn.{param}_{tensor_id}h_l{l}" + revstr
                    for idx, id in gate_ids:
                        result[f"{param}_{tensor_id}{id}{l}"+revstr] = \
                            pytorch_dict[tensor][(idx*HIDDEN_DIM):((idx+1)*HIDDEN_DIM)]
    return result

In [12]:
def infer_gates(xt, ht, ct, params, l, t, rev=False):
    """
    Assume the following dimension:
    - xt.shape = [EMBEDDING_DIM x 1]
    - ht.shape = [HIDDEN_DIM x 1]
    - ct.shape = [HIDDEN_DIM x 1]
    - weights[Wi*] = [HIDDEN_DIM x EMBEDDING_DIM]
    - weights[Wh*] = [HIDDEN_DIM x HIDDEN_DIM]
    - biases[*] = [HIDDEN_DIM]
    """
    res = {}
    tanh = torch.nn.Tanh()
    
    wstr = "weight_"
    bstr = "bias_"
    revstr = "_reverse"
    
    #how do I loop this
    Wiil = wstr + f"ii{l}" + (revstr * rev)
    biil = bstr + f"ii{l}" + (revstr * rev)
    Whil = wstr + f"hi{l}" + (revstr * rev)
    bhil = bstr + f"hi{l}" + (revstr * rev)
    Wifl = wstr + f"if{l}" + (revstr * rev)
    bifl = bstr + f"if{l}" + (revstr * rev)
    Whfl = wstr + f"hf{l}" + (revstr * rev)
    bhfl = bstr + f"hf{l}" + (revstr * rev)
    Wigl = wstr + f"ig{l}" + (revstr * rev)
    bigl = bstr + f"ig{l}" + (revstr * rev)
    Whgl = wstr + f"hg{l}" + (revstr * rev)
    bhgl = bstr + f"hg{l}" + (revstr * rev)
    Wiol = wstr + f"io{l}" + (revstr * rev)
    biol = bstr + f"io{l}" + (revstr * rev)
    Whol = wstr + f"ho{l}" + (revstr * rev)
    bhol = bstr + f"ho{l}" + (revstr * rev)
    itl = f"i{t}{l}" + (revstr * rev)
    ftl = f"f{t}{l}" + (revstr * rev)
    gtl = f"g{t}{l}" + (revstr * rev)
    otl = f"o{t}{l}" + (revstr * rev)
    #tt is t+1 in variablenameland
    cttl = f"c{t+1}{l}" + (revstr * rev)
    httl = f"h{t+1}{l}" + (revstr * rev)
    
    res[itl] = infer_gate(params[Wiil], xt, params[biil],
                    params[Whil], ht, params[bhil], torch.sigmoid)

    res[ftl] = infer_gate(params[Wifl], xt, params[bifl],
                    params[Whfl], ht, params[bhfl], torch.sigmoid)

    res[gtl] = infer_gate(params[Wigl], xt, params[bigl],
                    params[Whgl], ht, params[bhgl], tanh)       

    res[otl] = infer_gate(params[Wiol], xt, params[biol],
                    params[Whol], ht, params[bhol], torch.sigmoid) 

    res[cttl] = torch.add(torch.mul(res[ftl], ct), torch.mul(res[itl], res[gtl]))
    res[httl] = torch.mul(res[otl], tanh(res[cttl]))
    
    return res

def infer_gate(i_w, x, b_i, h_w, h, b_h, fun):
    input = torch.addmm(torch.unsqueeze(b_i, -1), i_w, x)
    hidden = torch.addmm(torch.unsqueeze(b_h, -1), h_w, h)
    
    return fun(torch.add(input, hidden))

In [13]:
def forward_pass(embeddings, n_layers, params, hidden_dim):
    result = {} #saves values of states and gates
    revstr = "_reverse"
    
    for l in range(n_layers):
        #random initialization of both hidden and cell states
        result[f"h0{l}"] = torch.zeros(hidden_dim, 1).to(device)
        result[f"c0{l}"] = torch.zeros(hidden_dim, 1).to(device)
        #reverse aswell
        result[f"h0{l}{revstr}"] = torch.zeros(hidden_dim, 1).to(device)
        result[f"c0{l}{revstr}"] = torch.zeros(hidden_dim, 1).to(device)
        
        for t in range(len(embeddings)):
            #if layer > 1, then the input isn't the model input, but the hidden state from the previous
            #layer
            reverse_t = len(embeddings)-(t+1)
            if l == 0:
                input = torch.transpose(embeddings[t],0 , 1)
                input_rev = torch.transpose(embeddings[reverse_t], 0, 1)
            else:
                input = torch.cat((result[f"h{t+1}{l-1}"], result[f"h{reverse_t+1}{l-1}{revstr}"]))
                input_rev = torch.cat((result[f"h{reverse_t+1}{l-1}"],
                                      result[f"h{t+1}{l-1}{revstr}"]))
                
            result.update(infer_gates(input, result[f"h{t}{l}"], 
                                      result[f"c{t}{l}"], params, l, t))
            result.update(infer_gates(input_rev, result[f"h{t}{l}{revstr}"], result[f"c{t}{l}{revstr}"], 
                                      params, l, t, rev=True))
    return result
            

In [14]:
for i in range(10):
    acts = forward_pass(embeddings, N_LAYERS, extract_params(2, d), HIDDEN_DIM)
    output = torch.cat((acts["h41"], acts["h41_reverse"]))
    output = model.dropout(output)
    print(torch.sigmoid(model.fc(torch.transpose(output, 0 ,1))))

tensor([[0.1123]], device='cuda:0', grad_fn=<SigmoidBackward>)
tensor([[0.1774]], device='cuda:0', grad_fn=<SigmoidBackward>)
tensor([[0.0975]], device='cuda:0', grad_fn=<SigmoidBackward>)
tensor([[0.1232]], device='cuda:0', grad_fn=<SigmoidBackward>)
tensor([[0.1375]], device='cuda:0', grad_fn=<SigmoidBackward>)
tensor([[0.1055]], device='cuda:0', grad_fn=<SigmoidBackward>)
tensor([[0.1018]], device='cuda:0', grad_fn=<SigmoidBackward>)
tensor([[0.1441]], device='cuda:0', grad_fn=<SigmoidBackward>)
tensor([[0.1307]], device='cuda:0', grad_fn=<SigmoidBackward>)
tensor([[0.1367]], device='cuda:0', grad_fn=<SigmoidBackward>)


In [15]:
acts

{'h00': tensor([[0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
   

In [16]:
import json

In [24]:
acts_lists = {}
for (key,value) in acts.items():
    acts_lists[key] = value.tolist()

In [26]:
with open("activation.json", "w") as f:
    json.dump(acts_lists, f)