In [27]:
import torch
import numpy as np
from models import BidirectionalLSTM

import pickle
import spacy
import math
import random

SEED = 1234

# Seed every random number generator this notebook imports, not only torch's,
# so that a fresh "Restart & Run All" reproduces the same numbers.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
# Ask cuDNN for deterministic kernels and keep its autotuner off; the autotuner
# may otherwise pick different algorithms from run to run.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [2]:
# Restore the vocabulary object that was pickled to "vocab.pickle".
# NOTE(review): pickle.load can execute arbitrary code embedded in the file,
# so this is only safe if vocab.pickle comes from a trusted source.
with open("vocab.pickle", "rb") as vocabf:
    vocab = pickle.load(vocabf)

In [3]:
# Use the GPU when CUDA is usable; otherwise keep everything on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [4]:
# Hyperparameters handed to the BidirectionalLSTM constructor below.
# NOTE(review): presumably these must match the values used when the
# "Bidirectional.pth" checkpoint was trained, otherwise loading it fails — confirm.
INPUT_DIM = len(vocab)  # one entry per item in the loaded vocabulary
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1
N_LAYERS = 2
BIDIRECTIONAL = True
DROPOUT = 0.5
# Build the network and move its parameters to the selected device.
model = BidirectionalLSTM(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, 
                          N_LAYERS, BIDIRECTIONAL, DROPOUT).to(device)

In [5]:
# Restore the trained parameters. map_location remaps tensors that were saved
# from a GPU onto whichever device is active here, so the checkpoint also loads
# on a CPU-only machine.
model.load_state_dict(torch.load("Bidirectional.pth", map_location=device))
# This notebook only runs inference: switch to evaluation mode so dropout
# layers become the identity and repeated runs give the same activations.
model.eval();

In [186]:
# Snapshot of every parameter tensor, keyed by its qualified name
# (e.g. 'embedding.weight', 'rnn.weight_ih_l0'). Later cells index into `d`.
d = model.state_dict()
# Bare last expression: the notebook prints the whole dictionary, tensors included.
d

OrderedDict([('embedding.weight',
              tensor([[-0.0181,  0.0165, -0.0204,  ...,  0.0213,  0.0406, -0.0102],
                      [ 0.0134,  0.0107, -0.0403,  ..., -0.0323,  0.0394,  0.0371],
                      [-0.0885, -0.3074,  0.7475,  ..., -0.0910,  0.8222,  0.2536],
                      ...,
                      [ 0.0820, -0.0867, -0.1013,  ..., -0.0584,  0.0725,  0.0959],
                      [-0.0711,  0.0348,  0.0509,  ...,  0.0532, -0.0346, -0.0408],
                      [ 0.0094, -0.0319, -0.0387,  ..., -0.0087,  0.0027, -0.0095]],
                     device='cuda:0')),
             ('rnn.weight_ih_l0',
              tensor([[ 0.0248, -0.0046, -0.0756,  ...,  0.0145, -0.1147, -0.0421],
                      [ 0.0216,  0.0036, -0.0365,  ...,  0.0704, -0.0165, -0.0655],
                      [ 0.0091, -0.0258, -0.0006,  ...,  0.1090, -0.0931, -0.0603],
                      ...,
                      [-0.1282, -0.0360, -0.0879,  ...,  0.0311, -0.0809, -0.0810

In [7]:
def sentence_to_tensor(sentence, vocab):
    """
    Tokenize a sentence with spaCy and encode it as a column of vocabulary ids.

    Parameters:
    - sentence: the raw text to encode.
    - vocab: object indexable by token text (``vocab[token]``) that yields the
      token's integer id. How unknown tokens are handled depends entirely on
      that object.

    Returns a LongTensor of shape [n_tokens x 1] placed on the notebook-level
    ``device``.
    """
    # Loading a spaCy pipeline is expensive. Keep the loaded pipeline on the
    # function object so repeated calls reuse it instead of reloading it.
    nlp = getattr(sentence_to_tensor, "_nlp", None)
    if nlp is None:
        nlp = spacy.load("en")
        sentence_to_tensor._nlp = nlp

    indexed = [vocab[tok.text] for tok in nlp.tokenizer(sentence)]

    # unsqueeze(1) adds a singleton second dimension after the token dimension.
    return torch.LongTensor(indexed).to(device).unsqueeze(1)

In [8]:
# Encode an example sentence as a column of token ids.
# NOTE(review): the name `input` shadows Python's builtin input() for the rest
# of the session; a later cell reads this variable by that name.
input = sentence_to_tensor("This film is great", vocab)

In [76]:
# Look up an embedding for every token id and pass it through the model's dropout layer.
# The (nwords x 1) index tensor becomes (nwords x 1 x embedding_dim): nothing is
# squeezed here, so the singleton second dimension is kept.
embeddings = model.dropout(model.embedding(input))
embeddings.shape

torch.Size([4, 1, 100])

In [91]:
# Split the stacked forward-direction LSTM weight matrices into one block per gate.
# Each `rnn.weight_{i,h}h_l{layer}` tensor is treated as four consecutive groups
# of HIDDEN_DIM rows, taken in the order i, f, g, o.
weights = {}
# (row-block index, gate letter) pairs; an ordered list keeps the key order of
# `weights` stable between runs.
gate_ids = list(enumerate(["i", "f", "g", "o"]))
w_tensor_ids = ["i", "h"]  # "i" selects weight_ih, "h" selects weight_hh

# Loop names deliberately avoid `id` and `l`, which would shadow the builtin
# id() / read like the digit 1 for the rest of the notebook session.
for layer in range(N_LAYERS):
    for w_tensor_id in w_tensor_ids:
        stacked_weight = d[f"rnn.weight_{w_tensor_id}h_l{layer}"]
        for block, gate in gate_ids:
            weights[f"W{w_tensor_id}{gate}{layer}"] = \
                stacked_weight[(block * HIDDEN_DIM):((block + 1) * HIDDEN_DIM)]

In [92]:
# Split the stacked forward-direction LSTM bias vectors into one slice per gate:
# four consecutive groups of HIDDEN_DIM entries per `rnn.bias_{i,h}h_l{layer}`.
# `gate_ids` and `w_tensor_ids` are the ones defined in the weights cell.
biases = {}
# Loop names deliberately avoid `id`, which would shadow the builtin id() for
# the rest of the notebook session.
for layer in range(N_LAYERS):
    for w_tensor_id in w_tensor_ids:
        stacked_bias = d[f"rnn.bias_{w_tensor_id}h_l{layer}"]
        for block, gate in gate_ids:
            biases[f"b{w_tensor_id}{gate}{layer}"] = \
                stacked_bias[(block * HIDDEN_DIM):((block + 1) * HIDDEN_DIM)]

In [188]:
def extract_params(n_layers, pytorch_dict, hidden_dim=None):
    """
    Split the stacked LSTM parameters of a state dict into per-gate tensors.

    For both directions (no suffix and "_reverse"), both parameter kinds
    ("weight", "bias"), every layer and both sources ("i" for *_ih, "h" for
    *_hh), the tensor ``rnn.{param}_{src}h_l{layer}{suffix}`` is cut along its
    first dimension into four equal consecutive blocks, in the order
    i, f, g, o.

    Parameters:
    - n_layers: number of layers to extract (layers 0 .. n_layers-1).
    - pytorch_dict: mapping from parameter name to tensor, e.g. a state_dict.
    - hidden_dim: rows per gate block. When omitted it is derived from each
      tensor as ``shape[0] // 4``, so the function does not depend on any
      notebook-level constant.

    Returns a dict keyed ``{param}_{src}{gate}{layer}{suffix}`` (for example
    "weight_ii0" or "bias_ho1_reverse"). Keys are inserted in a fixed order.

    Raises KeyError if an expected parameter name is missing from pytorch_dict.
    """
    gates = ("i", "f", "g", "o")
    result = {}

    for suffix in ("", "_reverse"):
        for param in ("weight", "bias"):
            for layer in range(n_layers):
                for tensor_id in ("i", "h"):
                    stacked = pytorch_dict[f"rnn.{param}_{tensor_id}h_l{layer}" + suffix]
                    rows = hidden_dim if hidden_dim is not None else stacked.shape[0] // len(gates)
                    for block, gate in enumerate(gates):
                        result[f"{param}_{tensor_id}{gate}{layer}" + suffix] = \
                            stacked[(block * rows):((block + 1) * rows)]
    return result

In [155]:
def infer_gates(xt, ht, ct, weights, biases, l, t, rev=False):
    """
    Compute one LSTM time step for a single (unbatched) input vector.

    Parameters:
    - xt: input vector with EMBEDDING_DIM (or, for deeper layers, the layer's
      input size) elements. Accepted as a row or a column.
    - ht: previous hidden state with HIDDEN_DIM elements, row or column.
    - ct: previous cell state with HIDDEN_DIM elements, row or column.
    - weights: dict with keys "Wi{gate}{l}" ([HIDDEN_DIM x input size]) and
      "Wh{gate}{l}" ([HIDDEN_DIM x HIDDEN_DIM]) for gate in i, f, g, o.
    - biases: dict with keys "bi{gate}{l}" and "bh{gate}{l}", each [HIDDEN_DIM].
    - l: layer index, used in the lookup keys and in the result keys.
    - t: time step index, used in the result keys.
    - rev: when True, "_rev" is appended to every lookup key and result key.

    Returns a dict of [HIDDEN_DIM x 1] tensors, inserted in this order:
    "i{t}{l}", "f{t}{l}", "g{t}{l}", "o{t}{l}" (gate activations), then
    "c{t+1}{l}" and "h{t+1}{l}" (next cell and hidden state), each with the
    "_rev" suffix when rev is True.
    """
    suffix = "_rev" * rev  # multiplying by False yields the empty string

    # Work on column vectors throughout. reshape accepts either orientation,
    # so the states returned by one step can be fed straight into the next.
    xt = xt.reshape(-1, 1)
    ht = ht.reshape(-1, 1)
    ct = ct.reshape(-1, 1)

    # i, f and o are sigmoid gates; the candidate cell value g uses tanh.
    activations = {
        "i": torch.sigmoid,
        "f": torch.sigmoid,
        "g": torch.tanh,
        "o": torch.sigmoid,
    }

    res = {}
    for gate, activation in activations.items():
        res[f"{gate}{t}{l}" + suffix] = infer_gate(
            weights[f"Wi{gate}{l}" + suffix], xt, biases[f"bi{gate}{l}" + suffix],
            weights[f"Wh{gate}{l}" + suffix], ht, biases[f"bh{gate}{l}" + suffix],
            activation=activation)

    it = res[f"i{t}{l}" + suffix]
    ft = res[f"f{t}{l}" + suffix]
    gt = res[f"g{t}{l}" + suffix]
    ot = res[f"o{t}{l}" + suffix]

    # The next state is stored under t+1.
    c_next = torch.add(torch.mul(ft, ct), torch.mul(it, gt))
    h_next = torch.mul(ot, torch.tanh(c_next))
    res[f"c{t+1}{l}" + suffix] = c_next
    res[f"h{t+1}{l}" + suffix] = h_next

    return res

def infer_gate(i_w, x, b_i, h_w, h, b_h, activation=torch.sigmoid):
    """
    Evaluate a single gate: activation(i_w @ x + b_i + h_w @ h + b_h).

    Parameters:
    - i_w, h_w: 2-D weight matrices applied to x and h respectively.
    - x, h: column vectors ([n x 1]) matching the second dimension of i_w / h_w.
    - b_i, b_h: 1-D biases; each is turned into a column before being added.
    - activation: elementwise function applied to the sum. Defaults to
      torch.sigmoid; pass torch.tanh for the candidate (g) gate.

    Returns a column tensor with one row per row of the weight matrices.
    """
    input_term = torch.addmm(torch.unsqueeze(b_i, -1), i_w, x)
    hidden_term = torch.addmm(torch.unsqueeze(b_h, -1), h_w, h)

    return activation(torch.add(input_term, hidden_term))

In [163]:
# Single LSTM step on the first token, with layer index 0 and time step 0.
# NOTE(review): `h00` and `c00` are not defined in any cell shown here — presumably
# initial hidden/cell states left over from an earlier or deleted cell; confirm.
gates = infer_gates(embeddings[0], h00, c00, weights, biases, 0, 0)

In [176]:
def forward_pass(embeddings, n_layers, weights, biases, hidden_dim,
                 weights_rev=None, biases_rev=None):
    """
    Run the stacked, bidirectional LSTM step by step and record every value.

    Parameters:
    - embeddings: sequence of per-token input vectors, indexed by time step.
    - n_layers: number of stacked layers to run.
    - weights, biases: per-gate parameters for the forward direction, in the
      format infer_gates expects.
    - hidden_dim: size of the hidden and cell state vectors.
    - weights_rev, biases_rev: per-gate parameters for the reverse direction,
      with "_rev"-suffixed keys. When omitted, `weights` / `biases` are used,
      so a single dict may carry both directions.

    Returns a dict holding, for every layer l and step t, the gate activations
    ("i{t}{l}", "f{t}{l}", "g{t}{l}", "o{t}{l}") and the states ("c{t}{l}",
    "h{t}{l}"), plus the same keys with a "_rev" suffix for the reverse
    direction. For the reverse direction, step t processes position
    len(embeddings) - (t + 1).
    """
    if weights_rev is None:
        weights_rev = weights
    if biases_rev is None:
        biases_rev = biases

    result = {} #saves values of states and gates
    n_steps = len(embeddings)

    # Inputs of the first layer are the embeddings, as column vectors.
    layer_inputs = [embeddings[t].reshape(-1, 1) for t in range(n_steps)]

    for l in range(n_layers):

        # random initialization of both hidden and cell states
        # NOTE(review): torch.nn.LSTM starts from zero states when none are
        # supplied; confirm random states are intended here.
        result[f"h0{l}"] = torch.randn(1, hidden_dim).to(device)
        result[f"c0{l}"] = torch.randn(1, hidden_dim).to(device)
        # reverse direction as well
        result[f"h0{l}_rev"] = torch.randn(1, hidden_dim).to(device)
        result[f"c0{l}_rev"] = torch.randn(1, hidden_dim).to(device)

        for t in range(n_steps):
            # States are handed over as rows and inputs as columns, which is
            # the orientation infer_gates documents and transposes from.
            result.update(infer_gates(layer_inputs[t],
                                      result[f"h{t}{l}"].reshape(1, -1),
                                      result[f"c{t}{l}"].reshape(1, -1),
                                      weights, biases, l, t))
            result.update(infer_gates(layer_inputs[n_steps-(t+1)],
                                      result[f"h{t}{l}_rev"].reshape(1, -1),
                                      result[f"c{t}{l}_rev"].reshape(1, -1),
                                      weights_rev, biases_rev, l, t,
                                      rev=True))

        # The next layer reads, for each position p, the forward hidden state
        # followed by the reverse hidden state of this layer. The reverse pass
        # reaches position p at its step n_steps-1-p and stores the result
        # under step index n_steps-p.
        layer_inputs = [
            torch.cat([result[f"h{p+1}{l}"].reshape(-1, 1),
                       result[f"h{n_steps-p}{l}_rev"].reshape(-1, 1)], dim=0)
            for p in range(n_steps)
        ]
    return result
            

In [164]:
# Size of the first dimension of `embeddings`, i.e. one entry per token of the sentence.
len(embeddings)

4

In [189]:
# Split every LSTM parameter in `d` into per-gate tensors. The layer count
# comes from the N_LAYERS constant the model was built with, not a literal.
extract_params(N_LAYERS, d)

{'weight_ii0': tensor([[ 0.0248, -0.0046, -0.0756,  ...,  0.0145, -0.1147, -0.0421],
         [ 0.0216,  0.0036, -0.0365,  ...,  0.0704, -0.0165, -0.0655],
         [ 0.0091, -0.0258, -0.0006,  ...,  0.1090, -0.0931, -0.0603],
         ...,
         [-0.0771, -0.0444, -0.0873,  ..., -0.0467, -0.0704, -0.1100],
         [ 0.0045,  0.0476,  0.0172,  ...,  0.1290, -0.1718, -0.0724],
         [ 0.2454, -0.0159, -0.0032,  ...,  0.2068, -0.2552, -0.1844]],
        device='cuda:0'),
 'weight_if0': tensor([[-0.0161, -0.0296,  0.0744,  ..., -0.1276,  0.0641,  0.0951],
         [-0.0596, -0.0834, -0.0282,  ...,  0.0843,  0.0186, -0.1211],
         [ 0.0291,  0.0659,  0.0295,  ...,  0.0945, -0.0044,  0.0068],
         ...,
         [-0.0659, -0.0846, -0.0493,  ..., -0.0347, -0.0713, -0.0943],
         [-0.0196,  0.0120, -0.0611,  ...,  0.1685, -0.1010, -0.0173],
         [ 0.0563, -0.0047, -0.0174,  ...,  0.0757, -0.0539, -0.0583]],
        device='cuda:0'),
 'weight_ig0': tensor([[ 0.0404,  0.12