# LSTM-250 Numpy Model Extraction

## Goals

- Load the dataset and the model exported by the training notebook and validate them.
- Translate the model into Numpy operations and validate the translation.
- Update the dataset with the predicted index produced by the Numpy model.
- Export the model and the dataset as sqlite3 databases for the implementation in C.

**NOTE:** The dataset exported by the training notebook may contain stale predicted indices, because the model was retrained several times without the dataset being updated. We re-run the predictions here and update the predicted index in the dataset.

# Environment Setup

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import os
import numpy as np
import IPython.display as ipd
from tqdm.auto import tqdm


# The usual CUDA auto-detection (torch.cuda.is_available()) is intentionally
# disabled in this notebook: `device` is pinned to the CPU, and the model and
# its weights are moved to 'cpu' when they are loaded below.
print('Using PyTorch version:', torch.__version__)
device = 'cpu'

# Load and Validate torch.nn.Module Implementation

## Define Model

**NOTE:** Always copy the following cell from the training notebook.

In [None]:
import torch.nn.utils.rnn as rnn_utils


# LSTM model definition
class LSTM(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last valid step.

    Args:
        input_size: number of features per time step.
        hidden_size: width of every LSTM layer.
        num_layers: number of stacked LSTM layers.
        num_classes: number of outputs of the final linear layer.

    The parameter names (`lstm.*`, `fc.*`) are unchanged, so state dicts saved
    by the training notebook still load.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_classes = num_classes
        self.debug = False    # Set it to true to print debug info
        # Inter-layer dropout is only active in training mode.
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=0.2)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, pad_seq, lengths):
        """Classify a batch of zero-padded, variable-length sequences.

        Args:
            pad_seq: padded batch tensor, batch-first: (batch, max_len, input_size).
            lengths: true length of every sequence in the batch.

        Returns:
            Tensor of shape (batch, num_classes) computed from the LSTM output
            at the last valid time step of each sequence.

        Side effect: the padded LSTM output is stored in `self.lstm_outpad`
        so that it can be inspected by later tests.
        """
        if self.debug: print('DEBUG START: LSTM model ---')

        # Extract batch size for initialization of hidden state
        batch_size = pad_seq.size(0)

        # Zero initial hidden and cell states, created on the same device and
        # with the same dtype as the input instead of relying on a notebook
        # level `device` global.
        h0 = pad_seq.new_zeros(self.num_layers, batch_size, self.hidden_size)
        c0 = pad_seq.new_zeros(self.num_layers, batch_size, self.hidden_size)

        # Convert padded sequence to variable length packed sequence so that
        # the padding values never reach the LSTM
        packed_seq = rnn_utils.pack_padded_sequence(pad_seq, lengths, enforce_sorted=False, batch_first=True)

        # Forward propagate LSTM, returns a packed sequence
        out_packed, _ = self.lstm(packed_seq, (h0, c0))

        # Extract final hidden states of each sequence for the output layer
        out_pad, out_lens = rnn_utils.pad_packed_sequence(out_packed, batch_first=True)
        out_indx = out_lens - 1   # indices of the last valid hidden state in the padded sequence
        # select the last valid state in each sequence, and make them contiguous for efficiency
        last_hidden = out_pad[range(batch_size), out_indx].contiguous()
        self.lstm_outpad = out_pad   # Save for later testing

        if self.debug:
            print('last_hidden size:', last_hidden.size())
            print('last_hidden:\n', last_hidden)

        # Decode the hidden state of the last time step only (for whole batch)
        out = self.fc(last_hidden)
        if self.debug: print('DEBUG END: LSTM model ---')
        return out

## Load Saved Model

In [None]:
!ls -ltrh ./session/
print('')

# Load saved model dictionary
model_path = './session/trained-lstm250.pt'
model_dict = torch.load(model_path)
print(model_dict.keys())


# Parse the values for easier use
Accuracy = model_dict['accuracy']
Correct_count = model_dict['correct_count']
Index_to_label = model_dict['index_to_label']
Label_to_index = {label:index for index, label in Index_to_label.items()}
Hparam = model_dict['Hparam']
Model_state_dict = model_dict['state_dict']
Model_perf = f'Model Performance:   accuracy: {Accuracy:.2f}%   correct_count: {Correct_count}'  # to be used later
print('Hparam:', Hparam)
print('Model_perf:', Model_perf)


# move all weights to cpu
for key in Model_state_dict: 
    Model_state_dict[key] = Model_state_dict[key].to('cpu')
    
    
# Instantiate the model
model_pt = LSTM(Hparam['input_size'], Hparam['hidden_size'], Hparam['num_layers'], Hparam['num_classes'])
model_pt.load_state_dict(Model_state_dict)
model_pt.to('cpu')
model_pt.eval()     # we are always evaluating here
print(model_pt)

In [None]:
# Delete the temporaries of the model-loading cell so they cannot be used by
# mistake later; the parsed values (Hparam, Model_state_dict, ...) are kept.
del model_path, model_dict

## Load Saved Dataset

In [None]:
# Prints information about dataset item
def print_dataItem(item):
    """Print a short summary of one dataset item.

    The item is indexed positionally: label, label index, predicted index,
    sequence length, and the feature sequence (anything exposing `.shape`).
    """
    features = item[4]
    summary = (
        f"label: {item[0]}, label_index: {item[1]}, "
        f"predicted_index: {item[2]}, sequence_length: {item[3]},"
    )
    details = f"\nfeature_sequence shape: {features.shape}, feature_seq type: {type(features)}"
    print(summary, details)

    
# Load the test dataset exported by the training notebook
ds_path = "./session/test-export-ds.pt"
DS_loaded = torch.load(ds_path)
print(DS_loaded.keys())
print('DS_loaded len:', len(DS_loaded['dataset']))
print('schema:', DS_loaded['dataset_schema'])


# Make sure the label-to-index dictionary matches the one in the model.
# Only the labels present in the dataset dictionary are checked; a label that
# is missing from the model mapping raises KeyError instead of the assertion.
for key in DS_loaded['label_dict']:
    assert DS_loaded['label_dict'][key] == Label_to_index[key], 'Dataset and Model Label-to-index are different'
print('INFO: Dataset and Model label-to-index matched')


# Show the first item as a sanity check
DataItems = DS_loaded['dataset']
item = DataItems[0]
print_dataItem(item)

In [None]:
# Delete the temporaries of the dataset-loading cell; DS_loaded and DataItems
# stay available for the following cells.
del ds_path, key, item

## Validate Loaded Model

In [None]:
# Padding is needed to make the batch <tensor> from <list> of variable length sequences
# The padding values are not passed to the LSTM during training/testing
def pad_sequence_lstm(batch):
    """Stack a list of variable-length tensors into one batch-first tensor.

    Shorter sequences are right-padded with zeros up to the longest one.
    """
    return rnn_utils.pad_sequence(batch, batch_first=True, padding_value=0.)


# Turns a list of (label, feature_sequence) examples into model inputs:
# a zero-padded batch tensor, the label indices and the sequence lengths.
def collate_fn_lstm(batch):
    """Collate `(label, numpy feature sequence)` pairs into batch tensors.

    Returns:
        (tensors, targets, lengths): the padded feature batch, the label
        indices looked up in `Label_to_index`, and the unpadded length of
        every sequence (needed for pack_padded_sequence in LSTM.forward()).
    """
    feature_tensors = []
    label_indices = []
    seq_lengths = []

    # Convert each example and record its label index and true length
    for label, feat_seq in batch:
        seq_tensor = torch.from_numpy(feat_seq)
        feature_tensors.append(seq_tensor)
        label_indices.append(Label_to_index[label])
        seq_lengths.append(seq_tensor.shape[0])

    # Group the list of tensors into a batched tensor
    return (
        pad_sequence_lstm(feature_tensors),
        torch.tensor(label_indices),
        torch.tensor(seq_lengths),
    )

In [None]:
def get_likely_index(tensor):
    """Return the index of the largest value along the last dimension."""
    # For a batch of score vectors this is the most likely label index of
    # each element in the batch.
    return torch.argmax(tensor, dim=-1)


# Given an item from the test dataset, returns an example for the predict functions
def make_example(data_item):
    """Reduce a dataset item to the `(label, feature_sequence)` pair.

    The label is taken from position 0 and the feature sequence from
    position 4 of the item.
    """
    return data_item[0], data_item[4]


# Return the prediction using nn.Module instance
def predictNN(example, model=None):    # example: (label, feature sequence as np.ndarray)
    """Predict the label index of a single example with an nn.Module model.

    `model` must be supplied by the caller; it is switched to eval mode
    before the forward pass.
    """
    model.eval()
    # Wrap the example into a batch of one so it can reuse the collate function
    features, _targets, seq_lengths = collate_fn_lstm([example])
    scores = model(features, seq_lengths)
    # Take the single prediction out of the batch and return it as a Python int
    return get_likely_index(scores)[0].item()


# Run a prediction on one hand-picked dataset item as a smoke test
select_index = 1006
item = DataItems[select_index]
example = make_example(item)
pred_index = predictNN(example, model=model_pt)
pred_label  = Index_to_label[pred_index]   # map the predicted index back to its label
phone, *_, feat_seq = item                 # first field is the label, last field the feature sequence
print(f"Expected: {phone}. Predicted: {pred_label}.")

In [None]:
# Validate the given model on the whole dataset
def validateModel(model, predict_fn, dataset=None):
    """Run `predict_fn` on every dataset item and report the accuracy.

    Args:
        model: forwarded to `predict_fn` as the `model` keyword argument
            (may be None for predictors that do not need it).
        predict_fn: callable `predict_fn(example, model=...)` returning the
            predicted label index for one example.
        dataset: iterable of dataset items; defaults to the global `DataItems`.

    Returns:
        (accuracy, correct_count, expect_miss, total_count), where
        `expect_miss` counts predictions that differ from the predicted
        index stored in the dataset item.
    """
    if dataset is None:
        dataset = DataItems
    expect_miss = 0      # no. of mismatches between the prediction stored in the dataset and this prediction
    total_count = 0
    correct_count = 0
    for item in tqdm(dataset):
        lbl_index, pred_index = item[1], item[2]
        pred = predict_fn(make_example(item), model=model)
        if pred != pred_index: expect_miss += 1    # prediction does not match prediction in dataset
        if pred == lbl_index: correct_count += 1   # prediction matched the actual label-index
        total_count += 1
    # Compute and print statistics; an empty dataset reports 0% instead of
    # raising ZeroDivisionError
    accuracy = (100.0 * correct_count) / total_count if total_count else 0.0
    print(f'Validation accuracy: {accuracy:.2f}%   correct_count: {correct_count}   expected-miss: {expect_miss}   total_count: {total_count}')
    return accuracy, correct_count, expect_miss, total_count

            
# Validate the loaded PyTorch model on the whole dataset and print the
# performance recorded in the checkpoint next to it for comparison
validateModel(model_pt, predictNN)
print('Expected', Model_perf)

In [None]:
# Delete the temporaries created by the single-prediction cell
del select_index, item, example, pred_index, pred_label, phone, feat_seq

# Implementation Using torch.tensor Operations

## LSTM Refresher

The following notes are taken from the online book [d2l.ai](https://d2l.ai/chapter_recurrent-modern/lstm.html).

![image.png](https://d2l.ai/_images/lstm-3.svg)


The following operations are independent of each other, so they can be performed in parallel.
\begin{split}\begin{aligned}
\mathbf{I}_t &= \sigma(\mathbf{X}_t \mathbf{W}_{xi} + \mathbf{H}_{t-1} \mathbf{W}_{hi} + \mathbf{b}_i),\\
\mathbf{F}_t &= \sigma(\mathbf{X}_t \mathbf{W}_{xf} + \mathbf{H}_{t-1} \mathbf{W}_{hf} + \mathbf{b}_f),\\
\mathbf{O}_t &= \sigma(\mathbf{X}_t \mathbf{W}_{xo} + \mathbf{H}_{t-1} \mathbf{W}_{ho} + \mathbf{b}_o), \\
\tilde{\mathbf{C}}_t &= \text{tanh}(\mathbf{X}_t \mathbf{W}_{xc} + \mathbf{H}_{t-1} \mathbf{W}_{hc} + \mathbf{b}_c),
\end{aligned}\end{split}

The following operations depend on some of the results above and must be performed in order (sequentially).
\begin{split}\begin{aligned}
\mathbf{C}_t &= \mathbf{F}_t \odot \mathbf{C}_{t-1} + \mathbf{I}_t \odot \tilde{\mathbf{C}}_t.\\
\mathbf{H}_t &= \mathbf{O}_t \odot \tanh(\mathbf{C}_t).
\end{aligned}\end{split}


In [None]:
# Here, the weights of all four gates are merged into a single matrix per
# layer; tensorLSTMCell below slices them apart in the order i, f, g, o.
# Print every parameter name with its shape.
for key, val in Model_state_dict.items():
    print(key, val.size())

In [None]:
# Defining the LSTM cell
# weight_ih: weights for input x
# weight_hh: weights for hidden state h_prev
def tensorLSTMCell(x, h_prev, c_prev, weight_ih, weight_hh, bias_ih, bias_hh, input_size, hidden_size):
    """Compute one LSTM cell step with plain tensor operations.

    Args:
        x: input features, shape (input_size,) or (..., input_size).
        h_prev, c_prev: previous hidden and cell state, shape (hidden_size,)
            or (..., hidden_size).
        weight_ih, weight_hh: merged gate weights as stored by nn.LSTM,
            shapes (4*hidden_size, input_size) and (4*hidden_size, hidden_size).
        bias_ih, bias_hh: merged gate biases, shape (4*hidden_size,).
        input_size: kept for interface compatibility; not used.
        hidden_size: width of each gate slice.

    Returns:
        (h, c): the new hidden state and cell state.
    """
    gates = x @ weight_ih.t()  +  h_prev @ weight_hh.t()  +  bias_ih  +  bias_hh

    # The gates are laid out along the LAST axis in the order i, f, g, o.
    # Slicing that axis gives the same result as before for 1-D inputs and
    # additionally supports inputs with leading batch dimensions.
    i = torch.sigmoid(gates[..., :hidden_size])
    f = torch.sigmoid(gates[..., hidden_size:2*hidden_size])
    g = torch.tanh(gates[..., 2*hidden_size:3*hidden_size])   # equivalent of ~Ct in above figure
    o = torch.sigmoid(gates[..., 3*hidden_size:])             # equivalent of Ot in above figure

    c = f * c_prev + i * g   # Ct
    h = o * torch.tanh(c)    # Ht
    return h, c


# Implements 3 layers of LSTM
def tensorLSTM3(x, h_prev, c_prev, weight_dict, input_size, hidden_size):
    """Advance a 3-layer LSTM stack by one time step.

    Args:
        x: input token of length `input_size`.
        h_prev, c_prev: previous hidden / cell states, one entry per layer.
        weight_dict: mapping holding the `lstm.*_l0` .. `lstm.*_l2` parameters.
        input_size: feature size of `x`.
        hidden_size: width of every layer.

    Returns:
        Two 3-tuples: the new hidden states and the new cell states,
        ordered from the first to the last layer.
    """
    # Perform compatibility checks
    layer_count = 3
    assert len(h_prev) == layer_count, "You need to provide initial values for all layers"
    assert len(c_prev) == layer_count, "You need to provide initial values for all layers"
    assert len(x) == input_size, "Input size mismatch"

    hidden_states, cell_states = [], []
    layer_in, layer_in_size = x, input_size
    for layer in range(layer_count):
        h_cur, c_cur = tensorLSTMCell(
            layer_in, h_prev[layer], c_prev[layer],
            weight_dict[f'lstm.weight_ih_l{layer}'], weight_dict[f'lstm.weight_hh_l{layer}'],
            weight_dict[f'lstm.bias_ih_l{layer}'], weight_dict[f'lstm.bias_hh_l{layer}'],
            layer_in_size, hidden_size)
        hidden_states.append(h_cur)
        cell_states.append(c_cur)
        # The hidden state of this layer is the input of the next one
        layer_in, layer_in_size = h_cur, hidden_size
    return tuple(hidden_states), tuple(cell_states)
    
    

# Implementation of fully connected layer
def tensorFClayer(x, weight, bias):
    """Apply a linear layer: multiply `x` by the transposed weight, add bias."""
    projected = torch.matmul(x, weight.t())
    return projected + bias



In [None]:
# Select an example to run through the manual model for validation
label, feat_seq = make_example(DataItems[0])
feat_seq = torch.from_numpy(feat_seq)
feat_seq = feat_seq[:1, :]      # make it a sequence of 1 token
seq_len  = torch.tensor(1)
print('feat_seq:', feat_seq.size())

# Initial states: one zero row per layer for the 3 layers
h0_3 = torch.zeros(3, Hparam['hidden_size'])
c0_3 = torch.zeros(3, Hparam['hidden_size'])

# Pass one token in sequence through the LSTM cell
h_man3, c_man3 = tensorLSTM3(feat_seq[0], h0_3, c0_3, Model_state_dict, Hparam['input_size'], Hparam['hidden_size'])
h_man = h_man3[2]               # hidden state of the last (third) layer
print('h_man  :', h_man.size())


# Pass the same sequence through the LSTM model layer
# (unsqueeze(0) adds the batch dimension the model expects)
model_pt.eval()
out_model = model_pt(feat_seq.unsqueeze(0), seq_len.unsqueeze(0))
h_model = model_pt.lstm_outpad[0]   # lstm layer output saved by forward(), first batch element
print('h_model:', h_model.size())


# Compare the values in two methods
abs_diff = torch.abs(h_man - h_model)
max_diff = torch.max(abs_diff)
tolerance = 10e-6               # NOTE: 10e-6 equals 1e-5
print('max_diff:', max_diff.item())
print('max_diff < Tolerance:', (max_diff<tolerance).item())
assert max_diff<tolerance

In [None]:
# Pass the LSTM output through the Fully-Connected Layer
out_man = tensorFClayer(h_man, Model_state_dict['fc.weight'], Model_state_dict['fc.bias'])
print('out_man  :', out_man.size())

# Compare the FC output with the LSTM model output
# (out_model carries an extra batch dimension; the subtraction broadcasts over it)
print('out_model:', out_model.size())
abs_diff = torch.abs(out_man - out_model)
max_diff = torch.max(abs_diff)
tolerance = 10e-6               # NOTE: 10e-6 equals 1e-5
print('max_diff:', max_diff.item())
print('max_diff < Tolerance:', (max_diff<tolerance).item())
assert max_diff<tolerance

In [None]:
# Delete the loop variables of the state-dict print cell and the temporaries
# of the single-token validation cells
del key, val
del label, feat_seq, seq_len, h_man
del h0_3, c0_3, h_man3, c_man3, out_model, h_model, abs_diff, max_diff, tolerance, out_man

## Implement the Complete Manual Model

In [None]:
def tensorModel(feat_seq):
    """Run a whole feature sequence through the manual tensor model.

    The sequence is fed token by token through the 3-layer LSTM, starting
    from zero states; the last layer's hidden state after the final token
    goes through the fully connected layer and is returned.
    """
    hidden_size = Hparam['hidden_size']

    # Zero initial states, one row per layer
    h_state = torch.zeros(3, hidden_size).to(device)
    c_state = torch.zeros(3, hidden_size).to(device)

    # Step through the sequence, carrying the states forward
    for token in feat_seq:
        step_h, step_c = tensorLSTM3(token, h_state, c_state, Model_state_dict,
                                     Hparam['input_size'], hidden_size)
        h_state, c_state = step_h, step_c

    top_hidden = step_h[2]   # hidden state of the third layer at the last token
    return tensorFClayer(top_hidden, Model_state_dict['fc.weight'], Model_state_dict['fc.bias'])

In [None]:
# Select an example to run through the manual model for validation
label, feat_seq = make_example(DataItems[0])
feat_seq = torch.from_numpy(feat_seq)
seq_len = torch.tensor(len(feat_seq))   # full sequence this time
print('feat_seq:', feat_seq.size())

# Output of the manual tensor implementation
out_man = tensorModel(feat_seq)
print('out_man:', out_man.size())

# Output of the reference nn.Module (unsqueeze(0) adds the batch dimension)
out_model = model_pt(feat_seq.unsqueeze(0), seq_len.unsqueeze(0))
print('out_model:', out_model.size())

# Compare the two outputs
abs_diff = torch.abs(out_man - out_model)
max_diff = torch.max(abs_diff)
tolerance = 10e-6               # NOTE: 10e-6 equals 1e-5
print('max_diff:', max_diff.item())
print('max_diff < Tolerance:', (max_diff<tolerance).item())
assert max_diff<tolerance

In [None]:
# Driver for manual tensor based model
def predict_tensorModel(example, **kwargs):
    """Predict the label index of one example with the manual tensor model.

    Extra keyword arguments (such as `model`) are accepted and ignored so the
    function can be passed to validateModel as a predict function.
    """
    _label, features = example
    scores = tensorModel(torch.from_numpy(features))
    # Index of the highest score, returned as a Python int
    return get_likely_index(scores).item()


# Test the above function on a single item
item = DataItems[120]
print_dataItem(item)
example = make_example(item)
pred = predict_tensorModel(example)
print('pred:', pred)

# Run on entire test-dataset; the model argument is None because
# predict_tensorModel ignores it
accuracy, correct_count, expect_miss, total_count = validateModel(None, predict_tensorModel)
print('Expected', Model_perf)

In [None]:
# Delete the temporaries of the tensor-model validation cells
del abs_diff, accuracy, correct_count, example
del expect_miss, feat_seq, item, label
del max_diff, out_man, out_model, pred, seq_len
del tolerance, total_count

# Implement Using Numpy Matrix Operations

In [None]:
# Expose every model parameter as a numpy ndarray, keyed by its parameter name
Weights_np = {
    name: tensor.detach().numpy()
    for name, tensor in Model_state_dict.items()
}

# Show the name and shape of each converted parameter
for k, v in Weights_np.items():
    print(k, v.shape)

In [None]:
# Transpose the weights beforehand so the numpy model can compute `x @ W`
# without transposing on every step.
# Each transposed matrix is derived from the original tensor in
# Model_state_dict rather than from the current Weights_np entry, so
# re-running this cell does not flip the matrices back.
for k, v in Model_state_dict.items():
    if 'weight' in k:
        Weights_np[k] = v.detach().numpy().T
        print('Transposed:', k)

## Layer and Model Definitions

In [None]:
# Sigmoid activation in numpy
def npSigmoid(x):
    """Return the element-wise logistic function 1 / (1 + e^-x)."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator


# Defining the LSTM cell
# weight_ih: weights for input x
# weight_hh: weights for hidden state h_prev
def npLSTMCell(x, h_prev, c_prev, weight_ih, weight_hh, bias_ih, bias_hh, input_size, hidden_size):
    """Compute one LSTM cell step with numpy operations.

    Args:
        x: input features, shape (input_size,) or (..., input_size).
        h_prev, c_prev: previous hidden and cell state, shape (hidden_size,)
            or (..., hidden_size).
        weight_ih, weight_hh: merged gate weights, used as `x @ weight_ih`
            and `h_prev @ weight_hh` (i.e. already transposed by the caller).
        bias_ih, bias_hh: merged gate biases.
        input_size: kept for interface compatibility; not used.
        hidden_size: width of each gate slice.

    Returns:
        (h, c): the new hidden state and cell state.
    """
    gates = x @ weight_ih + h_prev @ weight_hh + bias_ih + bias_hh

    # The gates are laid out along the LAST axis in the order i, f, g, o.
    # Slicing that axis gives the same result as before for 1-D inputs and
    # additionally supports inputs with leading batch dimensions.
    i = npSigmoid(gates[..., :hidden_size])
    f = npSigmoid(gates[..., hidden_size:2*hidden_size])
    g = np.tanh(gates[..., 2*hidden_size:3*hidden_size])   # equivalent of ~Ct in above figure
    o = npSigmoid(gates[..., 3*hidden_size:])              # equivalent of Ot in above figure
    c = f * c_prev + i * g   # Ct
    h = o * np.tanh(c)       # Ht
    return h, c


# Implements 3 layers of LSTM
def npLSTM3(x, h_prev, c_prev, weight_dict, input_size, hidden_size):
    """Advance a 3-layer numpy LSTM stack by one time step.

    Args:
        x: input token of length `input_size`.
        h_prev, c_prev: previous hidden / cell states, one entry per layer.
        weight_dict: mapping holding the `lstm.*_l0` .. `lstm.*_l2` parameters.
        input_size: feature size of `x`.
        hidden_size: width of every layer.

    Returns:
        Two 3-tuples: the new hidden states and the new cell states,
        ordered from the first to the last layer.
    """
    # Perform compatibility checks
    layer_count = 3
    assert len(h_prev) == layer_count, "You need to provide initial values for all layers"
    assert len(c_prev) == layer_count, "You need to provide initial values for all layers"
    assert len(x) == input_size, "Input size mismatch"

    hidden_states, cell_states = [], []
    layer_in, layer_in_size = x, input_size
    for layer in range(layer_count):
        h_cur, c_cur = npLSTMCell(
            layer_in, h_prev[layer], c_prev[layer],
            weight_dict[f'lstm.weight_ih_l{layer}'], weight_dict[f'lstm.weight_hh_l{layer}'],
            weight_dict[f'lstm.bias_ih_l{layer}'], weight_dict[f'lstm.bias_hh_l{layer}'],
            layer_in_size, hidden_size)
        hidden_states.append(h_cur)
        cell_states.append(c_cur)
        # The hidden state of this layer is the input of the next one
        layer_in, layer_in_size = h_cur, hidden_size
    return tuple(hidden_states), tuple(cell_states)
    
    

# Implementation of fully connected layer
def npFClayer(x, weight, bias):
    """Apply a linear layer: multiply `x` by `weight` (used as given) and add `bias`."""
    projected = np.matmul(x, weight)
    return projected + bias

In [None]:
# Complete Model using numpy
def npModel(feat_seq):
    """Run a whole feature sequence through the numpy model.

    The sequence is fed token by token through the 3-layer LSTM, starting
    from zero states; the last layer's hidden state after the final token
    goes through the fully connected layer and is returned.
    """
    hidden_size = Hparam['hidden_size']

    # Zero initial states, one row per layer
    h_state = np.zeros((3, hidden_size))
    c_state = np.zeros((3, hidden_size))

    # Step through the sequence, carrying the states forward
    for token in feat_seq:
        step_h, step_c = npLSTM3(token, h_state, c_state, Weights_np,
                                 Hparam['input_size'], hidden_size)
        h_state, c_state = step_h, step_c

    top_hidden = step_h[2]   # hidden state of the third layer at the last token
    return npFClayer(top_hidden, Weights_np['fc.weight'], Weights_np['fc.bias'])


# Select an example to run through the numpy model for validation
label, feat_seq = make_example(DataItems[0])
feat_seq = torch.from_numpy(feat_seq)
seq_len = torch.tensor(len(feat_seq))
feat_seq = feat_seq.to(device)
print('feat_seq:', feat_seq.size())
# Reference output from the nn.Module (unsqueeze(0) adds the batch dimension)
out_model = model_pt(feat_seq.unsqueeze(0), seq_len.unsqueeze(0))
print('out_model:', out_model.size())

# Convert the same features back to numpy and run the numpy model
feat_seq = feat_seq.to('cpu').detach().numpy()
out_np = npModel(feat_seq)
print('out_np:', out_np.shape)

# Compare the two outputs
out_model = out_model.to('cpu').detach().numpy()
abs_diff = np.abs(out_np - out_model)
max_diff = np.max(abs_diff)
tolerance = 10e-6               # NOTE: 10e-6 equals 1e-5
print('max_diff:', max_diff.item())
print('max_diff < Tolerance:', (max_diff<tolerance).item())
assert max_diff<tolerance

## Validate Model on Entire Dataset

In [None]:
# Given a numpy array, returns the index of the maximum value
def get_likely_index_np(nparray):
    """Return the index of the largest value, counted over the flattened array."""
    best_index = nparray.argmax(axis=None)
    return best_index


# Driver for manual numpy based model
def predict_npModel(example, **kwargs):
    """Predict the label index of one example with the numpy model.

    Extra keyword arguments (such as `model`) are accepted and ignored so the
    function can be passed to validateModel as a predict function.
    """
    _label, features = example
    scores = npModel(features)
    # Index of the highest score, returned as a Python int
    return get_likely_index_np(scores).item()


# item: (label, label_index, predicted_index, sequence_length, feature_sequence)
# -- the field order used by print_dataItem and validateModel
item = DataItems[120]
print_dataItem(item)
example = make_example(item)
pred = predict_npModel(example)
print('pred:', pred)

# Run on entire test-dataset; the model argument is None because
# predict_npModel ignores it
accuracy, correct_count, expect_miss, total_count = validateModel(None, predict_npModel)
print('Expected', Model_perf)

# Update the dataset with the Numpy Model Predicted index

---

# Export Numpy Model as sqlite3 DB

# Export the Dataset as sqlite3 DB