# MLP-100 Numpy Model Extraction

# Environment Setup

In [1]:
%matplotlib inline

import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np
import matplotlib.pyplot as plt
from tqdm.auto import tqdm


# Inference only: this notebook does no training, so no GPU is needed and the
# usual CUDA device selection below is intentionally left disabled.
#if torch.cuda.is_available():
#    device = torch.device('cuda')
#else:
#    device = torch.device('cpu')

#print('Using PyTorch version:', torch.__version__, ' Device:', device)
print('Using PyTorch version:', torch.__version__)

Using PyTorch version: 2.0.1


# Load and Validate torch.nn.Module

## Define Model

**NOTE:** Always copy the following cell from the training notebook.

In [2]:
# Define an MLP with two hidden layers of 100 units each, ReLU activations and
# dropout after each hidden layer.
# Per the notebook note, keep this cell a copy of the class in the training notebook.
class MLP100(nn.Module):
    """MLP that declares three linear layers: input_size -> 100 -> 100 -> num_classes.

    NOTE(review): forward() as written applies fc2 twice and never calls fc3,
    so the tensor it returns has 100 columns rather than num_classes. See the
    note inside forward() before changing anything.
    """

    def __init__(self, input_size, num_classes):
        super(MLP100, self).__init__()
        # Save parameters
        self.input_size = input_size
        self.num_classes = num_classes
        self.debug = False    # can be used to activate debugging features
        # Define layers
        self.fc1 = nn.Linear(input_size, 100)   # 100 hidden units
        self.fc1_drop = nn.Dropout(0.2)         # drop-out for faster training, has no effect on inference
        self.fc2 = nn.Linear(100, 100)          # 100 hidden units
        self.fc2_drop = nn.Dropout(0.2)         # drop-out for faster training, has no effect on inference
        self.fc3 = nn.Linear(100, num_classes)  # output layer (declared, but not used by forward() below)

    # Expects a batch of 1-D tensors
    # Dimension of x: (batch-size, input_size)
    def forward(self, x):
        x = F.relu(self.fc1(x))   # pass through the first hidden layer
        x = self.fc1_drop(x)      # dropout after the first hidden layer
        x = F.relu(self.fc2(x))   # pass through the second hidden layer
        x = self.fc2_drop(x)      # dropout after the second hidden layer
        # NOTE(review): this line reuses fc2 instead of calling fc3, so fc3 is never
        # part of the forward pass. It looks like a typo for self.fc3(x), but the
        # saved checkpoint was presumably trained with this exact forward pass
        # (TODO confirm against the training notebook). Changing it only here would
        # route activations through a layer the training may never have updated;
        # fix it in the training notebook, retrain, then copy the cell back.
        x = self.fc2(x)           # second application of fc2 (100 -> 100)
        return x

## Load Saved Model

In [3]:
!ls -ltr ./saved/
print('')

# Load saved model dictionary
model_path = './saved/trained_mlp100-98.17p.pt'
model_dict = torch.load(model_path)
print(model_dict.keys())

# Parse the values for easier use
Accuracy = model_dict['accuracy']
Correct_count = model_dict['correct_count']
Hparam = model_dict['Hparam']
model_state_dict = model_dict['state_dict']
Model_perf = f'Model Performance:   accuracy: {Accuracy:.2f}%   correct_count: {Correct_count}'  # to be used later
print('Hparam:', Hparam)

# Instantiate the model
model_pt = MLP100(Hparam['input_size'], Hparam['num_classes'])
model_pt.load_state_dict(model_state_dict)
print(model_pt)

total 70220
-rw-rw-r-- 1 makabir makabir 70809849 Jun 19 14:52 test_dataset.pt
-rw-rw-r-- 1 makabir makabir   360783 Jun 19 15:25 trained_mlp100-97.91p.pt
-rw-rw-r-- 1 makabir makabir   360783 Jun 19 15:44 trained_mlp100.pt
-rw-rw-r-- 1 makabir makabir   360783 Jun 19 15:44 trained_mlp100-98.17p.pt

dict_keys(['accuracy', 'correct_count', 'Hparam', 'state_dict'])
Hparam: {'input_size': 784, 'num_classes': 10}
MLP100(
  (fc1): Linear(in_features=784, out_features=100, bias=True)
  (fc1_drop): Dropout(p=0.2, inplace=False)
  (fc2): Linear(in_features=100, out_features=100, bias=True)
  (fc2_drop): Dropout(p=0.2, inplace=False)
  (fc3): Linear(in_features=100, out_features=10, bias=True)
)


## Load Saved Dataset

In [4]:
# Prints a one-line summary of a dataset item
def print_dataitem(item):
    """Print the scalar fields of `item` and the length of its feature vector.

    `item` is indexed positionally: 0=label, 1=label_index, 2=predicted_index,
    3=feature_length, 4=feature_vector (only its length is printed).
    """
    fields = (
        ("label", item[0]),
        ("label_index", item[1]),
        ("predicted_index", item[2]),
        ("feature_length", item[3]),
    )
    head = ", ".join(f"{name}: {value}" for name, value in fields)
    print(f"{head}, feature_vector size: {len(item[4])}")

    
# Read the saved test dataset from disk
ds_path = './saved/test_dataset.pt'
DS_loaded = torch.load(ds_path)

# Print every metadata entry; the 'dataset' entry itself is skipped
for key, value in DS_loaded.items():
    if key == 'dataset':
        continue
    print(f'{key}:', value)

# Summarize the first dataset item
item = DS_loaded['dataset'][0]
print('item-> ', end='')
print_dataitem(item)

label_dict: {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9}
dataset_schema: (label, label_index, predicted_index, feature_length, feature_vector)
item-> label: 7, label_index: 7, predicted_index: 7, feature_length: 784, feature_vector size: 784


## Validate The Loaded Model

In [5]:
# Pick the most likely label index for each element of a batch
def get_likely_index(tensor):
    """Return the argmax of `tensor` along its last dimension."""
    likely_index = tensor.argmax(dim=-1)
    return likely_index


# Given an item from the test_dataset, returns an example for predict() function
def make_example(data_item):
    """Wrap the feature vector stored at index 4 of `data_item` in a torch tensor."""
    return torch.tensor(data_item[4])


# Predict the label index for a single example taken from the dataset.
# ptmodel: set it to True for the PyTorch model
def predict(example, model=None, ptmodel=False):  # example: feature_vector
    """Run `model` on one feature vector and return the predicted index.

    example: 1-D torch tensor; a leading batch dimension is added before the call.
    model:   callable that takes the single-item batch and returns per-index scores.
    ptmodel: when True, `model` is switched to evaluation mode first.

    Returns the predicted index as a plain Python number.
    Raises ValueError when no model is given.
    """
    if model is None:
        raise ValueError('predict() requires a model')
    if ptmodel: model.eval()    # set the pytorch model to evaluation mode
    # Inference only: skip autograd bookkeeping while running the model
    with torch.no_grad():
        output = model(example.unsqueeze(0))
    pred = get_likely_index(output)[0]   # indexing to get the prediction from batch
    return pred.item()


# Smoke-test predict() on the first dataset item: print the model's prediction,
# then the item summary so it can be compared with label_index / predicted_index.
item = DS_loaded['dataset'][0]
example = make_example(item)
pred = predict(example, model=model_pt, ptmodel=True)
print('pred:',pred)
print_dataitem(item)

pred: 7
label: 7, label_index: 7, predicted_index: 7, feature_length: 784, feature_vector size: 784


In [6]:
# Validate the given model on the whole dataset
# ptmodel: set it to True for the PyTorch model
def validateMode(model=None, ptmodel=False):
    """Run `model` over every item of DS_loaded['dataset'] and report statistics.

    Each item is unpacked as (label, label_index, predicted_index, ...); the model's
    prediction is compared with both the stored predicted_index and the label_index.

    Returns (accuracy, correct_count, expect_miss, total_count), where accuracy is a
    percentage and is 0.0 when the dataset is empty.
    """
    dataset = DS_loaded['dataset']
    expect_miss = 0      # no. of mismatches between the prediction stored in the dataset and the model prediction
    total_count = 0
    correct_count = 0
    for item in tqdm(dataset):
        _, lbl_index, pred_index, *_ = item
        example = make_example(item)
        pred = predict(example, model=model, ptmodel=ptmodel)
        if pred != pred_index: expect_miss += 1    # prediction does not match prediction in dataset
        if pred == lbl_index: correct_count += 1   # prediction matched the actual label-index
        total_count += 1
    # Compute and print statistics (guard against an empty dataset)
    accuracy = (100.0 * correct_count) / total_count if total_count else 0.0
    print(f'Validation accuracy: {accuracy:.2f}%   correct_count: {correct_count}   expected-miss: {expect_miss}   total_count: {total_count}')
    return accuracy, correct_count, expect_miss, total_count

            
# Validate the loaded model on the full test set, then print the performance
# recorded in the checkpoint (Model_perf) so the two lines can be compared.
validateMode(model_pt, ptmodel=True)
print('Expected', Model_perf)

  0%|          | 0/10000 [00:00<?, ?it/s]

Validation accuracy: 98.17%   correct_count: 9817   expected-miss: 323   total_count: 10000
Expected Model Performance:   accuracy: 98.17%   correct_count: 9817


# Implementation Using torch.tensor Operations

# Implementation Using NumPy Matrix Operations

# Export Numpy Model & Dataset as sqlite3 DB

# Import Saved Model and Validate