# MLP-12 Numpy Model Extraction

**Goals:**

- Load Dataset and Model, then verify.
- Extract the weights.
- Describe the model using tensor operations, then validate.
- Describe the model using numpy matrix operations, then validate.
- Export numpy model as sqlite3 database for implementation in C.
- Export test dataset as sqlite3 database for implementation in C.


**NOTE:** The dataset exported by the training notebook may contain stale predicted indices, because the model was retrained several times without regenerating the dataset. We re-run the predictions here and update the predicted index in the dataset.

# Environment Setup

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np
import IPython.display as ipd
from tqdm.auto import tqdm


# Nothing is trained in this notebook, so GPU selection is deliberately
# disabled and everything runs on the CPU.

device = 'cpu'
print('Using PyTorch version:', torch.__version__, ' Device:', device)

# Load and Validate torch.nn.Module Implementation

## Define Model

**NOTE:** Always copy the following cell from the training notebook.

In [None]:
# MLP with a single hidden layer of 12 units and ReLU activation.
class MLP12(nn.Module):
    """Two-layer perceptron: input_size -> 12 (ReLU, dropout) -> num_classes."""

    def __init__(self, input_size, num_classes):
        super().__init__()
        # Remember the construction parameters
        self.input_size, self.num_classes = input_size, num_classes
        self.debug = False    # flag that can be used to switch on debugging features
        # Layers -- the attribute names determine the state_dict keys (fc1.*, fc2.*)
        self.fc1 = nn.Linear(input_size, 12)    # hidden layer, 12 units
        self.fc1_drop = nn.Dropout(0.2)         # only active in training mode
        self.fc2 = nn.Linear(12, num_classes)   # output layer

    def forward(self, x):
        """Map a batch of 1-D feature vectors to class scores.

        x has dimension (batch-size, input_size); the result has dimension
        (batch-size, num_classes).
        """
        hidden = self.fc1_drop(F.relu(self.fc1(x)))   # hidden layer + dropout
        return self.fc2(hidden)                       # output layer

## Load Saved Model

In [None]:
!ls -ltr ./saved/
print('')

# Loaded saved model dictionary
model_path = './saved/trained_mlp12-94.0p.pt'
model_dict = torch.load(model_path)
print(model_dict.keys())
for k,v in model_dict.items():
    if k!='state_dict': print(k,':',v)
        
        
# Parse the values for easier use
Accuracy = model_dict['accuracy']
Correct_count = model_dict['correct_count']
Hparam = model_dict['Hparam']
Model_state_dict = model_dict['state_dict']
Model_perf = f'Model Performance:   accuracy: {Accuracy:.2f}%   correct_count: {Correct_count}'  # to be used later
print('Hparam:', Hparam)
print('Model_perf:', Model_perf)


# move all weights to cpu
for key in Model_state_dict: 
    Model_state_dict[key] = Model_state_dict[key].to('cpu')
    

# Instantiate the model
model_pt = MLP12(Hparam['input_size'], Hparam['num_classes'])
model_pt.load_state_dict(Model_state_dict)
model_pt.to('cpu')
model_pt.eval()     # we are always evaluating here
print(model_pt)

In [None]:
# The values needed later were copied into their own names above; drop the temporaries
del model_path, model_dict

## Load Saved Dataset

In [None]:
# Prints a one-line summary of a dataset item
def print_dataitem(item):
    """Print the label fields of `item` and the size of its feature vector.

    `item` is indexed positionally: label, label_index, predicted_index,
    feature_length, feature_vector.
    """
    label_part = (f"label: {item[0]}, label_index: {item[1]}, "
                  f"predicted_index: {item[2]}, feature_length: {item[3]},")
    size_part = f"feature_vector size: {len(item[4])}"
    print(label_part, size_part)

    
# Load the test dataset (a dictionary; the items live under the 'dataset' key)
ds_path = './saved/test_dataset.pt'
DS_loaded = torch.load(ds_path)
# Print every metadata entry, skipping the (large) item list itself
for key in DS_loaded:
    if key != 'dataset':
        print(f'{key}:', DS_loaded[key])

        
# Show a summary of the first item
item = DS_loaded['dataset'][0]
print('item-> ', end='')
print_dataitem(item)

In [None]:
# Remove the temporaries of the loading cell from the notebook namespace
del ds_path, item, key

## Validate The Loaded Model

In [None]:
# Returns the index of the largest score along the last dimension
def get_likely_index(tensor):
    """Return argmax over the last dimension as a torch tensor.

    Accepts either a torch tensor or a numpy array; numpy input is wrapped
    with torch.from_numpy first.
    """
    scores = tensor if torch.is_tensor(tensor) else torch.from_numpy(tensor)
    return scores.argmax(dim=-1)


# Given an item from the test dataset, returns the example for the predict() function
# numpytype: set it to True to get a numpy nd-array instead of a torch tensor
def make_example(data_item, numpytype=False):
    """Build the model input from the feature vector stored at data_item[4]."""
    feature_tensor = torch.tensor(data_item[4])
    if not numpytype:
        return feature_tensor
    return feature_tensor.detach().numpy()


# Predicts the label index for one example (a feature vector).
# model: a callable mapping the feature vector to class scores
# ptmodel: set it to True for the PyTorch nn.Module model
def predict(example, model=None, ptmodel=False):  # example: feature_vector
    """Return the predicted label index as a plain Python number."""
    if ptmodel:
        model.eval()    # make sure the pytorch model is in evaluation mode
        # the nn.Module expects a batch: add the batch dimension, then strip it again
        batch_scores = model(example.unsqueeze(0))
        return get_likely_index(batch_scores)[0].item()
    # function-style models take the bare feature vector
    return get_likely_index(model(example)).item()


# Smoke-test predict() with the PyTorch model on the first dataset item
item = DS_loaded['dataset'][0]
example = make_example(item)
pred = predict(example, model=model_pt, ptmodel=True)
print('pred:',pred)
print_dataitem(item)

# Delete the temporaries so they do not linger in the notebook namespace
del item, example, pred

In [None]:
# Validate the given model on the whole dataset
# ptmodel: set it to True for the PyTorch model
# numpytype: set it to True to feed the model numpy arrays instead of torch tensors
# dataset: items to validate on; defaults to DS_loaded['dataset']
def validateModel(model=None, ptmodel=False, numpytype=False, dataset=None):
    """Run `model` over every dataset item and report the accuracy.

    Returns a tuple (accuracy, correct_count, expect_miss, total_count), where
    accuracy is a percentage and expect_miss counts the items whose stored
    predicted_index differs from the prediction made here. An empty dataset
    yields an accuracy of 0.0 instead of a division error.
    """
    if dataset is None:
        dataset = DS_loaded['dataset']
    expect_miss = 0      # no. of mismatches between prediction in dataset vs model prediction
    total_count = 0
    correct_count = 0
    for item in tqdm(dataset):
        _, lbl_index, pred_index, *_ = item
        example = make_example(item, numpytype=numpytype)
        pred = predict(example, model=model, ptmodel=ptmodel)
        if pred != pred_index: expect_miss += 1    # prediction does not match prediction in dataset
        if pred == lbl_index: correct_count += 1   # prediction matched the actual label-index
        total_count += 1
    # Compute and print statistics
    accuracy = (100.0 * correct_count) / total_count if total_count else 0.0
    print(f'Validation accuracy: {accuracy:.2f}%   correct_count: {correct_count}   expected-miss: {expect_miss}   total_count: {total_count}')
    return accuracy, correct_count, expect_miss, total_count

            
# Validate the loaded PyTorch model and show the performance stored with the checkpoint for comparison
validateModel(model_pt, ptmodel=True)
print('Expected', Model_perf)

# Implementation Using torch.tensor Operations

In [None]:
# Extract the weights as torch.tensors
# First list every entry of the state dict with its size
for key in Model_state_dict:
    print(f'{key:10}:', Model_state_dict[key].size())

# Bind each weight/bias tensor to its own name for the hand-written models below
fc1_weight_pt = Model_state_dict['fc1.weight']
fc1_bias_pt = Model_state_dict['fc1.bias']
fc2_weight_pt = Model_state_dict['fc2.weight']
fc2_bias_pt = Model_state_dict['fc2.bias']

In [None]:
# The MLP written out with plain pytorch tensor operations.
# features: a single feature vector (no batch dimension); returns the output-layer scores.
def tensorModel(features):
    hidden = F.relu(fc1_weight_pt @ features + fc1_bias_pt)   # hidden layer + ReLU
    return fc2_weight_pt @ hidden + fc2_bias_pt                # output layer


# Smoke-test the tensor-operation model on the first dataset item
item = DS_loaded['dataset'][0]
example = make_example(item)
pred = predict(example, model=tensorModel, ptmodel=False)
print('pred:',pred)
print_dataitem(item)

# Delete the temporaries so they do not linger in the notebook namespace
del item, example, pred

In [None]:
# Validate the tensor-operation based model on the whole dataset
validateModel(tensorModel, ptmodel=False)
print('Expected', Model_perf)

# Implement Using Numpy Matrix Operations

In [None]:
# Expose the weights as numpy ndarrays.
# NOTE: Tensor.numpy() returns an array that shares memory with the tensor,
# so these are views of the torch weights rather than independent copies.
fc1_weight_np = fc1_weight_pt.detach().numpy()
fc1_bias_np   = fc1_bias_pt.detach().numpy()
fc2_weight_np = fc2_weight_pt.detach().numpy()
fc2_bias_np   = fc2_bias_pt.detach().numpy()

print('fc1_weight_np:', fc1_weight_np.shape)

In [None]:
# ReLU on a numpy array
def npReLU(np_arr):
    """Return the element-wise maximum of 0 and np_arr."""
    return np.maximum(0, np_arr)


# The MLP written out with numpy matrix operations.
# features: a single feature vector (no batch dimension); returns the output-layer scores.
def numpyModel(features):
    hidden = npReLU(fc1_weight_np @ features + fc1_bias_np)   # hidden layer + ReLU
    return fc2_weight_np @ hidden + fc2_bias_np                # output layer


# Smoke-test the numpy model on the first dataset item (fed as a numpy array)
item = DS_loaded['dataset'][0]
example = make_example(item, numpytype=True)
pred = predict(example, model=numpyModel, ptmodel=False)
print('pred:',pred)
print_dataitem(item)

# Delete the temporaries so they do not linger in the notebook namespace
del item, example, pred

In [None]:
# Validate the numpy matrix-operation based model on the whole dataset
validateModel(numpyModel, ptmodel=False, numpytype=True)
print('Expected', Model_perf)

# Update the dataset with the Numpy Model Predicted index

In [None]:
# Re-predict every item with the numpy model and overwrite stale predicted_index values.
# enumerate() hides the length from tqdm, so the total is passed explicitly.
enum_iter = tqdm( enumerate(DS_loaded['dataset']), total=len(DS_loaded['dataset']) )
fix_count = 0
for index, item in enum_iter:
    # Make prediction using Numpy model
    example = make_example(item, numpytype=True)
    pred = predict(example, model=numpyModel, ptmodel=False)
    # Check and fix the predicted_index in the dataset (field 2 of the item)
    if pred!=item[2]:
        DS_loaded['dataset'][index][2] = pred
        fix_count += 1

print(f'INFO: Fixed {fix_count} predicted_index in the dataset')

# Delete the temporaries so they do not linger in the notebook namespace
del enum_iter, fix_count, index, item, example, pred

# Export Numpy Model as sqlite3 DB

In [None]:
# Delete the bytecode cache and import the sqlite3 utilities.
# The helpers called below without a module prefix (createDB, getTableNames,
# insertRecordList, getRecords, deleteRows, existTable, dropTable, getColNames)
# are expected to come from this star import.
# NOTE(review): no cell shown here imports `sqlite3` explicitly, so the name
# presumably also arrives through this star import -- TODO confirm.
!rm -rf __pycache__/
from utilsqlite3 import *

In [None]:
# Create the database file for the model (overwrite=True is passed on every run)
DB_path = './saved/trained-mlp12.s3db'
createDB(DB_path, overwrite=True)
!ls -ltrh ./saved

## Write the header table

In [None]:
# Creates the header table
def createHeaderTable(db_path):
    """Create the key/value `Header` table in the database at db_path.

    The table has the columns id, key, value and description. Nothing happens
    if the table already exists. The connection is closed even when the
    statement fails, so the database file is never left open.
    """
    query_str = '''CREATE TABLE IF NOT EXISTS Header (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        key TEXT,
                        value NUMERIC,
                        description TEXT
                    )'''
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(query_str)
        conn.commit()
    finally:
        # always release the file handle, also on errors
        conn.close()


# Create the Header table and list the tables to confirm that it exists
createHeaderTable(DB_path)
table_names = getTableNames(DB_path)
print(table_names)
del table_names

In [None]:
# Inserts one record into the Header table (will be called by insertRecordList() utility function)
def insertHeaderRecord(cursor, record):
    """Insert a (key, value, description) record through the given cursor.

    Unpacking the record doubles as a soft format check: anything that is
    not exactly three fields raises before the database is touched.
    """
    rec_key, rec_value, rec_description = record
    insert_sql = '''INSERT INTO Header (key, value, description)
                      VALUES (?, ?, ?)'''
    cursor.execute(insert_sql, (rec_key, rec_value, rec_description))


# Try the insert function with a dummy record and read the table back
insertRecordList(DB_path, insertHeaderRecord, [('example_key', 'example_value', 'example_description')])
getRecords(DB_path, 'Header')

In [None]:
# Names of the tables that will hold the parameters; they are also recorded
# in the Header table so that a reader of the database can look them up.
Fc1w_table = 'FC1_Weight_T'
Fc1b_table = 'FC1_Bias_T'
Fc2w_table = 'FC2_Weight_T'
Fc2b_table = 'FC2_Bias_T'
Hparam_table = 'Hparam_T'

# Header records as (key, value, description) tuples
header_records = [
    ('name', 'MLP-12', ''),
    ('architecture', '784-FC:12-10', 'It is an MLP with 1 hidden layer with 12 units with ReLU activation. Trained on MNIST dataset (output layer with 10 units).'),
    ('accuracy', Accuracy, 'Accuracy% of the trained model on the test dataset.'),
    ('correct_count', Correct_count, 'Number of correct predictions by the trained model on the test dataset.'),
    
    ('Hparam.table',   Hparam_table, 'This is the name of the table that contains different parameters of the model.'),
    ('fc1.weight.table', Fc1w_table, 'Name of the table containing the fc1.weight matrix'),
    ('fc1.bias.table',   Fc1b_table, 'Name of the table containing the fc1.bias vector'),
    ('fc2.weight.table', Fc2w_table, 'Name of the table containing the fc2.weight matrix'),
    ('fc2.bias.table',   Fc2b_table, 'Name of the table containing the fc2.bias vector'),
]

# Insert the header records
deleteRows(DB_path, 'Header')  # delete previous records (e.g. the dummy test record)
insertRecordList(DB_path, insertHeaderRecord, header_records)
records = getRecords(DB_path, 'Header')
for r in records: print(r[:-1])   # print all but description field
    
# Delete names
del records

## Write the Hparam Table

In [None]:
# Creates the Hparam table
def createHparamTable(db_path):
    """Create the key/value table named by `Hparam_table` in the database at db_path.

    The table has the columns id, key, value and description. Nothing happens
    if the table already exists. The connection is closed even when the
    statement fails, so the database file is never left open.
    """
    # The table name is an identifier and cannot be bound as a parameter,
    # hence it is formatted into the statement.
    query_str = f'''CREATE TABLE IF NOT EXISTS {Hparam_table} (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        key TEXT,
                        value NUMERIC,
                        description TEXT
                    )'''
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(query_str)
        conn.commit()
    finally:
        # always release the file handle, also on errors
        conn.close()


# Create the Hparam table and list the tables to confirm that it exists
createHparamTable(DB_path)
table_names = getTableNames(DB_path)
print(table_names)

# Delete names
del table_names

In [None]:
# Inserts one record into the Hparam table (same calling convention as insertHeaderRecord)
def insertHparamRecord(cursor, record):
    """Insert a (key, value, description) record into the `Hparam_table` table.

    Unpacking the record doubles as a soft format check: anything that is
    not exactly three fields raises before the database is touched.
    """
    rec_key, rec_value, rec_description = record
    insert_sql = f'''INSERT INTO {Hparam_table} (key, value, description)
                      VALUES (?, ?, ?)'''
    cursor.execute(insert_sql, (rec_key, rec_value, rec_description))


# Try the insert function with a dummy record and read the table back
insertRecordList(DB_path, insertHparamRecord, [('example_key', 123, 'example_description')])
getRecords(DB_path, Hparam_table)

In [None]:
# Insert the Hparam records
print('Hparam from pytorch:', Hparam)

# (key, value, description) tuples: the model sizes from the checkpoint plus
# the dimensions of every weight matrix and bias vector
hparam_records = [
    ('input_size', Hparam['input_size'], 'Input size of the MLP'),
    ('num_classes', Hparam['num_classes'], 'Output size of the MLP'),
    
    ('fc1.weight.row', len(fc1_weight_np), 'No. of rows in fc1.weight'),
    ('fc1.weight.col', len(fc1_weight_np[0]), 'No. of columns in fc1.weight'),
    ('fc1.bias.len', len(fc1_bias_np), 'Lengths of the fc1.bias vector'),
    
    ('fc2.weight.row', len(fc2_weight_np), 'No. of rows in fc2.weight'),
    ('fc2.weight.col', len(fc2_weight_np[0]), 'No. of columns in fc2.weight'),
    ('fc2.bias.len', len(fc2_bias_np), 'Lengths of the fc2.bias vector'),
]


# Replace whatever is in the table (e.g. the dummy test record) with the real records
deleteRows(DB_path, Hparam_table)
insertRecordList(DB_path, insertHparamRecord, hparam_records)
getRecords(DB_path, Hparam_table)

## Write Weights and Biases

In [None]:
# Saves a numpy 2D array as a table in the database.
# Columns: row_no, col_0, col_1, ..., col_n
def createMatrixTable(db_path, table_name, nparray, overwrite=False):
    """Store the 2-D array `nparray` as the table `table_name`, one record per row.

    db_path: path of the sqlite3 database file.
    table_name: name of the table to create.
    nparray: numpy array with exactly two dimensions.
    overwrite: when True an existing table of that name is dropped first;
        when False an existing table makes the CREATE statement fail.

    Every record is (row_no, col_0, ..., col_n). The values are bound as
    statement parameters, so they are stored exactly (no text round-trip) and
    nan/inf do not break the statement. One parameter is bound per column;
    SQLite builds older than 3.32.0 limit a statement to 999 parameters.
    """
    # Drop the table if it exists and overwrite requested
    if overwrite and existTable(db_path, table_name):
        print(f'WARN: Overwriting table {table_name}')
        dropTable(db_path, table_name)

    # Column list and matching placeholder list (row_no + one per matrix column)
    rows, cols = nparray.shape
    column_names = "row_no, " + ", ".join(f"col_{i}" for i in range(cols))
    placeholders = ", ".join("?" * (cols + 1))
    # tolist() converts to native Python scalars, which sqlite3 can bind directly
    records = [(row_no, *values) for row_no, values in enumerate(nparray.tolist())]

    conn = sqlite3.connect(db_path)
    try:
        # table/column names are identifiers and cannot be bound as parameters
        conn.execute(f"CREATE TABLE {table_name} ({column_names})")
        conn.executemany(f"INSERT INTO {table_name} VALUES ({placeholders})", records)
        conn.commit()
    finally:
        # always release the file handle, also on errors
        conn.close()


# Test createMatrixTable() by writing fc1.weight into a scratch table named 'test'
createMatrixTable(DB_path, 'test', fc1_weight_np, overwrite=True)
col_names = getColNames(DB_path, 'test')
records = getRecords(DB_path, 'test')
# Show only the first/last few entries
print('col_names:', col_names[:5], '...', col_names[-5:])
print('records[i]:', records[2][:5], '...')

# Delete names
del col_names, records

In [None]:
# Save the weight matrices
createMatrixTable(DB_path, Fc1w_table, fc1_weight_np, overwrite=True)
createMatrixTable(DB_path, Fc2w_table, fc2_weight_np, overwrite=True)

# createMatrixTable() needs a 2D array, so each bias vector becomes a 1-row matrix
fc1b = np.expand_dims(fc1_bias_np, axis=0)
fc2b = np.expand_dims(fc2_bias_np, axis=0)
print('fc1b shape:', fc1b.shape)

createMatrixTable(DB_path, Fc1b_table, fc1b, overwrite=True)
createMatrixTable(DB_path, Fc2b_table, fc2b, overwrite=True)

print('')
print(getTableNames(DB_path))

In [None]:
# Drop every table that is not part of the exported model (e.g. the 'test' scratch table)
keep_tables = {'sqlite_sequence', 'Header', 'Hparam_T', 'FC1_Weight_T', 
               'FC2_Weight_T', 'FC1_Bias_T', 'FC2_Bias_T'}

all_tables = getTableNames(DB_path)
cnt = 0   # number of tables dropped
for name in all_tables:
    if name not in keep_tables:
        dropTable(DB_path, name)
        print(f'WARN: Dropped table {name}')
        cnt += 1
print(f'INFO: {cnt} tables dropped')

# Show what is left
all_tables = getTableNames(DB_path)
ipd.display(all_tables)

# Delete names
del all_tables, cnt, name

## Import Saved Model and Validate

In [None]:
# Check the meta tables; r[1:-1] hides the id and description columns
rec_list = getRecords(DB_path, 'Header')
print('Header:')
for r in rec_list: print(r[1:-1])

print('')
rec_list = getRecords(DB_path, 'Hparam_T')
print('Hparam:')
for r in rec_list: print(r[1:-1])

In [None]:
# Reads back a table written by createMatrixTable() as a list of rows
def readMatrixTable(db_path, table_name):
    """Return the matrix rows of `table_name`, ordered by row_no.

    Each returned row is the stored record without its leading row_no column.
    """
    ordered_records = sorted(getRecords(db_path, table_name))   # row_no is the first column
    return [record[1:] for record in ordered_records]           # strip off the row_no column


# Test this function: read fc1.weight back and display its shape
mat1 = np.array(readMatrixTable(DB_path, Fc1w_table))
mat1.shape

In [None]:
# Reads the weights and biases back from the database as a dictionary
def readModelParam(db_path, table_names):
    """Return {table name: numpy array} for every table in `table_names`.

    A table holding exactly one row is treated as a vector and returned as a
    1D array; every other table is returned as a 2D array.
    """
    def _as_array(rows):
        # single-row tables are the bias vectors
        return np.array(rows[0]) if len(rows) == 1 else np.array(rows)

    return {name: _as_array(readMatrixTable(db_path, name)) for name in table_names}
        

# Test this function: read all four parameter tables back
param_tables = [
    'FC1_Weight_T',
    'FC2_Weight_T',
    'FC1_Bias_T',
    'FC2_Bias_T',
]

model_params = readModelParam(DB_path, param_tables)
# Show the shape and dtype of what came back from the database
for k, v in model_params.items():
    print(f'{k}:', v.shape, v.dtype)
    

# Delete names
del k, v, mat1

In [None]:
# Compare with original weights: the in-memory arrays, keyed by the table each was written to
org_params = {
    'FC1_Weight_T': fc1_weight_np,
    'FC2_Weight_T': fc2_weight_np,
    'FC1_Bias_T': fc1_bias_np,
    'FC2_Bias_T': fc2_bias_np,
}


def compare_model_params(model_params, org_params, tolerance):
    """Assert that every array in model_params matches its original.

    For each key the value range of the original and the largest absolute
    difference are printed. An AssertionError is raised when that difference
    exceeds `tolerance`, or when np.allclose() with rtol=tolerance disagrees.
    """
    for name, db_val in model_params.items():
        print('\nComparing:', name)
        org_val = org_params[name]
        print('min:', np.min(org_val), '  max:', np.max(org_val))
        diff_val = np.abs(db_val - org_val).max()   # largest element-wise difference
        print('diff:', diff_val)
        assert diff_val <= tolerance   # use manual check
        assert np.allclose(db_val, org_val, rtol=tolerance)  # use numpy built-in check


# Compare the values read from the database against the in-memory weights
tolerance = 1e-6
compare_model_params(model_params, org_params, tolerance)

# Export the Dataset as sqlite3 DB

In [None]:
# Create the database file for the test dataset (overwrite=True is passed on every run)
DB_ds_path = './saved/mnist_test_data.s3db'
createDB(DB_ds_path, overwrite=True)
!ls -ltr saved/

## Save the Header Table

In [None]:
# Create the Header table in the dataset database and list the tables to confirm
createHeaderTable(DB_ds_path)
table_names = getTableNames(DB_ds_path)
print(table_names)

In [None]:
# Names of the dataset tables; they are also recorded in the Header table
Label_table = 'Labels_T'
Dataitem_table = 'DataItems_T'
Feature_table = 'Features_T'

# Take the feature length from the first item
item = DS_loaded['dataset'][0]
Feature_len = len(item[-1])    # last field in the item is the feature_vector

# Header records as (key, value, description) tuples
header_records = [
    ('name', 'MNIST-Test', 'Features (flattened images) extracted from the test dataset of MNIST.'),
    ('feature_length', Feature_len, 'The length of the feature. These features can be directly fed to the MLP12 model'),
    ('accuracy', Accuracy, 'Accuracy of the MLP12 model used to generate the "predicted_index" values.'),
    
    ('labels.table',  Label_table, 'Index to label mapping. The model predicts an index, which can be converted to the label using this table'),
    ('dataset.table', Dataitem_table, 'This table serves as the (label, feature) list. The actual features are stored in a separate table.'), 
    ('dataitem.schema', '', 'There are 3 label-related fields in the dataset.table: "label" is the ground-truth, "label_index" is the index into the index-to-label mapping, "predicted_index" is the index predicted by the trained MLP12 model'),
    ('features.table', Feature_table, 'Contains the actual features for the model.'),
]

# Insert the header records
deleteRows(DB_ds_path, 'Header')  # delete previous records
insertRecordList(DB_ds_path, insertHeaderRecord, header_records)
records = getRecords(DB_ds_path, 'Header')
for r in records: print(r[:-1])   # print all but description field

## Save the Index-to-label mapping table

In [None]:
# Creates the labels table
def createLableTable(db_path):
    """Create the index-to-label table named by `Label_table` at db_path.

    The table has the columns label_index (primary key) and label. Nothing
    happens if the table already exists. The connection is closed even when
    the statement fails, so the database file is never left open.
    """
    # The table name is an identifier and cannot be bound as a parameter,
    # hence it is formatted into the statement.
    query_str = f'''CREATE TABLE IF NOT EXISTS {Label_table} (
                    label_index INTEGER PRIMARY KEY,
                    label TEXT
                )'''
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(query_str)
        conn.commit()
    finally:
        # always release the file handle, also on errors
        conn.close()


# Create the labels table and list the tables to confirm that it exists
createLableTable(DB_ds_path)
table_names = getTableNames(DB_ds_path)
print(table_names)

In [None]:
# Inserts one record into the labels table (same calling convention as insertHeaderRecord)
def insertLabelRecord(cursor, record):
    """Insert a (label_index, label) record into the `Label_table` table.

    Unpacking the record doubles as a soft format check: anything that is
    not exactly two fields raises before the database is touched.
    """
    rec_index, rec_label = record
    columns = ('label_index', 'label')
    insert_sql = f"INSERT INTO {Label_table} ({', '.join(columns)}) VALUES (?, ?)"
    cursor.execute(insert_sql, (rec_index, rec_label))


# Try the insert function with a dummy record (index -1) and read the table back
deleteRows(DB_ds_path, Label_table)
insertRecordList(DB_ds_path, insertLabelRecord, [(-1, 'test')])
getRecords(DB_ds_path, Label_table)

In [None]:
# Build the label records: label_dict is iterated as label -> label_index,
# while the table stores (label_index, label) with the label as text
labels_dict = DS_loaded['label_dict']
label_records = [(label_index, str(label)) for label, label_index in labels_dict.items()]
print('label_records:', label_records)

# Store them in the table, replacing the dummy test record
deleteRows(DB_ds_path, Label_table)
insertRecordList(DB_ds_path, insertLabelRecord, label_records)
rec_list = getRecords(DB_ds_path, Label_table)
print('rec_list:', rec_list)

## Save the data-items and features

In [None]:
# Split the dataitems for DataItem table and Features table
dataitem_records = []
feature_records = []

# Show the container type of the feature vector
item = DS_loaded['dataset'][0]
print('item[-1].type:', type(item[-1]))

for item_index, item in enumerate(DS_loaded['dataset']):
    label, label_index, pred_index, feat_len, feat_vec = item   # parse the item
    feat_id = item_index      # use the index in the dataset as the feature ID
    item_rec = [label, label_index, pred_index, feat_id]
    feat_rec = [feat_id] + feat_vec    # feature-record: (feature-id, col_0, col_1, ...)
    dataitem_records.append(item_rec)
    feature_records.append(feat_rec)

# check the records: number of records and fields per record
print('')
print('dataitem_records:', len(dataitem_records), len(dataitem_records[0]))
print('feature_records:', len(feature_records), len(feature_records[0]))

# Spot check: the feature record without its ID must equal the original feature vector
check_index = 100
item = DS_loaded['dataset'][check_index]
assert feature_records[check_index][1:] == item[-1]

In [None]:
# Clean up the temporaries left behind by this and earlier cells
del item_index, item, feat_id, item_rec, feat_rec, feat_len, feat_vec, label
del key, r, rec_list, table_names, records, org_params, check_index, keep_tables

## Save Data Items

In [None]:
# Creates the dataset table to save the data-items
def createDataTable(db_path):
    """Create the data-item table named by `Dataitem_table` at db_path.

    The table has the columns id, label, label_index, predicted_index and
    feature_id. Nothing happens if the table already exists. The connection
    is closed even when the statement fails, so the database file is never
    left open.
    """
    # Column definitions of the table
    columns = ["id INTEGER PRIMARY KEY AUTOINCREMENT",
               "label TEXT",
               "label_index INTEGER",
               "predicted_index INTEGER",
               "feature_id INTEGER"]
    # The table name is an identifier and cannot be bound as a parameter
    query = f"CREATE TABLE IF NOT EXISTS {Dataitem_table} ({', '.join(columns)})"

    conn = sqlite3.connect(db_path)
    try:
        conn.execute(query)
        conn.commit()
    finally:
        # always release the file handle, also on errors
        conn.close()


# Create the data-item table and list the tables to confirm that it exists
createDataTable(DB_ds_path)
table_names = getTableNames(DB_ds_path)
print(table_names)

In [None]:
# Inserts one record into the Dataset table (same calling convention as insertHeaderRecord)
def insertDataRecord(cursor, record):
    """Insert a (label, label_index, predicted_index, feature_id) record.

    Unpacking the record doubles as a soft format check: anything that is
    not exactly four fields raises before the database is touched.
    """
    rec_label, rec_label_index, rec_pred_index, rec_feat_id = record
    columns = ('label', 'label_index', 'predicted_index', 'feature_id')
    insert_sql = f"INSERT INTO {Dataitem_table} ({', '.join(columns)}) VALUES (?, ?, ?, ?)"
    cursor.execute(insert_sql, (rec_label, rec_label_index, rec_pred_index, rec_feat_id))


# Try the insert function with a dummy record and read the table back
insertRecordList(DB_ds_path, insertDataRecord, [("Item 1", 1, 2, 3)])
getRecords(DB_ds_path, Dataitem_table)
In [None]:
# Insert all dataset records
deleteRows(DB_ds_path, Dataitem_table)   # delete old records (e.g. the dummy test record)
insertRecordList(DB_ds_path, insertDataRecord, dataitem_records)
# Read back: number of records, fields per record, and the first record
rec_list = getRecords(DB_ds_path, Dataitem_table)
print('rec_list:', len(rec_list), len(rec_list[0]))
print('rec_list[0]', rec_list[0])

## Save Features

In [None]:
# Creates the features table to save the feature_vectors
# Columns: feature_id, col_0, col_1, ..., col_n
def createFeatureTable(db_path, table_name, feature_list, overwrite=False):
    """Store `feature_list` as the table `table_name`, one record per feature vector.

    db_path: path of the sqlite3 database file.
    table_name: name of the table to create.
    feature_list: sequence of records (feature_id, value_0, value_1, ...);
        the first record determines the number of columns.
    overwrite: when True an existing table of that name is dropped first;
        when False an existing table makes the CREATE statement fail.

    The connection is closed even when a statement fails, so the database
    file is never left open.
    """
    # Drop the table if it exists and overwrite requested
    if overwrite and existTable(db_path, table_name):
        print(f'WARN: Overwriting table {table_name}')
        dropTable(db_path, table_name)

    # Build the column list from the first record
    cols = len(feature_list[0]) - 1    # omitting the feature-id column from the count
    column_names = "feature_id, " + ", ".join(f"col_{i}" for i in range(cols))

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        cursor.execute(f"CREATE TABLE {table_name} ({column_names})")

        # Insert the features into the table.
        # The values are rendered with str() and written into the statement text.
        for feat_item in tqdm(feature_list):
            vals = ', '.join(map(str, feat_item))   # feature_id first, then the vector
            cursor.execute(f"INSERT INTO {table_name} VALUES ({vals})")

        conn.commit()
    finally:
        # always release the file handle, also on errors
        conn.close()


# Create the features table and fill it with all feature records
createFeatureTable(DB_ds_path, Feature_table, feature_records, overwrite=True)
table_names = getTableNames(DB_ds_path)
print(table_names)

# Read the table back and report the number of records
rec_list = getRecords(DB_ds_path, Feature_table)
print('rec_list:', len(rec_list))

In [None]:
# Count the Python types of the values in the first feature record read back
# from the database
feat_item = rec_list[0]
print('feat_item:', len(feat_item))
type_count = {}   # maps type -> number of values of that type
for d in feat_item: 
    t = type(d)
    if t not in type_count: type_count[t] = 0
    type_count[t] += 1
print(type_count)


# Delete names
del feat_item, type_count, d

In [None]:
# Drop every table that is not part of the exported dataset
keep_tables = {'Header', 'sqlite_sequence', 'Labels_T', 'DataItems_T', 'Features_T'}

all_tables = getTableNames(DB_ds_path)
cnt = 0   # number of tables dropped
for name in all_tables:
    if name not in keep_tables:
        dropTable(DB_ds_path, name)
        print(f'WARN: Dropped table {name}')
        cnt += 1
print(f'INFO: {cnt} tables dropped')

# Show what is left
all_tables = getTableNames(DB_ds_path)
ipd.display(all_tables)

# Delete names
del all_tables, cnt, name

## Import Saved Dataset and Verify

In [None]:
# Check the meta table; r[1:-1] hides the id and description columns
rec_list = getRecords(DB_ds_path, 'Header')
print('Header:')
for r in rec_list: print(r[1:-1])

In [None]:
# Compare with original features and dataitems
def compare_dataset(db_items, org_items, tolerance):
    """Assert that the items read from the database match the original items.

    db_items, org_items: sequences of items whose first three fields are the
        label fields and whose last field is the feature vector.
    tolerance: relative tolerance (rtol) used by np.allclose() on the features.

    Raises AssertionError when the two sequences differ in length, when the
    label fields of a pair differ, or when a feature vector is not close
    enough. An empty input is reported as 0 compared records.
    """
    # A shorter db_items would otherwise pass without covering every original item
    assert len(db_items) == len(org_items), "Record count mismatch"
    compared = 0
    for db_rec, org_rec in tqdm(zip(db_items, org_items), total=len(db_items)):
        # Compare the labels (label, label_index, predicted_index)
        assert db_rec[:3] == org_rec[:3], "Labels mismatch"
        # Check features
        org_feat = np.array(org_rec[-1])
        db_feat = np.array(db_rec[-1])
        assert np.allclose(org_feat, db_feat, rtol=tolerance), "Feature vector mismatch"
        compared += 1
    print(f'INFO: Compared {compared} records')


# Merge the tables to make records similar to the ones in DS_loaded['dataset'].
# Build a feat_id: feat_vec map for merging.
feat_records = getRecords(DB_ds_path, Feature_table)
feat_rec_map = {}
for fitem in feat_records:
    feat_id = fitem[0]      # first column is the feature ID
    feat_vec = fitem[1:]    # the remaining columns are the feature values
    feat_rec_map[feat_id] = feat_vec

# merge feature vectors with dataset items for comparison
data_records = getRecords(DB_ds_path, Dataitem_table)
db_items = []
for drec in data_records:
    feat_id = drec[-1]      # last column of a data-item record is the feature ID
    feat_vec = feat_rec_map[feat_id]
    merged_item = list(drec[1:4]) + [feat_vec]   # remove ID column and concat feature vector
    # the label column is declared TEXT; convert it to int for the comparison
    merged_item[0] = int(merged_item[0])
    db_items.append(merged_item)
        
# compare with tolerance
tolerance = 1e-6
compare_dataset(db_items, DS_loaded['dataset'], tolerance)

# Concluding Remarks

You can now use these databases to translate the Numpy model into a C implementation. You can also use them to run experiments with fixed-point precision.