# Export MLP-12 Model and Dataset as C source files

**Goals:**
- Load the Model and Dataset from the SQLite3 databases.
- Export Model parameters as C source file.
    - A header file with struct and extern definitions
    - A C file with actual values as array of structs
- Export a subset of Test Dataset as C source file.
    - A header file with record-struct and extern definitions
    - A C file with actual values as array of structs

# Load Floating-Point Model Parameters and Dataset

Here, the model and the dataset exported by the NumPy model extraction notebook are loaded.

In [None]:
# Delete the cache and import sqlite3 utilities
!rm -rf __pycache__/
from utilsqlite3 import *

## Load the Dataset

In [None]:
# Load and check the dataset table
Dataset_path = './saved/mnist_test_data-94.00p.s3db'

# List the tables of the dataset database (only printed, as a sanity check)
table_names = getTableNames(Dataset_path)
print('table_names:', table_names)

# Read the header table
# Columns 1 and 2 of every record are used as a key/value pair; column 0 is
# skipped (presumably a row id -- TODO confirm against the table schema).
header_records =  getRecords(Dataset_path, 'Header')
header_dict = {}
print('')
for r in header_records: 
    print(r[1:3])
    header_dict[r[1]] = r[2]
    
# Get the table names
# The header stores, under these keys, the names of the tables read further below
Data_table = header_dict['dataset.table']
Feature_table = header_dict['features.table']
Label_table = header_dict['labels.table']
print('')
print('Data_table:', Data_table)
print('Feature_table:', Feature_table)
print('Label_table:', Label_table)

In [None]:
# Read the label records and build lookup dictionaries in both directions
labels_records = getRecords(Dataset_path, Label_table)
print('labels_records:', labels_records)

# Every record is unpacked as an (index, label) pair, so each record must have
# exactly two columns.
Label_to_index = {label:index for (index, label) in labels_records}
Index_to_label = {index:label for (index, label) in labels_records}
print('Label_to_index:', Label_to_index)
print('Index_to_label:', Index_to_label)

### Build the Dataset Array

In [None]:
import numpy as np
import dataclasses
from dataclasses import dataclass, asdict
from typing import List


# Dataset item class
@dataclass
class DataItem:
    """One record of the dataset: label information plus its feature vector.

    Attributes:
        label: Label of the record.
        label_index: Integer index stored for the label.
        predicted_index: Integer index stored as the prediction for the record.
        feature_vec: Feature values as a NumPy array; it must expose `.shape`,
            which `getItemSummary` reads.
    """
    label: str
    label_index: int
    predicted_index: int
    feature_vec: np.ndarray

    def getItemSummary(self):
        """Return a string of the scalar fields and the feature vector's shape."""
        return str((self.label, self.label_index, self.predicted_index, self.feature_vec.shape))

In [None]:
# Merge the feature records with the data-item records
# Map every feature ID (first column) to the remaining columns of its record
feat_records = getRecords(Dataset_path, Feature_table)
featid_map = {rec[0]: rec[1:] for rec in feat_records}


# Read the data-items and collect them as DataItem objects
data_records = getRecords(Dataset_path, Data_table)
data_schema = getColNames(Dataset_path, Data_table)
print('data_schema:', data_schema)

# The loop variables are kept as plain globals on purpose: a later clean-up
# cell deletes them by name.
Dataset = []
for r in data_records:
    # columns 1..4 hold: label, label index, predicted index, feature ID
    label, label_index, pred_index, feat_id = r[1], r[2], r[3], r[4]
    feat_vec = np.array(featid_map[feat_id], dtype=np.float32)
    item = DataItem(label, label_index, pred_index, feat_vec)
    Dataset.append(item)

# Show the dataset size and a summary of the first item
item = Dataset[0]
print('Dataset:', len(Dataset))
print('item:', item.getItemSummary())

In [None]:
# Delete names to avoid confusion later
# NOTE: `del` raises NameError for a name that is not defined, so the cells
# above must have been run before this one.
del table_names, r, labels_records, header_records, header_dict

## Load the Trained Model

In [None]:
# Load and check the model parameters table
Model_path = './saved/trained-mlp12-94.00p.s3db'

# List the tables of the model database (only printed, as a sanity check)
table_names = getTableNames(Model_path)
print('table_names:', table_names)

# Read the header table
# Columns 1 and 2 of every record are used as a key/value pair.
# Header_dict is kept for later use (the 'accuracy' entry is read when the
# parameter struct initializer is built).
header_records =  getRecords(Model_path, 'Header')
Header_dict = {}
print('')
for r in header_records: 
    print(r[1:3])
    Header_dict[r[1]] = r[2]


# Get the table names for later use
# The header stores the name of the table holding each weight/bias
Fc1w_table = Header_dict['fc1.weight.table']
Fc1b_table = Header_dict['fc1.bias.table']
Fc2w_table = Header_dict['fc2.weight.table']
Fc2b_table = Header_dict['fc2.bias.table']

In [None]:
# Returns a table saved using createMatrixTable as a list of rows
def readMatrixTable(db_path, table_name):
    """Read `table_name` from the database at `db_path` as a list of rows.

    The records returned by getRecords are sorted in place (the first column,
    the row number, is the leading sort key) and that first column is then
    stripped from every record.
    """
    records = getRecords(db_path, table_name)
    records.sort()          # orders the rows by row_no (first column)
    return [rec[1:] for rec in records]


# Test this function: load the fc1 weight table and look at its shape
mat1 = np.array(readMatrixTable(Model_path, Fc1w_table))
mat1.shape

In [None]:
# Returns the weights and biases as a dictionary
def readModelParams(db_path, table_names):
    """Read every table in `table_names` and return {table name: numpy array}.

    A table that holds exactly one row is treated as a vector and becomes a
    1D array; every other table becomes an array built from all of its rows.
    """
    model_params = {}
    for name in table_names:
        rows = readMatrixTable(db_path, name)
        # a single row means a vector: unwrap it so the array is 1D
        model_params[name] = np.array(rows[0]) if len(rows) == 1 else np.array(rows)
    return model_params


# List of table names for running loop
ParamTable_names = [
    Fc1w_table,
    Fc1b_table,
    Fc2w_table,
    Fc2b_table,
]
print('ParamTable_names:', ParamTable_names)

# Read the model parameters as numpy matrix/vectors
# The resulting dictionary is keyed by table name
model_params = readModelParams(Model_path, ParamTable_names)
# Print shape and dtype of every parameter as a sanity check
for k, v in model_params.items():
    print(f'{k}:', v.shape, v.dtype)

In [None]:
# Model Parameters class
@dataclass
class mlp12_Params:
    """Container for the four trained parameter arrays of the model."""
    # NOTE: the field order (both weights, then both biases) is the positional
    # order of the generated constructor; keep it in sync with the callers.
    fc1_weight: np.ndarray
    fc2_weight: np.ndarray
    fc1_bias: np.ndarray
    fc2_bias: np.ndarray
        

# Instantiate the model parameter class with float32 datatype
# Arguments are positional: fc1 weight, fc2 weight, fc1 bias, fc2 bias
Model_params = mlp12_Params(
    model_params[Fc1w_table].astype(np.float32),
    model_params[Fc2w_table].astype(np.float32),
    model_params[Fc1b_table].astype(np.float32),
    model_params[Fc2b_table].astype(np.float32),
)

# Show the parameter info
# Walk over the dataclass fields and print name, shape and dtype of each one
for field in dataclasses.fields(Model_params):
    field_value = getattr(Model_params, field.name)
    print(field.name+':', field_value.shape, field_value.dtype)

In [None]:
# Delete the temporary names left over from loading the dataset and the model.
# NOTE: `del` raises NameError for a name that is not defined, so all loading
# cells above must have been run before this one.
del feat_id, feat_records, feat_vec, featid_map, field, field_value
del item, k, label, label_index, mat1, pred_index, r, v, table_names

# Export The Model

**Structs:**

- A Matrix type struct
- A Vector type struct
- A model parameter type struct
    - Will be composed of matrix and vector structs
    - Can have additional fields with metadata

## Export Model Header

In [None]:
# Define Header file components
# Every component below is a piece of C text that is later substituted into
# the header template.

# Include-guard macro name of the generated header
header_guard = "MODEL_MLP12_H"
# stdint.h provides the uint32_t type used by the structs
includes = '''
#include <stdint.h>
'''

# Matrix type: dimensions plus a pointer to the float elements
matrix_struct = '''
// Floating point matrix
typedef struct {
  uint32_t row_cnt;
  uint32_t col_cnt;
  float *elements;       // 2D array of floats
} mlp12_Matrix_fl;
'''

# Vector type: length plus a pointer to the float elements
vector_struct = '''
// Floating point vector
typedef struct {
  uint32_t vec_len;
  float *elements;       // 1D array of floats
} mlp12_Vector_fl;
'''

# Parameter struct: metadata followed by the weights and biases.
# This is an f-string, so literal C braces are doubled and the struct name is
# filled in right away.
Param_struct_name = "mlp12_Params_fl"
param_struct = f'''
// Trained weights as floating point numbers
typedef struct {{
  // metadata
  float accuracy;
  char *summary;     // A human-readable string

  // model trained parameters
  mlp12_Matrix_fl   fc1_weight;
  mlp12_Matrix_fl   fc2_weight;
  mlp12_Vector_fl   fc1_bias;
  mlp12_Vector_fl   fc2_bias;

}} {Param_struct_name};
'''

In [None]:
# Build the Header template
from string import Template

# Name of the exported parameter object in the generated C code
Param_inst_name = 'mlp12_trained_params'
# Two substitution stages are mixed here: the {...} fields are filled in
# immediately by the f-string, while the $... placeholders are left for
# Template.substitute() later on.
header_template = Template(f'''
#ifndef $header_guard
#define $header_guard

$includes

$matrix_struct
$vector_struct
$param_struct

extern const {Param_struct_name}  {Param_inst_name};


#endif  // $header_guard
''')

In [None]:
# Generate header content and save it in a file
# Header_filename is reused later as the #include target of the C source
Header_filename = 'mlp12_params.h'
save_path = './c-impl/' + Header_filename
# Fill in every $placeholder of the template; substitute() raises KeyError
# when a placeholder is left without a value.
header_content = header_template.substitute(
    header_guard=header_guard, includes=includes, 
    matrix_struct=matrix_struct, vector_struct=vector_struct,
    param_struct=param_struct
)

#print(header_content)
# NOTE: open() does not create missing directories; './c-impl/' must exist
with open(save_path, 'w') as outfile:
    outfile.write(header_content)
print(f"INFO: Saved header file {save_path}")

In [None]:
# Delete the temporary names used while exporting the model header.
# NOTE: `del` raises NameError for a name that is not defined, so the cells
# above must have been run before this one.
del header_guard, includes, matrix_struct, vector_struct, param_struct
del save_path, outfile, header_template, header_content, header_records
del data_records, data_schema

## Export Model Parameters

In [None]:
## Formatting Functions
# Given a numpy vector, returns a C-source string
def csourceVector(np_vec):
    """Format the elements of `np_vec` as a comma-separated list of C literals.

    Every element is written in scientific notation with 10 digits after the
    decimal point, right-aligned in a field of 18 characters. An empty input
    gives an empty string.
    """
    return ', '.join([f'{elem:18.10e}' for elem in np_vec])
    
    
# Given a numpy matrix, returns a C-source string,
# one row per line.
def csourceMatrix(np_matrix):
    """Format `np_matrix` as C source text with one matrix row per line.

    Each row is formatted by csourceVector; rows are separated by a comma,
    a space and a newline.
    """
    return ', \n'.join(csourceVector(row) for row in np_matrix)


# Given a parameter, returns as C-source string of the parameter values
def csourceParam(param):
    """Format a parameter array as C source text.

    A parameter whose shape has a single dimension is formatted as a vector,
    anything else as a matrix.
    """
    formatter = csourceVector if len(param.shape) == 1 else csourceMatrix
    return formatter(param)
    
    
# Test
# Print a small slice of a bias vector and of a weight matrix to check the formatting
print(csourceParam(Model_params.fc1_bias[:4]))
print('')
print(csourceParam(Model_params.fc1_weight[:6, :4]))

In [None]:
## Generate C-source
# define templates
# Definition of a static 2D float array; placeholders: $name, $row_cnt,
# $col_cnt and $values (the initializer text)
Cstatic_2D_template = Template('''
static float   $name[$row_cnt][$col_cnt] = {
$values
};
''')

# Definition of a static 1D float array; placeholders: $name, $elem_cnt
# and $values (the initializer text)
Cstatic_1D_template = Template('''
static float   $name[$elem_cnt] = {
$values
};
''')


# Given a parameter, returns C-source for static float array
def makeCstaticArray(name, values):
    """Return the C definition of a static float array called `name`.

    `values` with a one-dimensional shape is emitted through the 1D template
    with its length; anything else is emitted through the 2D template with
    its row and column counts.
    """
    body = csourceParam(values)
    if len(values.shape) != 1:
        n_rows, n_cols = values.shape
        return Cstatic_2D_template.substitute(name=name, row_cnt=n_rows, col_cnt=n_cols, values=body)
    return Cstatic_1D_template.substitute(name=name, elem_cnt=len(values), values=body)
    


# Build a dictionary of C-source parameter static arrays
# asdict() turns the dataclass instance into a dictionary keyed by field name;
# the field name is also used as the name of the generated C array.
params_dict = asdict(Model_params)
params_src = {}
# NOTE: `name` and `val` are left behind as globals and are deleted by name in
# a later clean-up cell, so this has to stay a plain loop.
for name, val in params_dict.items():
    params_src[name] = makeCstaticArray(name, val)

In [None]:
# Build the C-source elements
# Template of the whole parameter source file: the include line, the static
# arrays and the initializer of the exported parameter object
param_template = Template('''
#include "$header_filename"

$static_arrays

const $param_struct_name   $param_inst_name = {
$field_assignment
};
''')


# Build template substitutions: all static array definitions, one after another
static_arrays = '\n'.join(params_src.values())

In [None]:
# Field assignments for the initializer of the parameter object
fc1_row_cnt, fc1_col_cnt = params_dict['fc1_weight'].shape
fc2_row_cnt, fc2_col_cnt = params_dict['fc2_weight'].shape

# The metadata fields come first
field_assignment = [
    f'.accuracy = {Header_dict["accuracy"]}',
    '.summary = "MLP-12 for MNIST"',
]
# One initializer per parameter: a vector carries its length, a matrix its row
# and column counts, each followed by a pointer to the static array that has
# the same name as the field.
for k, v in params_dict.items():
    isvector = (len(v.shape) == 1)
    if not isvector:
        row, col = v.shape
        field = f'.{k} = {{ {row}, {col}, (float*){k} }}'
    else:
        vec_len = len(v)
        field = f'.{k} = {{ {vec_len}, (float*){k} }}'
    field_assignment.append(field)

# Join into the final initializer text and show it
field_assignment = ',\n'.join(field_assignment)
print(field_assignment)

In [None]:
# Build the C-source then save
save_path = './c-impl/mlp12_params.c'
# Fill in every $placeholder of the source template; Header_filename becomes
# the #include target of the generated file.
c_source = param_template.substitute(header_filename=Header_filename, static_arrays=static_arrays,
                                     param_struct_name=Param_struct_name, param_inst_name=Param_inst_name,
                                     field_assignment=field_assignment)

# NOTE: open() does not create missing directories; './c-impl/' must exist
with open(save_path, 'w') as outfile:
    outfile.write(c_source)
print(f"INFO: Saved file {save_path}")

In [None]:
# Delete the temporary names used while exporting the model parameters.
# NOTE: `del` raises NameError for a name that is not defined, so the cells
# above must have been run before this one.
del val, vec_len, v, static_arrays, save_path, row, params_src
del params_dict, param_template, outfile, name, model_params, k
del isvector, field, field_assignment, col, c_source

# Export Test Dataset

## Export Dataset Header

In [None]:
# Define Header file components

# Include-guard macro name of the generated dataset header
header_guard = "DATASET_MNIST_H"
# stdint.h provides the uint8_t type used by the record struct
includes = '''
#include <stdint.h>
'''

# Record struct: label index, predicted index and the feature vector.
# This is an f-string, so literal C braces are doubled.
# NOTE(review): the feature vector length is hard-coded to 784 here; it is not
# derived from the loaded dataset -- confirm it matches the feature table.
Record_struct_name = "mnist_record"
record_struct = f'''
// An element of the dataset array
typedef struct {{
  uint8_t label_index;
  uint8_t predicted_index;
  float   feature_vec[784];
}} {Record_struct_name} ;
'''

print(record_struct)

In [None]:
# Build the Header template
# Number of records to export: 1000, capped at the dataset size. Without the
# cap a dataset with fewer records would make random.sample() fail later on,
# after a header declaring the wrong array length had already been written.
Export_len = min(1000, len(Dataset))
# Name of the exported dataset array in the generated C code
Dataset_inst_name = 'mnist_dataset'

# Two substitution stages are mixed here: the {...} fields are filled in
# immediately by the f-string, while the $... placeholders are left for
# Template.substitute() later on.
header_template = Template(f'''
#ifndef $header_guard
#define $header_guard

$includes
#define mnist_DATASET_LEN $dataset_len

$record_struct

extern const {Record_struct_name}  {Dataset_inst_name}[$dataset_len];


#endif  // $header_guard
''')

In [None]:
# Generate header content and save it in a file
# NOTE: this rebinds Header_filename, which held the model header name before
Header_filename = 'mnist_dataset.h'
save_path = './c-impl/' + Header_filename
# Fill in every $placeholder of the template; dataset_len sets both the
# length macro and the declared array length.
header_content = header_template.substitute(
    header_guard=header_guard, includes=includes, 
    record_struct=record_struct, dataset_len=Export_len,
)

#print(header_content)
# NOTE: open() does not create missing directories; './c-impl/' must exist
with open(save_path, 'w') as outfile:
    outfile.write(header_content)
print(f"INFO: Saved header file {save_path}")

## Export Dataset C-source

In [None]:
from random import sample


# Select a subset of data items
# random.sample() picks without replacement and raises ValueError when asked
# for more items than the population holds. No seed is set here, so a
# different subset is selected on every run.
data_subset = sample(Dataset, Export_len)
print('data_subset len:', len(data_subset))

In [None]:
# Designated initializers for the fields of one record; the surrounding braces
# are added by makeRecordSource(). Placeholders: $label_index,
# $predicted_index and $feature_vec_str.
C_item_template = Template(
'''.label_index = $label_index,
.predicted_index = $predicted_index,
.feature_vec = { $feature_vec_str },''')


# Given a DataItem, returns an instantiation C-source code
def makeRecordSource(item):
    """Return the brace-enclosed C initializer of one dataset record.

    The label index, the predicted index and the formatted feature vector of
    `item` are filled into C_item_template; the opening and the closing brace
    are put on lines of their own.
    """
    fields_src = C_item_template.substitute(
        label_index=item.label_index,
        predicted_index=item.predicted_index,
        feature_vec_str=csourceVector(item.feature_vec),
    )
    return '\n'.join(['{', fields_src, '}'])


# Test makeRecordSource()
#item = data_subset[0]
#print(makeRecordSource(item))
    

In [None]:
## Build C-source content
# C-source template
# The {...} fields are filled in immediately by the f-string (literal C braces
# are doubled); the $... placeholders are left for Template.substitute().
csource_template = Template(f'''
#include "$header_filename"


const {Record_struct_name}  {Dataset_inst_name}[$dataset_len] = {{
$record_list_str
}};
''')


# Build record_list_str
# One initializer per selected item, then joined with a comma and a blank line
record_list_str = []
for item in data_subset:
    record_str = makeRecordSource(item)
    record_list_str.append(record_str)
record_list_str = ',\n\n'.join(record_list_str)


# instantiate the template
# Header_filename is the dataset header name at this point
csource_data = csource_template.substitute(header_filename=Header_filename,
                                           dataset_len=Export_len, 
                                           record_list_str=record_list_str)

In [None]:
# Save the dataset content as C-source
# NOTE: open() does not create missing directories; './c-impl/' must exist
save_path = './c-impl/mnist_dataset.c'
with open(save_path, 'w') as outfile:
    outfile.write(csource_data)
# This file is the C source, not the header, so the message must not call it
# a header file (same wording as the message of the model parameter export).
print(f"INFO: Saved file {save_path}")