## Prepare

In [1]:
import torch, struct, os, psutil, subprocess, time, threading
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import tensorflow as tf

from torch.utils.data import DataLoader, TensorDataset
import concurrent.futures
import pandas as pd

2024-02-03 22:38:24.875431: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-03 22:38:24.908001: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-03 22:38:24.908026: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-03 22:38:24.908709: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-03 22:38:24.913451: I tensorflow/core/platform/cpu_feature_guar

In [2]:
# Fetch MNIST through Keras; only the test split is converted below.
mnist = tf.keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Labels become a tensor as-is.
test_labels_pt = torch.tensor(test_labels)

# Pixels: float tensor, flattened to 784 columns and divided by 255, then
# viewed as [num_samples, channels, height, width] so it can be interpolated.
test_images_pt_reshaped = (
    torch.tensor(test_images).float().view(-1, 28 * 28) / 255.0
).view(-1, 1, 28, 28)

# Bilinear downsample from 28x28 to 14x14.
test_images_pt_downsampled = F.interpolate(
    test_images_pt_reshaped,
    size=(14, 14),
    mode='bilinear',
    align_corners=False,
)

# Final model input: one flat row of 14*14 values per image.
test_images_pt = test_images_pt_downsampled.view(-1, 14 * 14)

In [36]:
# Pair each test image row with its label.
test_dataset = TensorDataset(test_images_pt, test_labels_pt)

# Serve the pairs in their stored order, 32 at a time.
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

def evaluate_pytorch_model(model, test_loader):
    """Return the top-1 accuracy of ``model`` over ``test_loader`` as a percentage.

    Args:
        model: module invoked as ``model(images)``; it is put into eval mode.
        test_loader: iterable yielding ``(images, labels)`` batches.

    Returns:
        ``100 * correct / total`` accumulated over every batch.
    """
    model.eval()
    n_hits = 0
    n_seen = 0

    with torch.no_grad():
        for batch_images, batch_labels in test_loader:
            scores = model(batch_images)
            # torch.max over dim 1 returns (values, indices); the indices are the classes.
            guesses = torch.max(scores.data, 1)[1]
            n_seen += batch_labels.size(0)
            n_hits += (guesses == batch_labels).sum().item()

    return 100 * n_hits / n_seen

In [4]:
def load_img_from_file(data_file="input", show=False):
    """Read a raw 8-bit image file and return its pixels as float32.

    Args:
        data_file: path of the file to read.
        show: when True, print an ASCII preview of the image treated as a
            square; bytes above 230 are drawn as "&", all others as "-".

    Returns:
        ``(digits, None)`` on success, where ``digits`` is a 1-D float32 array
        with one entry per byte, or ``(None, error)`` if reading the file
        raised (the error is printed as well).
    """
    try:
        with open(data_file, 'rb') as handle:
            raw = handle.read()
    except Exception as err:
        print(err)
        return None, err

    pixels = np.frombuffer(raw, dtype=np.uint8).astype(np.float32)

    if show:
        # Side length of the square preview (truncated square root of the byte count).
        side = int(np.sqrt(len(pixels)))
        preview = "".join(
            "".join("&" if raw[r * side + c] > 230 else "-" for c in range(side)) + "\n"
            for r in range(side)
        )
        print(preview)

    return pixels, None

def save_img_to_file(image, data_file = "input"):
    """Write ``image`` to ``data_file`` as raw 8-bit pixels.

    The values are multiplied by 255, rounded to the nearest integer and
    clamped to 0..255 before being stored as unsigned bytes. Rounding (rather
    than the truncation a bare ``astype('uint8')`` performs) keeps a value such
    as 0.999 at 255 instead of 254, and clamping keeps out-of-range values
    from reaching the float-to-uint8 cast.

    Args:
        image: array-like of pixel intensities, expected in the range 0..1.
        data_file: destination path; an existing file is overwritten.

    Any exception is reported with ``print`` and otherwise swallowed
    (best effort); nothing is returned.
    """
    try:
        # Scale to the byte range, then round and clamp before the cast.
        scaled = np.array(image*255)
        image_bytes = np.clip(np.rint(scaled), 0, 255).astype('uint8').tobytes()

        # Write to file
        with open(data_file, 'wb') as file:
            file.write(image_bytes)
    except Exception as e:
        print(f"Error saving image: {e}")

In [5]:
def monitor_memory(pid, freq = 0.01):
    """Poll the resident set size of process ``pid`` and return its peak in MB.

    Args:
        pid: id of the process to watch.
        freq: seconds to sleep between two samples.

    Returns:
        The largest RSS observed, in MiB (0 if the process could not be
        sampled at all).

    The loop ends when the process no longer exists, is no longer running, or
    has become a zombie. The zombie check matters for child processes: a child
    that has exited but has not been waited on yet still has a process entry,
    so waiting for ``NoSuchProcess`` alone could keep polling indefinitely.
    """
    max_memory = 0
    try:
        p = psutil.Process(pid)
    except psutil.NoSuchProcess:
        return max_memory  # Process finished before monitoring could start

    while True:
        try:
            if not p.is_running() or p.status() == psutil.STATUS_ZOMBIE:
                break  # Process has exited (possibly not reaped yet)
            mem = p.memory_info().rss / (1024 * 1024)
            max_memory = max(max_memory, mem)
        except psutil.NoSuchProcess:
            break  # Process has finished
        time.sleep(freq)  # Poll every `freq` seconds

    return max_memory

In [6]:
def benchmark(test_images, tmp_folder, model_in_path, vm_file = "./bin/vm", program = "./bin/mlgo_784.bin", threaded = True):
    """Run the VM once per image and collect prediction, peak memory and wall time.

    Args:
        test_images: iterable of images; each is written with ``save_img_to_file``.
        tmp_folder: directory that receives the per-image input files; it is
            also passed to the VM as ``--basedir``.
        model_in_path: model file passed as ``--model``.
        vm_file: VM executable to launch.
        program: program file passed as ``--program``.
        threaded: when True, ``monitor_memory`` runs in a worker thread while
            this thread waits for the VM; otherwise it runs first, in this thread.

    Returns:
        ``(veri_infer, mem_usage, time_cost)``: three per-image lists holding
        the digit parsed from the VM's stderr, the value returned by
        ``monitor_memory``, and the elapsed seconds for that image.
    """
    print (vm_file, program)

    benchmark_start_time = time.time()
    veri_infer = []
    mem_usage = []
    time_cost = []
    for ind, img in enumerate(test_images):
        # os.path.join gives the same path as plain concatenation when
        # tmp_folder ends with a separator, and a correct one when it does not.
        img_out_path = os.path.join(tmp_folder, str(ind))
        save_img_to_file(img, img_out_path)

        # Exclusion of Pre-processing
        start_time = time.time()
        # --basedir uses the tmp_folder argument, not a notebook-level global.
        command = [f"{vm_file}", f"--basedir={tmp_folder}",
                f"--program={program}", f"--model={model_in_path}",
                f"--data={img_out_path}", "--mipsVMCompatible"]

        print ("Process for image", ind)
        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

        # Get the process ID
        pid = process.pid

        if threaded:
            # One worker samples memory while this thread drains the pipes.
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
                future = executor.submit(monitor_memory, pid)
                _, stderr = process.communicate()
                max_memory = future.result()
        else:
            # NOTE(review): this path only proceeds once monitor_memory returns,
            # i.e. once it notices that the child has exited — confirm.
            max_memory = monitor_memory(pid)  # Run in the same thread
            _, stderr = process.communicate()

        # The predicted digit is read from the second-to-last character of stderr.
        veri_infer.append(int(stderr[-2]))
        mem_usage.append(max_memory)
        time_cost.append(time.time() - start_time)

    print ("Total time:", time.time() - benchmark_start_time)
    return veri_infer, mem_usage, time_cost

def calculate_loss(veri_infer, predicted_labels):
    """Return the percentage of VM predictions that differ from the reference.

    Args:
        veri_infer: predictions produced by the VM, one per benchmarked image.
        predicted_labels: reference predictions, indexed the same way; it may
            be longer than ``veri_infer``, extra entries are ignored.

    Returns:
        Mismatch rate in percent; 0.0 when ``veri_infer`` is empty (instead of
        dividing by zero).

    Each mismatching index is also reported with ``print``.
    """
    if len(veri_infer) == 0:
        return 0.0

    count = 0
    for i, vm_label in enumerate(veri_infer):
        if vm_label != predicted_labels[i]:
            count += 1
            print (f"Index {i} Not match!")

    return count/len(veri_infer)*100

In [7]:
csv_path = '../../benchmarks/benchmark_results.csv'

columns = ['Framework', 'Architecture', '# Layers', '# Parameters', 'Testing Size', 'Accuracy Loss (%)',
           'Avg Memory Usage (MB)', 'Std Memory Usage', 'Avg Proving Time (s)', 'Std Proving Time']

# Check if the CSV file exists
if not os.path.isfile(csv_path):
    # to_csv does not create missing directories, so make sure the parent exists first.
    csv_dir = os.path.dirname(csv_path)
    if csv_dir:
        os.makedirs(csv_dir, exist_ok=True)
    # Seed the file with a header-only table.
    pd.DataFrame(columns=columns).to_csv(csv_path, index=False)
else:
    print(f"File '{csv_path}' already exists.")

# Always work from what is on disk, whether it was just created or already there.
df = pd.read_csv(csv_path)

File '../../benchmarks/benchmark_results.csv' already exists.


In [None]:
# TODO: make a shared file for saving all the configs; for now this is an empty dict.
config = {}

## Benchmark for 196_25_10 DNN Model

In [17]:
# TODO: use json file for model (results, config etc) management
model_path = "../../models/"
arch_folder = "input-dense-dense/"
layers = [196, 25, 10]
# The checkpoint file is named after the layer widths joined with "_".
model_name = "_".join(map(str, layers))

# The loaded object is used as a state dict; list_vars is a second name for it.
list_vars = state_dict = torch.load(f"{model_path}{arch_folder}{model_name}.pth")


In [18]:
class Net(nn.Module):
    """Two linear layers whose sizes are read from the notebook-level ``layers`` list.

    ``forward`` applies ReLU after the first layer and a softmax over dim 1
    after the second. ``num_classes`` is accepted but the output width comes
    from ``layers[2]``.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        widths = layers
        self.fc1 = nn.Linear(widths[0], widths[1])
        self.fc2 = nn.Linear(widths[1], widths[2])

    def forward(self, x):
        hidden = F.relu(self.fc1(x))
        return F.softmax(self.fc2(hidden), dim=1)
    
# Build the network, load the checkpoint weights and switch to evaluation mode.
model_pt = Net()
model_pt.load_state_dict(state_dict)
model_pt.eval()

# Score every test image in a single gradient-free forward pass.
with torch.no_grad():
    predictions = model_pt(test_images_pt)

# Highest-scoring class per row, as a plain Python list.
predicted_labels = predictions.argmax(dim=1).tolist()

In [19]:
# Accuracy of the reference PyTorch model against the true test labels.
accuracy = evaluate_pytorch_model(model=model_pt, test_loader=test_loader)
print(f'Accuracy of the PyTorch model on the test images: {accuracy:.8f}%')

Accuracy of the PyTorch model on the test images: 95.41000000%


### Convert Model

In [20]:
# Scratch directory, relative to the current working directory.
folder = "./tmp/"

# Create it if needed; an existing directory is left untouched.
os.makedirs(name=folder, exist_ok=True)

In [21]:
fname_out = "./bin/" + arch_folder + "ggml-model-" + model_name + ".bin"
pack_fmt = "!i"  # struct format: one 4-byte int in network (big-endian) byte order

os.makedirs("./bin/" + arch_folder, exist_ok=True)

# The context manager closes the file even if a tensor fails to convert,
# so an aborted run does not leave an open handle behind.
with open(fname_out, "w+b") as fout:
    fout.write(struct.pack(pack_fmt, 0x67676d6c)) # magic: ggml in hex

    for name in list_vars.keys():
        # squeeze() drops every size-1 dimension before the shape is recorded.
        data = list_vars[name].squeeze().numpy()
        print("Processing variable: " + name + " with shape: ", data.shape)
        n_dims = len(data.shape)

        # Per-tensor header: number of dimensions, then the sizes from last to first.
        fout.write(struct.pack(pack_fmt, n_dims))

        data = data.astype(np.float32)
        for i in range(n_dims):
            fout.write(struct.pack(pack_fmt, data.shape[n_dims - 1 - i]))

        # Payload: the values as big-endian float32.
        data = data.astype(">f4")
        data.tofile(fout)

Processing variable: fc1.weight with shape:  (25, 196)
Processing variable: fc1.bias with shape:  (25,)
Processing variable: fc2.weight with shape:  (10, 25)
Processing variable: fc2.bias with shape:  (10,)


### Test

In [29]:
# Pick one test image and store it under its index inside the scratch folder.
ind = 89
img_out_path = f"{folder}{ind}"
save_img_to_file(image=test_images_pt[ind], data_file=img_out_path)

In [30]:
# Executable, program image and converted model used for the single-image test.
vm_file = "./bin/vm"
program = "./bin/mlgo_196.bin"
model_in_path = fname_out

# Argument vector for one VM run on the image saved above.
command = [
    vm_file,
    "--basedir=" + folder,
    "--program=" + program,
    "--model=" + model_in_path,
    "--data=" + img_out_path,
    "--mipsVMCompatible",
]

In [31]:
# Launch the VM; stdout and stderr are captured as text.
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

# Get the process ID
pid = process.pid
print(f"Process ID: {pid}")

# Start memory monitoring in a separate thread.
# NOTE(review): threading.Thread discards the target's return value, so the peak
# memory computed by monitor_memory is not available in this cell.
monitor_thread = threading.Thread(target=monitor_memory, args=(pid,))
monitor_thread.start()

# Wait for the process to complete and capture output
stdout, stderr = process.communicate()

# Wait for the monitoring thread to finish
monitor_thread.join()

# The VM's predicted class is taken from the second-to-last character of stderr.
print (f"opml outputs predicted class:{stderr[-2]} where original model pred:{predicted_labels[ind]}")

Process ID: 1727489
opml outputs predicted class:1 where original model pred:1


In [32]:
# Read the saved image back and print its ASCII preview.
digits, error = load_img_from_file(data_file=img_out_path, show=True)

--------------
--------------
--------------
--------------
--------&-----
--------------
-------&------
------&&------
-------&------
------&-------
------&-------
--------------
--------------
--------------



### Benchmark

In [34]:
# Number of leading test images sent through the VM.
test_size = 10
veri_infer, mem_usage, time_cost = benchmark(
    test_images=test_images_pt[:test_size],
    tmp_folder='./tmp/',
    model_in_path=model_in_path,
    vm_file=vm_file,
    program=program,
)

./bin/vm ./bin/mlgo_196.bin
Process for image 0
Process for image 1
Process for image 2
Process for image 3
Process for image 4
Process for image 5
Process for image 6
Process for image 7
Process for image 8
Process for image 9
Total time: 8.301805019378662


### Save Results

In [35]:
# Share (in percent) of VM predictions that disagree with the PyTorch predictions.
loss_results = calculate_loss(veri_infer=veri_infer, predicted_labels=predicted_labels)

In [None]:
# Describe the benchmarked network from the objects actually in use, so the
# label (including its closing parenthesis), the layer count and the parameter
# count cannot drift from the loaded model.
arch_label = f"Input{'-Dense' * (len(layers) - 1)} ({'x'.join(map(str, layers))})"
n_parameters = sum(p.numel() for p in model_pt.parameters())

new_row = {
    'Framework': ['opml (pytorch)'],
    'Architecture': [arch_label],
    '# Layers': [len(layers)],
    '# Parameters': [n_parameters],
    'Testing Size': [len(veri_infer)],
    'Accuracy Loss (%)': [loss_results],
    'Avg Memory Usage (MB)': [sum(mem_usage) / len(mem_usage)],
    'Std Memory Usage': [pd.Series(mem_usage).std()],
    'Avg Proving Time (s)': [sum(time_cost) / len(time_cost)],
    'Std Proving Time': [pd.Series(time_cost).std()]
}

# One-row frame with the same column names as the results table.
new_row_df = pd.DataFrame(new_row)


In [30]:
# Append the new row and persist the table. Re-running this cell appends the
# same row again, because df is rebound to the grown table.
df = pd.concat(objs=[df, new_row_df], ignore_index=True)
df.to_csv(path_or_buf=csv_path, index=False)
df

Unnamed: 0,Framework,Architecture,# Layers,# Parameters,Testing Size,Accuracy Loss (%),Avg Memory Usage (MB),Std Memory Usage,Avg Proving Time (s),Std Proving Time
0,opml (pytorch),Input-Dense-Dense (784 * 56 * 10),3,44543,250,0.4,88.998094,2.285579,3.655122,0.440126
1,opml (pytorch),"Input-Dense-Dense (784 * 56 * 10, w/ relu)",3,44543,1000,20.9,89.122078,2.247846,3.664727,0.433921
2,opml (pytorch),Input-Dense-Dense (784 * 56 * 10),3,44543,2500,0.72,89.120883,2.254392,3.609974,0.421732
3,opml (pytorch),Input-Dense-Dense (196 * 25 * 10,3,5185,5000,3.52,74.351948,1.419725,0.80944,0.074357


## Benchmark for 784_56_10 DNN Model

In [None]:
# TODO: use json file for model (results, config etc) management
# NOTE(review): the surrounding section is titled 784_56_10, but the widths set
# here are 196, 25, 10 — confirm which model this section is meant to benchmark.
model_path = "../../models/"
arch_folder = "input-dense-dense/"
layers = [196, 25, 10]
# The checkpoint file is named after the layer widths joined with "_".
model_name = "_".join(map(str, layers))

# The loaded object is used as a state dict; list_vars is a second name for it.
list_vars = state_dict = torch.load(f"{model_path}{arch_folder}{model_name}.pth")


In [None]:
class Net(nn.Module):
    """Two-layer dense classifier sized from the notebook-level ``layers`` list.

    The first layer is followed by ReLU; the second layer's output is turned
    into per-row probabilities with a softmax over dim 1. ``num_classes`` is
    accepted but not used: the output width is ``layers[2]``.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        in_features, hidden_features, out_features = layers[0], layers[1], layers[2]
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, out_features)

    def forward(self, x):
        activated = F.relu(self.fc1(x))
        logits = self.fc2(activated)
        return F.softmax(logits, dim=1)
    
# Instantiate the network, restore the checkpoint weights, switch to eval mode.
model_pt = Net()
model_pt.load_state_dict(state_dict)
model_pt.eval()

# Single forward pass over all test images without tracking gradients.
with torch.no_grad():
    predictions = model_pt(test_images_pt)

# Class with the largest probability in each row, as a Python list.
predicted_labels = predictions.argmax(dim=1).tolist()

In [None]:
# Accuracy of the reference PyTorch model against the true test labels.
accuracy = evaluate_pytorch_model(model=model_pt, test_loader=test_loader)
print(f'Accuracy of the PyTorch model on the test images: {accuracy:.8f}%')

In [None]:


fname_out = "./bin/" + arch_folder + "ggml-model-" + model_name + ".bin"
pack_fmt = "!i"  # struct format: one 4-byte int in network (big-endian) byte order

os.makedirs("./bin/" + arch_folder, exist_ok=True)

# Using a context manager guarantees the output file is closed even when a
# tensor conversion raises part-way through.
with open(fname_out, "w+b") as fout:
    fout.write(struct.pack(pack_fmt, 0x67676d6c)) # magic: ggml in hex

    for name in list_vars.keys():
        # squeeze() removes size-1 dimensions, so the recorded shape is the squeezed one.
        data = list_vars[name].squeeze().numpy()
        print("Processing variable: " + name + " with shape: ", data.shape)
        n_dims = len(data.shape)

        # Header for this tensor: dimension count, then each size from last to first.
        fout.write(struct.pack(pack_fmt, n_dims))

        data = data.astype(np.float32)
        for i in range(n_dims):
            fout.write(struct.pack(pack_fmt, data.shape[n_dims - 1 - i]))

        # Tensor values as big-endian float32.
        data = data.astype(">f4")
        data.tofile(fout)

### Benchmark

In [None]:
test_size = 10
# Point the VM at the model file written by this section's conversion cell
# (fname_out) rather than at whatever model_in_path was bound to earlier.
model_in_path = fname_out
veri_infer, mem_usage, time_cost = benchmark(test_images_pt[:test_size], './tmp/', model_in_path, vm_file=vm_file, program=program)

In [None]:
# Build the description from the layer list and model currently loaded, so the
# label (with its closing parenthesis), layer count and parameter count always
# describe the network that was actually benchmarked.
arch_label = f"Input{'-Dense' * (len(layers) - 1)} ({'x'.join(map(str, layers))})"
n_parameters = sum(p.numel() for p in model_pt.parameters())

new_row = {
    'Framework': ['opml (pytorch)'],
    'Architecture': [arch_label],
    '# Layers': [len(layers)],
    '# Parameters': [n_parameters],
    'Testing Size': [len(veri_infer)],
    'Accuracy Loss (%)': [calculate_loss(veri_infer, predicted_labels)],
    'Avg Memory Usage (MB)': [sum(mem_usage) / len(mem_usage)],
    'Std Memory Usage': [pd.Series(mem_usage).std()],
    'Avg Proving Time (s)': [sum(time_cost) / len(time_cost)],
    'Std Proving Time': [pd.Series(time_cost).std()]
}

# One-row frame with the same column names as the results table.
new_row_df = pd.DataFrame(new_row)


In [None]:
# Append the new row and write the table back. Running this cell again appends
# the same row a second time, because df is rebound to the grown table.
df = pd.concat(objs=[df, new_row_df], ignore_index=True)
df.to_csv(path_or_buf=csv_path, index=False)
df

## Benchmark for 196_24_14_10 DNN Model

In [12]:
# TODO: use json file for model (results, config etc) management
model_path = "../../models/"
arch_folder = "input-dense-dense-dense/"
layers = [196, 24, 14, 10]
# The checkpoint file is named after the layer widths joined with "_".
model_name = "_".join(map(str, layers))

# The loaded object is used as a state dict; list_vars is a second name for it.
list_vars = state_dict = torch.load(f"{model_path}{arch_folder}{model_name}.pth")


In [14]:
# 196_24_14_10
class Net(nn.Module):
    """Three linear layers; the first three widths come from the notebook-level ``layers``.

    ReLU follows the first two layers and a softmax over dim 1 follows the
    third. The output width is ``num_classes``, not ``layers[3]``.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        widths = layers
        self.fc1 = nn.Linear(widths[0], widths[1])
        self.fc2 = nn.Linear(widths[1], widths[2])
        self.fc3 = nn.Linear(widths[2], num_classes)

    def forward(self, x):
        for hidden_layer in (self.fc1, self.fc2):
            x = F.relu(hidden_layer(x))
        return F.softmax(self.fc3(x), dim=1)
    
# Create the three-layer network, load its weights and freeze it for inference.
model_pt = Net()
model_pt.load_state_dict(state_dict)
model_pt.eval()

# One gradient-free forward pass over the full test set.
with torch.no_grad():
    predictions = model_pt(test_images_pt)

# Index of the largest probability per row, converted to a Python list.
predicted_labels = predictions.argmax(dim=1).tolist()

In [37]:
# Scratch directory for the per-image input files; created if it is missing.
tmp_folder = './tmp/'
os.makedirs(name=tmp_folder, exist_ok=True)

In [None]:
fname_out = "./bin/" + arch_folder + "ggml-model-" + model_name + ".bin"
pack_fmt = "!i"  # struct format: one 4-byte int in network (big-endian) byte order

os.makedirs("./bin/" + arch_folder, exist_ok=True)

# `with` closes the output file on every exit path, including an exception
# raised while converting one of the tensors.
with open(fname_out, "w+b") as fout:
    fout.write(struct.pack(pack_fmt, 0x67676d6c)) # magic: ggml in hex

    for name in list_vars.keys():
        # The shape written below is the one left after squeeze() drops size-1 dims.
        data = list_vars[name].squeeze().numpy()
        print("Processing variable: " + name + " with shape: ", data.shape)
        n_dims = len(data.shape)

        # Tensor header: dimension count followed by the sizes, last dimension first.
        fout.write(struct.pack(pack_fmt, n_dims))

        data = data.astype(np.float32)
        for i in range(n_dims):
            fout.write(struct.pack(pack_fmt, data.shape[n_dims - 1 - i]))

        # Tensor body: big-endian float32 values.
        data = data.astype(">f4")
        data.tofile(fout)

In [None]:
# NOTE(review): no benchmark call is visible in this section, so veri_infer
# presumably still holds the results of an earlier section's run — confirm.
loss_results = calculate_loss(veri_infer=veri_infer, predicted_labels=predicted_labels)

In [None]:
# The hard-coded description here was copied from the 196x25x10 model. Derive
# the label, layer count and parameter count from the layer list and model that
# are currently loaded so the row describes this section's network.
# NOTE(review): veri_infer, mem_usage and time_cost are not recomputed in the
# visible cells of this section — confirm they belong to this model before saving.
arch_label = f"Input{'-Dense' * (len(layers) - 1)} ({'x'.join(map(str, layers))})"
n_parameters = sum(p.numel() for p in model_pt.parameters())

new_row = {
    'Framework': ['opml (pytorch)'],
    'Architecture': [arch_label],
    '# Layers': [len(layers)],
    '# Parameters': [n_parameters],
    'Testing Size': [len(veri_infer)],
    'Accuracy Loss (%)': [loss_results],
    'Avg Memory Usage (MB)': [sum(mem_usage) / len(mem_usage)],
    'Std Memory Usage': [pd.Series(mem_usage).std()],
    'Avg Proving Time (s)': [sum(time_cost) / len(time_cost)],
    'Std Proving Time': [pd.Series(time_cost).std()]
}

# One-row frame with the same column names as the results table.
new_row_df = pd.DataFrame(new_row)