In [1]:
# Most importantly, select a kernel running Python 3.11 or newer

In [49]:
import os, subprocess
import torch, struct, os, psutil, subprocess, time, threading
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import tensorflow as tf
import json, ezkl
import pandas as pd

from torch.utils.data import DataLoader, TensorDataset

## Prepare

In [2]:
# MNIST via the Keras dataset helper; only the test split is used further down
mnist = tf.keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Test split as PyTorch tensors
test_labels_pt = torch.tensor(test_labels)
# Images: cast to float, one 784-long row per sample, values divided by 255
test_images_pt = torch.tensor(test_images).float().view(-1, 28*28) / 255.0

# Back to [num_samples, channels, height, width] so the images can be resized
test_images_pt_reshaped = test_images_pt.view(-1, 1, 28, 28)

# Bilinear resize 28x28 -> 14x14, then flatten each image to 14*14 values
test_images_pt_downsampled = F.interpolate(
    test_images_pt_reshaped,
    size=(14, 14),
    mode='bilinear',
    align_corners=False,
).view(-1, 14*14)

In [3]:
def evaluate_pytorch_model(model, datasets, labels):
    """Return the model's top-1 accuracy, in percent, over the given samples.

    Args:
        model: Module called on batches of ``datasets``; it is switched to
            evaluation mode as a side effect.
        datasets: Tensor of inputs whose first dimension indexes samples.
        labels: Tensor of target class indices aligned with ``datasets``.

    Returns:
        ``100 * correct / total`` as a float.
    """
    # Fixed-order batches of 32 over the (input, label) pairs
    loader = DataLoader(TensorDataset(datasets, labels), batch_size=32, shuffle=False)

    model.eval()
    n_correct = 0
    n_seen = 0

    with torch.no_grad():
        for batch_inputs, batch_labels in loader:
            scores = model(batch_inputs)
            # Index of the largest score along dim 1 is the predicted class
            predicted = scores.max(dim=1).indices
            n_seen += batch_labels.size(0)
            n_correct += (predicted == batch_labels).sum().item()

    return 100 * n_correct / n_seen

In [45]:
import subprocess, concurrent
import psutil
import time

def monitor_memory(pid, freq = 0.01):
    """Poll a process's resident set size until it disappears; return the peak.

    Args:
        pid: ID of the process to watch.
        freq: Seconds to sleep between two samples (default 0.01).

    Returns:
        The largest RSS observed, in MiB. 0 if the process was already gone
        before the first sample could be taken.
    """
    max_memory = 0
    try:
        # Attaching can itself fail if the process exited before this thread
        # got scheduled, so it lives inside the same try as the sampling loop.
        p = psutil.Process(pid)
        while True:
            mem = p.memory_info().rss / (1024 * 1024)
            max_memory = max(max_memory, mem)
            time.sleep(freq)  # Wait `freq` seconds before the next sample
    except psutil.NoSuchProcess:
        pass  # Process has finished

    return max_memory

def execute_and_monitor(command, show = False):
    """Run a command to completion while tracking its peak memory.

    Args:
        command: Argument list handed to ``subprocess.Popen``.
        show: When True, print the peak memory and the elapsed wall time.

    Returns:
        Tuple ``(stdout, stderr, max_memory)``: the captured text streams and
        the value returned by ``monitor_memory`` for the child process.
    """
    # `import concurrent` alone does not load the `futures` submodule, so
    # import the executor explicitly instead of relying on another library
    # having loaded it first.
    from concurrent.futures import ThreadPoolExecutor

    start_time = time.time()
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    # One background worker samples memory while this thread drains the pipes
    with ThreadPoolExecutor(max_workers=1) as executor:
        future = executor.submit(monitor_memory, process.pid)
        stdout, stderr = process.communicate()
        max_memory = future.result()
    if show:
        print(f"Maximum memory used: {max_memory} MB")
        print("Total time:", time.time() - start_time)
    return stdout, stderr, max_memory


In [47]:
def benchmark(test_images, predictions, model, output_folder='./tmp/'):
    """Prove each image with ``gen_proof.py`` and compare against reference labels.

    The model is exported to ONNX once (using the first image as sample
    input). Then, for every image, the input is written to ``input.json``,
    ``python gen_proof.py --model ... --data ... --output ...`` is run under
    ``execute_and_monitor``, and the class it reports is compared with
    ``predictions[i]``.

    Args:
        test_images: Indexable/iterable collection of input tensors.
        predictions: Reference class per image, indexed like ``test_images``.
        model: Model passed to ``torch.onnx.export``.
        output_folder: Directory receiving ``input.json`` and ``network.onnx``
            and forwarded to ``gen_proof.py`` as ``--output``.

    Returns:
        Tuple ``(loss, mem_usage, time_cost)``: number of images whose reported
        class differs from the reference, per-image peak memory as returned by
        ``execute_and_monitor``, and per-image wall time in seconds.

    Raises:
        RuntimeError: If the subprocess output does not end with a digit.
    """
    # Make sure the target directory exists before anything is written to it
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    data_path = os.path.join(output_folder, 'input.json')
    model_path = os.path.join(output_folder, 'network.onnx')

    sampled_data = test_images[0]
    torch.onnx.export(model, 
                        sampled_data, 
                        model_path, 
                        export_params=True, 
                        opset_version=10, 
                        do_constant_folding=True, 
                        input_names=['input_0'], 
                        output_names=['output'])
    loss = 0
    mem_usage = []
    time_cost = []
    benchmark_start_time = time.time()

    for i, img in enumerate(test_images):
        print ("Process for image", i)
        start_time = time.time()
        # Convert the tensor to numpy array and reshape it for JSON serialization
        x = (img.cpu().detach().numpy().reshape([-1])).tolist()
        data = dict(input_data = [x])

        # Serialize data into file; the context manager guarantees the file is
        # flushed and closed before the subprocess reads it
        with open(data_path, 'w') as data_file:
            json.dump(data, data_file)

        command = ["python", "gen_proof.py", "--model", model_path, "--data", data_path, "--output", output_folder]

        stdout, stderr, usage = execute_and_monitor(command)

        # Assumes gen_proof.py prints the predicted digit as the last
        # non-whitespace character of stdout -- TODO confirm against the script
        reported = stdout.strip()
        if not reported or not reported[-1].isdigit():
            raise RuntimeError(
                f"gen_proof.py gave no prediction for image {i}.\n"
                f"stdout: {stdout!r}\nstderr: {stderr!r}"
            )
        pred = int(reported[-1])

        if pred != predictions[i]:
            loss += 1
            print ("Loss happens on index", i, "predicted_class", pred)
        mem_usage.append(usage)
        time_cost.append(time.time() - start_time)

    print ("Total time:", time.time() - benchmark_start_time)
    return loss, mem_usage, time_cost

In [50]:
csv_path = '../../benchmarks/benchmark_results.csv'

columns = [
    'Framework', 'Architecture', '# Layers', '# Parameters', 'Testing Size',
    'Accuracy Loss (%)', 'Avg Memory Usage (MB)', 'Std Memory Usage',
    'Avg Proving Time (s)', 'Std Proving Time',
]

# Seed an empty, header-only results table on first use; an existing file is
# left untouched
if os.path.isfile(csv_path):
    print(f"File '{csv_path}' already exists.")
else:
    df = pd.DataFrame(columns=columns)
    df.to_csv(csv_path, index=False)


File '../../benchmarks/benchmark_results.csv' already exists.


## Benchmark for 196_25_10 DNN Model

In [4]:
# (Original author's note: run this after login.)
# Architecture under test and the checkpoint path derived from its layer widths
arch_folder = "input-dense-dense/"
layers = [196,25,10]
model_name = "_".join(map(str, layers))
model_path = f"../../models/{arch_folder}{model_name}"

# NOTE: torch.load unpickles the file -- only load checkpoints you trust
state_dict = torch.load(f"{model_path}.pth")

# Scratch directory named after the layer widths
output_folder = f"./tmp/{model_name}/"
os.makedirs(output_folder, exist_ok=True)

In [5]:
class Net(nn.Module):
    """Two fully connected layers with a ReLU in between.

    Layer widths are read from the notebook-level ``layers`` list at
    construction time. ``num_classes`` is accepted but not used.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        in_dim, hidden_dim, out_dim = layers[0], layers[1], layers[2]
        self.fc1 = nn.Linear(in_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, out_dim)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for ``x``."""
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)
    
# Build the network, restore the trained weights and switch to inference mode
model_pt = Net()
model_pt.load_state_dict(state_dict)
model_pt.eval()

# Reference outputs over the downsampled test images; no gradients needed
with torch.no_grad():
    predictions = model_pt(test_images_pt_downsampled)

# Most likely class per image, as a plain Python list
predicted_labels = torch.argmax(predictions, dim=1).tolist()

In [6]:
# Top-1 accuracy of the restored model on the downsampled test set
accuracy = evaluate_pytorch_model(
    model=model_pt,
    datasets=test_images_pt_downsampled,
    labels=test_labels_pt,
)
print(f'Accuracy of the PyTorch model on the test images: {accuracy:.8f}%')

Accuracy of the PyTorch model on the test images: 95.41000000%


### Benchmark

In [48]:
test_size = 10
# Write artefacts to this model's own scratch folder (built and created in the
# setup cell of this section) rather than benchmark()'s shared './tmp/' default,
# so runs for different models do not overwrite each other's files.
loss_, mem_usage_, time_cost_ = benchmark(
    test_images_pt_downsampled[:test_size],
    predicted_labels[:test_size],
    model_pt,
    output_folder=output_folder,
)

Process for image 0
Process for image 1
Process for image 2
Process for image 3
Process for image 4
Process for image 5
Process for image 6
Process for image 7
Process for image 8
Process for image 9
Total time: 7.1037983894348145


In [None]:
# Describe the model from the objects that were actually benchmarked, so the
# row cannot drift from the section it belongs to (the previous hard-coded
# values described the 196x24x14x10 network).
arch_name = "-".join(part.capitalize() for part in arch_folder.strip("/").split("-"))
layer_dims = "x".join(str(width) for width in layers)
n_parameters = sum(p.numel() for p in model_pt.parameters())  # weights + biases

new_row = {
    'Framework': ['ezkl (pytorch)'],
    'Architecture': [f"{arch_name} ({layer_dims})"],
    '# Layers': [len(layers)],
    '# Parameters': [n_parameters],
    'Testing Size': [test_size],
    'Accuracy Loss (%)': [loss_/test_size*100],
    'Avg Memory Usage (MB)': [sum(mem_usage_) / len(mem_usage_)],
    'Std Memory Usage': [pd.Series(mem_usage_).std()],
    'Avg Proving Time (s)': [sum(time_cost_) / len(time_cost_)],
    'Std Proving Time': [pd.Series(time_cost_).std()]
}

new_row_df = pd.DataFrame(new_row)

In [None]:
# Append this run's summary row to the results on disk and show the new table.
# Re-running this cell appends the same row again.
df = pd.concat([pd.read_csv(csv_path), new_row_df], ignore_index=True)
df.to_csv(csv_path, index=False)
df

## Benchmark for 196_24_14_10 DNN Model

In [51]:
# (Original author's note: run this after login.)
# Architecture under test and the checkpoint path derived from its layer widths
arch_folder = "input-dense-dense-dense/"
layers = [196,24,14,10]
model_name = "_".join(map(str, layers))
model_path = f"../../models/{arch_folder}{model_name}"

# NOTE: torch.load unpickles the file -- only load checkpoints you trust
state_dict = torch.load(f"{model_path}.pth")

# Scratch directory named after the layer widths
output_folder = f"./tmp/{model_name}/"
os.makedirs(output_folder, exist_ok=True)

In [53]:
class Net(nn.Module):
    """Three fully connected layers with a ReLU after the first two.

    Layer widths are read from the notebook-level ``layers`` list at
    construction time. ``num_classes`` is accepted but not used.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        in_dim, hidden1_dim, hidden2_dim, out_dim = layers[0], layers[1], layers[2], layers[3]
        self.fc1 = nn.Linear(in_dim, hidden1_dim)
        self.fc2 = nn.Linear(hidden1_dim, hidden2_dim)
        self.fc3 = nn.Linear(hidden2_dim, out_dim)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for ``x``."""
        hidden1 = F.relu(self.fc1(x))
        hidden2 = F.relu(self.fc2(hidden1))
        return self.fc3(hidden2)
    
# Build the network, restore the trained weights and switch to inference mode
model_pt = Net()
model_pt.load_state_dict(state_dict)
model_pt.eval()

# Reference outputs over the downsampled test images; no gradients needed
with torch.no_grad():
    predictions = model_pt(test_images_pt_downsampled)

# Most likely class per image, as a plain Python list
predicted_labels = torch.argmax(predictions, dim=1).tolist()

In [54]:
# Top-1 accuracy of the restored model on the downsampled test set
accuracy = evaluate_pytorch_model(
    model=model_pt,
    datasets=test_images_pt_downsampled,
    labels=test_labels_pt,
)
print(f'Accuracy of the PyTorch model on the test images: {accuracy:.8f}%')

Accuracy of the PyTorch model on the test images: 95.56000000%


### Benchmark

In [55]:
test_size = 10
# Write artefacts to this model's own scratch folder (built and created in the
# setup cell of this section) rather than benchmark()'s shared './tmp/' default,
# so runs for different models do not overwrite each other's files.
loss_, mem_usage_, time_cost_ = benchmark(
    test_images_pt_downsampled[:test_size],
    predicted_labels[:test_size],
    model_pt,
    output_folder=output_folder,
)

Process for image 0
Process for image 1
Process for image 2
Process for image 3
Process for image 4
Process for image 5
Process for image 6
Process for image 7
Process for image 8
Process for image 9
Total time: 7.431151866912842


In [None]:
# Describe the model from the objects that were actually benchmarked, so the
# row cannot drift from the section it belongs to. The previous literal label
# had an unbalanced parenthesis and named one dense layer fewer than
# `arch_folder` does.
# NOTE(review): confirm the derived label matches the labels already used for
# this architecture in the results CSV.
arch_name = "-".join(part.capitalize() for part in arch_folder.strip("/").split("-"))
layer_dims = "x".join(str(width) for width in layers)
n_parameters = sum(p.numel() for p in model_pt.parameters())  # weights + biases

new_row = {
    'Framework': ['ezkl (pytorch)'],
    'Architecture': [f"{arch_name} ({layer_dims})"],
    '# Layers': [len(layers)],
    '# Parameters': [n_parameters],
    'Testing Size': [test_size],
    'Accuracy Loss (%)': [loss_/test_size*100],
    'Avg Memory Usage (MB)': [sum(mem_usage_) / len(mem_usage_)],
    'Std Memory Usage': [pd.Series(mem_usage_).std()],
    'Avg Proving Time (s)': [sum(time_cost_) / len(time_cost_)],
    'Std Proving Time': [pd.Series(time_cost_).std()]
}

new_row_df = pd.DataFrame(new_row)

In [None]:
# Append this run's summary row to the results on disk and show the new table.
# Re-running this cell appends the same row again.
df = pd.concat([pd.read_csv(csv_path), new_row_df], ignore_index=True)
df.to_csv(csv_path, index=False)
df

## Benchmark for 784_56_10 DNN Model

In [56]:
# (Original author's note: run this after login.)
# Architecture under test and the checkpoint path derived from its layer widths
arch_folder = "input-dense-dense/"
layers = [784,56,10]
model_name = "_".join(map(str, layers))
model_path = f"../../models/{arch_folder}{model_name}"

# NOTE: torch.load unpickles the file -- only load checkpoints you trust
state_dict = torch.load(f"{model_path}.pth")

# Scratch directory named after the layer widths
output_folder = f"./tmp/{model_name}/"
os.makedirs(output_folder, exist_ok=True)

In [57]:
class Net(nn.Module):
    """Two fully connected layers with a ReLU in between.

    Layer widths are read from the notebook-level ``layers`` list at
    construction time. ``num_classes`` is accepted but not used.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        in_dim, hidden_dim, out_dim = layers[0], layers[1], layers[2]
        self.fc1 = nn.Linear(in_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, out_dim)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for ``x``."""
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)
    
# Build the network, restore the trained weights and switch to inference mode
model_pt = Net()
model_pt.load_state_dict(state_dict)
model_pt.eval()

# Reference outputs over the full-resolution (flattened 28*28) test images;
# no gradients needed
with torch.no_grad():
    predictions = model_pt(test_images_pt)

# Most likely class per image, as a plain Python list
predicted_labels = torch.argmax(predictions, dim=1).tolist()

In [58]:
# Top-1 accuracy of the restored model on the full-resolution test set
accuracy = evaluate_pytorch_model(
    model=model_pt,
    datasets=test_images_pt,
    labels=test_labels_pt,
)
print(f'Accuracy of the PyTorch model on the test images: {accuracy:.8f}%')

Accuracy of the PyTorch model on the test images: 97.40000000%


### Benchmark

In [59]:
test_size = 10
# Write artefacts to this model's own scratch folder (built and created in the
# setup cell of this section) rather than benchmark()'s shared './tmp/' default,
# so runs for different models do not overwrite each other's files.
loss_, mem_usage_, time_cost_ = benchmark(
    test_images_pt[:test_size],
    predicted_labels[:test_size],
    model_pt,
    output_folder=output_folder,
)

Process for image 0
Process for image 1
Process for image 2
Process for image 3
Process for image 4
Process for image 5
Process for image 6
Process for image 7
Process for image 8
Process for image 9
Total time: 30.195895195007324


In [None]:
# Describe the model from the objects that were actually benchmarked, so the
# row cannot drift from the section it belongs to (the previous hard-coded
# values described the 196x24x14x10 network).
arch_name = "-".join(part.capitalize() for part in arch_folder.strip("/").split("-"))
layer_dims = "x".join(str(width) for width in layers)
n_parameters = sum(p.numel() for p in model_pt.parameters())  # weights + biases

new_row = {
    'Framework': ['ezkl (pytorch)'],
    'Architecture': [f"{arch_name} ({layer_dims})"],
    '# Layers': [len(layers)],
    '# Parameters': [n_parameters],
    'Testing Size': [test_size],
    'Accuracy Loss (%)': [loss_/test_size*100],
    'Avg Memory Usage (MB)': [sum(mem_usage_) / len(mem_usage_)],
    'Std Memory Usage': [pd.Series(mem_usage_).std()],
    'Avg Proving Time (s)': [sum(time_cost_) / len(time_cost_)],
    'Std Proving Time': [pd.Series(time_cost_).std()]
}

new_row_df = pd.DataFrame(new_row)

In [None]:
# Append this run's summary row to the results on disk and show the new table.
# Re-running this cell appends the same row again.
df = pd.concat([pd.read_csv(csv_path), new_row_df], ignore_index=True)
df.to_csv(csv_path, index=False)
df