# Model Initialization

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import brevitas.nn as qnn

# Preprocessing pipeline: convert each image to a tensor, then normalise its
# single channel with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

# MNIST train / test splits stored under ./data (download=True is requested
# for both).
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

# Batched loaders; only the training split is shuffled.
batch_size = 64
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

class SimpleNN(nn.Module):
    """Four-stage fully connected network built from Brevitas quantized layers.

    The pipeline is ``quant_inp1`` followed by four stages of
    ``fc{k} -> relu{k} -> quant_inp{k+1}`` (k = 1..4), with layer widths
    784 -> 64 -> 64 -> 64 -> 10.

    Sub-module attribute names and their registration order are part of the
    contract: the names are the keys of the saved ``state_dict`` and the order
    is what ``named_modules()`` yields.
    """

    @staticmethod
    def _make_requant():
        """Build an 8-bit unsigned ``QuantIdentity`` that returns a QuantTensor."""
        return qnn.QuantIdentity(bit_width=8, signed=False, return_quant_tensor=True)

    @staticmethod
    def _make_linear(in_features, out_features):
        """Build a bias-free ``QuantLinear`` with 8-bit weights."""
        # bias_bit_width is passed through unchanged from the original
        # configuration even though bias=False.
        return qnn.QuantLinear(in_features, out_features, bias=False,
                               weight_bit_width=8,
                               bias_bit_width=8)

    @staticmethod
    def _make_relu(bit_width):
        """Build a ``QuantReLU`` of the given bit width that returns a QuantTensor."""
        return qnn.QuantReLU(bit_width=bit_width, return_quant_tensor=True)

    def __init__(self):
        super().__init__()
        self.quant_inp1 = self._make_requant()
        self.fc1 = self._make_linear(784, 64)
        self.relu1 = self._make_relu(26)
        self.quant_inp2 = self._make_requant()
        self.fc2 = self._make_linear(64, 64)
        self.relu2 = self._make_relu(22)
        self.quant_inp3 = self._make_requant()
        self.fc3 = self._make_linear(64, 64)
        self.relu3 = self._make_relu(22)
        self.quant_inp4 = self._make_requant()
        self.fc4 = self._make_linear(64, 10)
        self.relu4 = self._make_relu(22)
        self.quant_inp5 = self._make_requant()

    def forward(self, x):
        """Run the network and return only the output of ``quant_inp5``."""
        # Delegate so that the plain and the instrumented forward pass can
        # never diverge.
        return self.forward_compute(x)[0]

    def forward_compute(self, x):
        """Run the network and also collect the intermediate activations.

        Args:
            x: Input tensor; everything from dimension 1 onwards is flattened.

        Returns:
            A tuple ``(out, y, z)`` where

            * ``out`` is the output of ``quant_inp5``,
            * ``y`` is ``[quant_inp1 output, relu1, relu2, relu3, relu4]``
              (5 entries),
            * ``z`` is ``[quant_inp2, quant_inp3, quant_inp4, quant_inp5]``
              outputs (4 entries).
        """
        x = torch.flatten(x, 1)
        x = self.quant_inp1(x)

        y = [x]
        z = []
        stages = (
            (self.fc1, self.relu1, self.quant_inp2),
            (self.fc2, self.relu2, self.quant_inp3),
            (self.fc3, self.relu3, self.quant_inp4),
            (self.fc4, self.relu4, self.quant_inp5),
        )
        for linear, relu, requant in stages:
            x = relu(linear(x))
            y.append(x)
            x = requant(x)
            z.append(x)
        return x, y, z


No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'


In [2]:
# Instantiate the network, read the saved parameters onto the CPU and load
# them into the model.
model = SimpleNN()
state_dict = torch.load('model_weights_quantized.pth', map_location='cpu')
model.load_state_dict(state_dict)

<All keys matched successfully>

### Some Helper Functions

In [5]:
import numpy as np

In [3]:
def to_binary(val, bitwidth):
    """Return ``val`` as a two's-complement bit string of exactly ``bitwidth`` characters.

    Args:
        val: Integer to encode (a Python ``int`` or any object implementing
            ``__index__``, e.g. a NumPy integer scalar).
        bitwidth: Number of bits in the result.

    Returns:
        A string of ``'0'``/``'1'`` characters, zero-padded on the left.
        Negative values are encoded in two's complement.  Values that do not
        fit in ``bitwidth`` bits wrap around (only the low ``bitwidth`` bits
        are kept), so the result never contains a ``'-'`` sign and never
        exceeds the requested width.

    Raises:
        TypeError: If ``val`` is not an integer type (e.g. a float).
    """
    import operator

    # Convert to a Python int first so the mask cannot overflow a
    # fixed-width NumPy scalar type.
    mask = (1 << bitwidth) - 1
    return f"{operator.index(val) & mask:0{bitwidth}b}"

In [6]:
def float_to_int_n(values, scale, bit_width, signed=True):
    """Quantize floating-point values to saturated ``bit_width``-bit integers.

    Each value is divided by ``scale``, rounded with ``np.round`` (ties go to
    the nearest even integer) and clipped to the representable range:
    ``[-2**(bit_width-1), 2**(bit_width-1) - 1]`` when ``signed`` is true,
    ``[0, 2**bit_width - 1]`` otherwise.

    Args:
        values: Array-like of floats (NumPy array, list, or scalar).
        scale: Quantization step; a scalar or an array broadcastable against
            ``values``.
        bit_width: Target width in bits, 1 to 32 inclusive.
        signed: Select the signed (default) or unsigned integer range.

    Returns:
        ``np.int32`` array (signed) or ``np.uint32`` array (unsigned) with the
        same shape as the broadcast of ``values`` and ``scale``.

    Raises:
        ValueError: If ``bit_width`` is outside 1..32, the widths that fit the
            32-bit result dtype.
    """
    if not 1 <= bit_width <= 32:
        raise ValueError(f"bit_width must be between 1 and 32, got {bit_width}")

    # Divide and round in the input's own precision, then widen to float64
    # before clipping: for bit_width >= 25 the upper bound 2**n - 1 is not
    # representable in float32, so clipping in float32 would let a saturated
    # value through one step too high.
    rounded = np.round(np.asarray(values) / scale).astype(np.float64)

    if signed:
        lowest = -(1 << (bit_width - 1))
        highest = (1 << (bit_width - 1)) - 1
        out_dtype = np.int32
    else:
        lowest = 0
        highest = (1 << bit_width) - 1
        out_dtype = np.uint32

    # After clipping every value already fits in bit_width bits, so no
    # masking or sign extension is required.
    return np.clip(rounded, lowest, highest).astype(out_dtype)

In [None]:
def get_pytorch_quantized_outputs(quants, layers, bitwidth=8):
    """Convert selected quantized activations to unsigned integer arrays.

    Args:
        quants: Sequence of quantized tensors exposing ``.value`` and
            ``.scale``; entry ``k - 1`` belongs to layer ``k``.
        layers: Iterable of 1-based layer numbers to convert.  Numbers outside
            ``1..len(quants)`` are skipped silently.
        bitwidth: Width in bits of the unsigned integer range used for
            clipping.

    Returns:
        Dict mapping ``'layer{k}_pytorch'`` to the flattened integer array
        ``round(value / scale)`` for every requested layer ``k``.
    """
    outputs = {}

    for layer in layers:
        if not 1 <= layer <= len(quants):
            continue
        quant = quants[layer - 1]
        # detach()/cpu() keep the conversion working for tensors that track
        # gradients or live on another device.
        values = quant.value.detach().cpu().numpy().flatten()
        outputs[f'layer{layer}_pytorch'] = float_to_int_n(
            values, quant.scale.item(), bitwidth, False
        )

    return outputs

In [None]:
import numpy as np

def get_fpga_simulation_outputs(model, outputs, bitwidth=8):
    """Emulate the integer datapath of every ``QuantLinear`` layer of ``model``.

    Layers are numbered from 1 in the order ``model.modules()`` yields them.

    * Layer 1 is not recomputed from its integer weights.  The first
      activation recorded by PyTorch (``outputs[1]``) is converted to unsigned
      integers using the scale ``input_scale * weight_scale`` and that
      activation's own bit width, then divided by ``2**10``.
    * Every later layer multiplies its integer weight matrix by the previous
      layer's result and divides by ``2**8``.

    In both cases the quotient is truncated to an integer and negative
    entries are set to zero.
    NOTE(review): the divisors 2**10 and 2**8 presumably mirror right-shifts
    in the hardware design -- confirm against the RTL.

    Args:
        model: Network containing ``qnn.QuantLinear`` modules.
        outputs: Activation list as returned in second position by
            ``model.forward_compute``; only ``outputs[0]`` (its scale) and
            ``outputs[1]`` (value and bit width) are read.
        bitwidth: Ignored.  Kept for backward compatibility; the width is
            always taken from ``outputs[1].bit_width``.

    Returns:
        Dict mapping ``'FPGA_outputlayer{k}'`` to the integer output vector of
        layer ``k``.
    """
    results = {}
    layer_no = 0

    for module in model.modules():
        if not isinstance(module, qnn.QuantLinear):
            continue
        layer_no += 1

        if layer_no == 1:
            first_act = outputs[1]
            # quant_weight_scale() is not present in every Brevitas release;
            # the scale of the quantized weight tensor is the same quantity.
            if hasattr(module, 'quant_weight_scale'):
                weight_scale = module.quant_weight_scale()
            else:
                weight_scale = module.quant_weight().scale
            weight_scale = weight_scale.detach().cpu().numpy()

            acc_scale = outputs[0].scale.item() * weight_scale
            acc_bits = int(first_act.bit_width.item())
            acc_values = first_act.value.detach().cpu().numpy().flatten()
            scaled = float_to_int_n(acc_values, acc_scale, acc_bits, False) / (2**10)
        else:
            int_weights = module.int_weight().detach().cpu().numpy()
            previous = results[f'FPGA_outputlayer{layer_no - 1}']
            scaled = np.dot(int_weights, previous) / (2**8)

        out = scaled.astype(int)  # truncates toward zero
        out[out < 0] = 0          # ReLU on the integer result
        results[f'FPGA_outputlayer{layer_no}'] = out

    return results

In [8]:
# Dump the quantized network input of the first two training samples to
# "input_data.txt" as fixed-width binary words, one word per line.
model.eval()  # Set the model to evaluation mode
with open("input_data.txt", "w") as f:
    for i in range(2):
        data, targets = train_dataset[i]

        # Run the instrumented forward pass without tracking gradients.
        # `predicted` is computed here but not used below.
        with torch.no_grad():
            out, outputs, quants = model.forward_compute(data)
            _, predicted = torch.max(out.value, 1)
        
        # outputs[0] is the output of quant_inp1, i.e. the quantized input
        # that is fed to the first linear layer.
        output_values = outputs[0].value.numpy().flatten()
        scale = outputs[0].scale.item()
        # Words are written 9 bits wide although quant_inp1 is an 8-bit
        # unsigned quantizer.
        # NOTE(review): presumably the extra leading bit is a sign bit
        # expected by the hardware input port -- confirm.
        bitwidth = 9

        # Convert the floating point values to 9-bit unsigned integers
        # (signed=False is passed as the last argument).
        input_vector = float_to_int_n(output_values, scale, bitwidth, False)

        # Convert each integer to a 9-character binary string
        input_vector_verilog = [to_binary(val, bitwidth) for val in input_vector]
        
        # One binary word per line
        for bin_value in input_vector_verilog:
            f.write(f"{bin_value}\n")
    


In [None]:
def evaluate_model_with_detailed_analysis(model, dataset, bitwidth=8):
    """Compare the PyTorch and emulated-FPGA class decision for every sample.

    For each sample the predicted class is the argmax of the layer-4 integer
    output, taken once from ``get_pytorch_quantized_outputs`` and once from
    ``get_fpga_simulation_outputs``.

    Args:
        model: Network exposing ``forward_compute``.
        dataset: Iterable of ``(data, target)`` pairs; the target is not used.
        bitwidth: Bit width forwarded to ``get_pytorch_quantized_outputs``.

    Returns:
        ``(index, total_match, total_samples)``: the 0-based positions of the
        samples whose two class decisions differ, the number of agreeing
        samples, and the number of samples processed.
    """
    model.eval()  # Set the model to evaluation mode

    layers = [4]
    index = []
    total_samples = 0

    with torch.no_grad():
        for sample_no, (data, _target) in enumerate(dataset):
            _, outputs, quants = model.forward_compute(data)
            py = get_pytorch_quantized_outputs(quants, layers, bitwidth)['layer4_pytorch']
            viv = get_fpga_simulation_outputs(model, outputs)['FPGA_outputlayer4']

            if np.argmax(py) != np.argmax(viv):
                index.append(sample_no)
            total_samples += 1

    total_match = total_samples - len(index)
    return index, total_match, total_samples

In [None]:
# The function returns (mismatch indices, number of matches, number of
# samples), so unpack in that order: `match` is the match count and `total`
# is the sample count.
index, match, total = evaluate_model_with_detailed_analysis(model, train_dataset, bitwidth=8)

#### Storing Mismatch Index

In [None]:
import torch
import numpy as np

def evaluate_model_with_detailed_analysis(model, dataset, indices, output_file, bitwidth=8):
    """Re-check selected samples and log both layer-4 output vectors to a file.

    For every dataset index in ``indices`` the layer-4 integer output is
    obtained once from ``get_pytorch_quantized_outputs`` and once from
    ``get_fpga_simulation_outputs``; both vectors are written to
    ``output_file`` and their argmax classes are compared.

    Args:
        model: Network exposing ``forward_compute``.
        dataset: Indexable collection of ``(data, target)`` pairs; the target
            is not used.
        indices: Dataset indices to evaluate.
        output_file: Path of the text report; overwritten if it exists.
        bitwidth: Bit width forwarded to ``get_pytorch_quantized_outputs``.

    Returns:
        ``(index, total_match, total_samples)``: the dataset indices (taken
        from ``indices``) whose two class decisions differ, the number of
        agreeing samples, and the number of samples processed.
    """
    model.eval()  # Set the model to evaluation mode

    layers = [4]
    index = []
    total_match = 0
    total_samples = 0

    with torch.no_grad(), open(output_file, 'w') as f:
        for i in indices:
            data, _target = dataset[i]
            _, outputs, quants = model.forward_compute(data)
            py = get_pytorch_quantized_outputs(quants, layers, bitwidth)['layer4_pytorch']
            viv = get_fpga_simulation_outputs(model, outputs)['FPGA_outputlayer4']

            f.write(f"Index: {i}\n")
            f.write(f"python: {py} vivado: {viv}\n\n")

            if np.argmax(py) == np.argmax(viv):
                total_match += 1
            else:
                # Record the dataset index itself, matching the "Index: {i}"
                # line in the report, not the position inside `indices`.
                index.append(i)

            total_samples += 1

    return index, total_match, total_samples

# Dataset indices (into train_dataset) to re-evaluate; the per-sample report
# goes to output_file.
indices = [
    1674, 2734, 3065, 6418, 10852, 13234, 15741, 16446,
    16658, 17728, 18966, 22270, 28632, 29609, 31347, 32573,
    34785, 35382, 41897, 42986, 45925, 45930, 48166, 50091,
    50239, 51544, 53578, 55078, 56842, 57302, 57982, 58822,
]

output_file = "output.txt"

index, total_match, total_samples = evaluate_model_with_detailed_analysis(
    model,
    train_dataset,
    indices=indices,
    output_file=output_file,
)

print(f"Indices where classes did not match: {index}")
print(f"Total matches: {total_match}")
print(f"Total samples: {total_samples}")
