# Hello World Example

This is a simple Jupyter Notebook that walks through the 4 steps of compiling and running a PyTorch model on the embedded Neural Processing Unit (NPU) in your AMD Ryzen AI enabled PC. The steps are as follows:

1. Get model
2. Export to ONNX
3. Quantize
4. Run Model on CPU and NPU (the NPU is referred to as "IPU" in the code below)

In [1]:
# Before starting, be sure you've installed the requirements listed in the requirements.txt file:
!python -m pip install -r requirements.txt



### 1. Get Model
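Here, we'll use the PyTorch library to define and instantiate the models used in this notebook: an FIR filter layer (`FIR_layer`) and polyphase-filter-bank layers (`PFB_FIR`, `PFB_FIR_FFT`), all built on `nn.Conv2d`.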

In [1]:
import os

# Show the current setting before overriding it. The variable may be unset in a
# fresh environment, so use .get() (prints None) instead of indexing, which
# would raise KeyError and abort the cell.
print(os.environ.get('NUM_OF_DPU_RUNNERS'))
os.environ['NUM_OF_DPU_RUNNERS'] = "4"
print(os.environ['NUM_OF_DPU_RUNNERS'])

1
4


In [2]:
import scipy

def generate_win_coeffs(M, P, window_fn="hamming"):
    """Build the ``M * P`` prototype filter taps used by the PFB/FIR layers.

    The taps are the element-wise product of a window of length ``M * P`` and
    a low-pass FIR design (``firwin`` with a rectangular window) whose
    normalized cutoff is ``1 / P``.

    Args:
        M: Number of taps per branch.
        P: Number of branches.
        window_fn: Window specification accepted by ``scipy.signal.get_window``.

    Returns:
        1-D ``numpy.ndarray`` of length ``M * P``.
    """
    # Import the submodule explicitly: a bare ``import scipy`` is not
    # guaranteed to make ``scipy.signal`` available on every SciPy version.
    from scipy import signal

    n_taps = M * P
    window = signal.get_window(window_fn, n_taps)
    lowpass = signal.firwin(n_taps, cutoff=1.0 / P, window="rectangular")
    return window * lowpass

In [3]:
import torch
import torch.nn as nn
import torch.fft as fft
import numpy as np

# PyTorch FIR layer
class FIR_layer(nn.Module):
    """FIR filter expressed as a single-channel ``nn.Conv2d`` with frozen taps.

    ``win_coeffs`` (a 1-D tensor of taps) becomes the ``(1, size)`` kernel of
    the convolution; the width axis is zero-padded by ``size // 2`` on each
    side and the kernel is excluded from gradient computation.
    """

    def __init__(self, win_coeffs):
        super().__init__()
        self.size = win_coeffs.shape[0]
        # Conv2d kernels are laid out as (out_channels, in_channels, height, width).
        self.win_coeffs = win_coeffs[None, None, None, :]
        self.FIR = nn.Conv2d(
            in_channels=1,
            out_channels=1,
            kernel_size=(1, self.size),
            stride=(1, 1),
            padding=(0, self.size // 2),
            bias=False,
            groups=1,
        )
        self.FIR.weight = nn.Parameter(self.win_coeffs)
        # bias=False, so the kernel is the only parameter that gets frozen here.
        self.FIR.requires_grad_(False)

    def forward(self, input):
        """Apply the fixed convolution to ``input`` and return the result."""
        return self.FIR(input)

class FFTLayer(nn.Module):
    """DFT over the channel axis, implemented as a 1x1 ``nn.Conv2d``.

    The convolution maps ``input_size`` channels to ``2 * input_size``
    channels: the first half of the output channels holds the real part of
    the DFT, the second half the imaginary part. The kernel is initialised
    from ``np.fft.fft(np.eye(input_size))`` and is left trainable.
    """

    def __init__(self, input_size):
        super().__init__()
        self.input_size = input_size
        self.FFTconv = nn.Conv2d(input_size, input_size * 2, kernel_size=(1, 1), bias=False)

        # DFT matrix (complex128), split into stacked real / imaginary rows.
        dft = torch.from_numpy(np.fft.fft(np.eye(input_size)))
        kernel = torch.cat((dft.real, dft.imag), dim=0).float()
        with torch.no_grad():
            # Trailing singleton axes give the (out, in, 1, 1) Conv2d kernel layout.
            self.FFTconv.weight.copy_(kernel[:, :, None, None])

    def forward(self, x):
        """Return the stacked real/imaginary DFT of ``x`` along its channel axis."""
        return self.FFTconv(x)

class PFB_FIR(nn.Module):
    """FIR stage of a filter bank with ``P`` branches of ``M`` taps each.

    The flat ``win_coeffs`` (length ``M * P``) are split into one ``M``-tap
    kernel per branch and applied with a grouped ``nn.Conv2d`` (one group per
    branch). The kernels are frozen.

    Args:
        win_coeffs: 1-D tensor of ``M * P`` taps.
        M: Taps per branch.
        P: Number of branches.
        expected_input_size: Number of samples per input row; used to derive
            the number of ``M * P`` windows ``W``.
    """

    def __init__(self, win_coeffs, M, P, expected_input_size):
        super().__init__()
        # (M, P) -> (P, M): row p holds the M taps of branch p; then add the
        # singleton in-channel / height axes expected by a grouped Conv2d.
        per_branch_taps = win_coeffs.reshape((M, P)).T
        self.win_coeffs = per_branch_taps.unsqueeze(1).unsqueeze(1)
        self.P = P
        self.M = M
        self.size = expected_input_size
        self.W = int(expected_input_size / M / P)
        self.WM = M * self.W
        # Width of the FIR output produced by forward().
        self.Maxsize = self.WM - M
        self.FIR = nn.Conv2d(
            in_channels=P,
            out_channels=P,
            kernel_size=(1, M),
            stride=(1, 1),
            padding=(0, 0),
            bias=False,
            groups=P,
        )
        self.FIR.weight = nn.Parameter(self.win_coeffs)
        # bias=False, so this freezes the only parameter of the convolution.
        self.FIR.requires_grad_(False)

    def forward(self, input):
        """Filter ``input`` of shape ``(batch, W * M * P)`` branch by branch.

        Returns a tensor of shape ``(batch, P, 1, Maxsize)``.
        """
        batch = input.shape[0]
        # (batch, W*M, 1, P) -> (batch, P, 1, W*M): one channel per branch.
        branches = input.view(batch, self.WM, 1, self.P).permute(0, 3, 2, 1)
        # Dropping the last sample makes the valid convolution yield Maxsize outputs.
        branches = branches[:, :, :, 0:self.WM - 1]
        return self.FIR(branches)


class PFB_FIR_FFT(nn.Module):
    """Per-branch FIR (grouped ``nn.Conv2d``) followed by an ``FFTLayer``.

    The FIR stage uses ``P`` branches of ``M`` taps taken from ``win_coeffs``;
    unlike ``PFB_FIR`` its kernels are left trainable. The FIR output is then
    passed through an ``FFTLayer`` built for ``Maxsize`` input channels.

    Args:
        win_coeffs: 1-D tensor of ``M * P`` taps.
        M: Taps per branch.
        P: Number of branches.
        expected_input_size: Number of samples per input row; used to derive
            the number of ``M * P`` windows ``W``.
    """

    def __init__(self, win_coeffs, M, P, expected_input_size):
        super().__init__()
        # (M, P) -> (P, M): row p holds the M taps of branch p; then add the
        # singleton in-channel / height axes expected by a grouped Conv2d.
        per_branch_taps = win_coeffs.reshape((M, P)).T
        self.win_coeffs = per_branch_taps.unsqueeze(1).unsqueeze(1)
        self.P = P
        self.M = M
        self.size = expected_input_size
        self.W = int(expected_input_size / M / P)
        self.WM = M * self.W
        # Width of the FIR output, and channel count of the FFT stage.
        self.Maxsize = self.WM - M
        self.FIR = nn.Conv2d(
            in_channels=P,
            out_channels=P,
            kernel_size=(1, M),
            stride=(1, 1),
            padding=(0, 0),
            bias=False,
            groups=P,
        )
        self.FFTlayer = FFTLayer(input_size=self.Maxsize)
        # The FIR kernel is intentionally not frozen here (requires_grad stays True).
        self.FIR.weight = nn.Parameter(self.win_coeffs)

    def forward(self, input):
        """Run the FIR stage and then the FFT stage on ``(batch, W * M * P)`` input.

        Returns a tensor of shape ``(batch, 2 * Maxsize, 1, P)``.
        """
        batch = input.shape[0]
        # (batch, W*M, 1, P) -> (batch, P, 1, W*M): one channel per branch.
        branches = input.view(batch, self.WM, 1, self.P).permute(0, 3, 2, 1)
        # Dropping the last sample makes the valid convolution yield Maxsize outputs.
        filtered = self.FIR(branches[:, :, :, 0:self.WM - 1])
        # NOTE(review): this is a `view` (memory reinterpretation), not a
        # permute/transpose, of the (batch, P, 1, Maxsize) FIR output - confirm
        # this is the intended layout for the DFT stage.
        return self.FFTlayer(filtered.view(filtered.shape[0], self.Maxsize, 1, self.P))


class slidingwindow_layer(nn.Module):
    """Sliding-window extraction implemented as an ``nn.Conv2d`` with one-hot kernels.

    Output channel ``j`` copies the ``j``-th sample of each window of width
    ``size``; windows advance by ``stride`` along the width axis. ``padding``
    is applied to both the height and the width axis.
    """

    def __init__(self, size, stride, padding) -> None:
        super().__init__()
        self.size = size
        self.stride = stride
        self.padding = padding
        self.Slidwind = nn.Conv2d(
            in_channels=1,
            out_channels=size,
            kernel_size=(1, size),
            stride=(1, stride),
            padding=(padding, padding),
            bias=False,
            groups=1,
        )
        # Kernel j is the j-th row of the identity matrix, so it picks sample j.
        one_hot_kernels = torch.eye(size).reshape(size, 1, 1, size)
        with torch.no_grad():
            self.Slidwind.weight.copy_(one_hot_kernels)

    def forward(self, input):
        """Return the windows of ``input`` stacked along the channel axis."""
        return self.Slidwind(input)

In [4]:
def createmodel(size):
    """Export a ``size``-tap ``FIR_layer`` to ONNX and quantize it.

    A ``FIR_layer`` with random taps is exported to ``models/FIR.onnx`` using a
    random example input of shape ``(1, 1, 1, size * 10)``; the exported model
    is then quantized to ``models/FIR_quantized.onnx`` with
    ``vai_q_onnx.quantize_static``.

    Args:
        size: Number of FIR taps; the example input holds ``size * 10`` samples.

    Returns:
        None. The two model files are written as a side effect.
    """
    import os

    onnx_path = "models/FIR.onnx"
    quantized_path = "models/FIR_quantized.onnx"
    # The export writes straight to this path, so make sure the directory exists.
    os.makedirs(os.path.dirname(onnx_path), exist_ok=True)

    # Random taps; FIR_layer installs them as its fixed convolution kernel.
    coeffs = torch.from_numpy(np.random.rand(size))
    fir_model = FIR_layer(win_coeffs=coeffs).float()

    # Example input used for tracing: (batch, channel, height, width).
    example_input = torch.from_numpy(np.random.rand(1, 1, 1, size * 10)).float()

    torch.onnx.export(
        fir_model,                     # model being run
        example_input,                 # model input (or a tuple for multiple inputs)
        onnx_path,                     # where to save the model
        export_params=True,            # store the parameter weights inside the model file
        opset_version=13,              # the ONNX version to export the model to
        input_names=['input'],         # the model's input names
        output_names=['output'],       # the model's output names
        dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}},  # variable length axes
    )

    # No calibration data reader is supplied (calibration_data_reader=None).
    vai_q_onnx.quantize_static(
        onnx_path,
        quantized_path,
        calibration_data_reader=None,
        quant_format=vai_q_onnx.QuantFormat.QDQ,
        calibrate_method=vai_q_onnx.PowerOfTwoMethod.MinMSE,
        activation_type=vai_q_onnx.QuantType.QUInt8,
        weight_type=vai_q_onnx.QuantType.QInt8,
        enable_ipu_cnn=True,
        extra_options={'ActivationSymmetric': True},
    )
    

In [5]:
def runIPU(input_data):
    """Time one inference of ``models/FIR_quantized.onnx`` on the VitisAI provider.

    A fresh ``InferenceSession`` is created on every call, one untimed warm-up
    run is executed, and then a second run is timed.

    Args:
        input_data: Array fed to the model's ``'input'`` tensor.

    Returns:
        Elapsed time of the second (timed) run, as measured by ``timer()``.
    """
    # Point to the config file path used for the VitisAI Execution Provider
    config_file_path = "vaip_config.json"

    # Build the session options locally so the function does not depend on a
    # notebook-global `aie_options` (defined in a later cell, with profiling
    # enabled). Default options: no profiling during the benchmark.
    session_options = onnxruntime.SessionOptions()

    aie_session = onnxruntime.InferenceSession(
        "models/FIR_quantized.onnx",
        providers=['VitisAIExecutionProvider'],
        sess_options=session_options,
        provider_options=[{'config_file': config_file_path}],
    )

    feeds = {'input': input_data}
    # Warm-up run, deliberately left out of the measurement.
    aie_session.run(None, feeds)

    start = timer()
    aie_session.run(None, feeds)
    return timer() - start

In [6]:
def pfb_fir_frontend_FFT(x, win_coeffs, M, P):
    """NumPy reference: per-branch FIR followed by an FFT across the branches.

    Args:
        x: 1-D array whose length is a multiple of ``M * P``.
        win_coeffs: 1-D array of ``M * P`` filter taps.
        M: Taps per branch.
        P: Number of branches.

    Returns:
        Complex array of shape ``(M * W - M, P)`` with ``W = len(x) / (M * P)``:
        the FFT (along the branch axis) of the filtered branches.
    """
    n_windows = int(x.shape[0] / M / P)
    # Row p of each matrix holds the samples / taps belonging to branch p.
    branch_samples = x.reshape((n_windows * M, P)).T
    branch_taps = win_coeffs.reshape((M, P)).T

    n_outputs = M * n_windows - M
    filtered = np.zeros((P, n_outputs))
    for start in range(n_outputs):
        window = branch_samples[:, start:start + M]
        filtered[:, start] = (window * branch_taps).sum(axis=1)

    return np.fft.fft(filtered.T, axis=1)

def numpy_unfold(vector, size = 3):
    """Return all length-``size`` sliding windows of ``vector`` as rows.

    Row ``i`` of the result holds ``vector[i], ..., vector[i + size - 1]``.
    The result is a float array of shape ``(vector.size - size + 1, size)``.
    """
    n_rows = vector.size - size + 1
    windows = np.ones((n_rows, size))
    # Visit every (row, offset) cell and copy the matching input sample.
    for row, offset in np.ndindex(n_rows, size):
        windows[row, offset] = vector[row + offset]
    return windows

# Numpy FIR filter implementation
def numpy_fir_filter(signal, coefficients):
    """Apply a causal FIR filter with explicit Python loops.

    Computes ``y[n] = sum_k coefficients[k] * signal[n - k]`` over the taps
    for which ``n - k >= 0``.

    Args:
        signal: Sequence of input samples.
        coefficients: Sequence of filter taps.

    Returns:
        Float array with the same length as ``signal``.
    """
    tap_count = len(coefficients)
    sample_count = len(signal)
    result = np.zeros(sample_count)

    for idx in range(sample_count):
        # Only taps 0..idx have a matching past sample (idx - tap >= 0).
        for tap in range(min(tap_count, idx + 1)):
            result[idx] += coefficients[tap] * signal[idx - tap]

    return result

In [7]:
import onnxruntime
import numpy as np
from timeit import default_timer as timer

def benchmarkCPU(size):
    """Time one ``numpy_fir_filter`` pass over random data on the CPU.

    Args:
        size: Number of filter taps; the random test signal has ``size * 10``
            samples.

    Returns:
        Elapsed time of the filter call, as measured by ``timer()``.
    """
    # Random float32 signal and taps, both uniform in [-1, 1).
    signal = np.random.uniform(low=-1, high=1, size=[size * 10]).astype(np.float32)
    taps = np.random.uniform(low=-1, high=1, size=[size]).astype(np.float32)

    began = timer()
    numpy_fir_filter(signal = signal, coefficients = taps)
    return timer() - began

In [8]:
def comparedspeedup(time1, time2):
    """Return the smaller of the two given times.

    NOTE(review): despite the name, no speedup ratio is computed here.
    """
    return time1 if time1 <= time2 else time2

In [9]:
import time


# Sweep/configuration constants. NOTE(review): apart from `batchsize`, none of
# these are referenced in the visible part of this notebook - confirm whether
# they are still needed.
M_increment = 1
P_increment = 16
Min_M = 1
Min_P = 16
Max_M = 30
Max_P = 1024
batchsize = 1
Maxloop = 30

# Placeholder for results
results = []

# Filter-bank configuration: M taps per branch, P branches.
M = 16
P = 256

# Test signal of M * P * 10 samples. The sine is only used for its shape: it is
# overwritten with random data a few lines below.
x = np.sin(np.arange(0, M * P * 10) / np.pi)
win_coeffs = generate_win_coeffs(M, P, window_fn="hamming")




x = np.random.rand(*x.shape).astype(np.float32)

# The PyTorch layers expect the taps as a tensor.
win_coeffs = torch.from_numpy(win_coeffs)

# Build the PFB (FIR + FFT) model and run it once on a random batch of shape
# (batchsize, M * P * 10).
xinput = np.random.rand(batchsize,*x.shape)
PFB_layer = PFB_FIR_FFT(win_coeffs = win_coeffs, M = M, P = P, expected_input_size=xinput.shape[1])
PFB_layer = PFB_layer.float()
xinput = torch.from_numpy(xinput).float()
output = PFB_layer(xinput)

# Show the model output and the module structure.
print(output)
print(PFB_layer)

tensor([[[[ 2.9959e-01,  2.9181e-01,  2.8480e-01,  ...,  2.7345e-01,
            2.7739e-01,  3.0410e-01]],

         [[ 8.5603e-03,  1.0971e-02, -9.5091e-03,  ...,  8.1866e-03,
            7.7269e-03,  3.2711e-03]],

         [[ 7.6073e-03,  6.6400e-03, -4.5426e-03,  ...,  4.5335e-03,
            5.7717e-03, -6.6657e-03]],

         ...,

         [[-8.0581e-03, -7.6584e-03,  3.5552e-03,  ...,  8.8564e-04,
            7.9498e-03,  2.3655e-04]],

         [[ 1.3244e-02,  3.9225e-03, -8.9702e-03,  ..., -7.1733e-03,
           -8.8369e-03,  8.1739e-03]],

         [[-3.2241e-03, -1.3147e-02,  1.4094e-03,  ...,  1.4664e-02,
            1.5406e-02, -2.2268e-02]]]], grad_fn=<ConvolutionBackward0>)
PFB_FIR_FFT(
  (FIR): Conv2d(256, 256, kernel_size=(1, 16), stride=(1, 1), groups=256, bias=False)
  (FFTlayer): FFTLayer(
    (FFTconv): Conv2d(144, 288, kernel_size=(1, 1), stride=(1, 1), bias=False)
  )
)


In [10]:
# Convert the PFB model to ONNX, using `xinput` from the previous cell as the
# example input. Only the batch axis (axis 0) is declared dynamic.
tmp_model_path = "models/pfb.onnx"
torch.onnx.export(
    PFB_layer,                     # model being run
    xinput,                 # model input (or a tuple for multiple inputs)
    tmp_model_path,                     # where to save the model
    export_params=True,            # store the trained parameter weights inside the model file
    opset_version=13,              # the ONNX version to export the model to
    input_names=['input'],         # the model's input names
    output_names=['output'],       # the model's output names
    dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}  # variable length axes
)


In [11]:
import vai_q_onnx

# `input_model_path` is the path to the original, unquantized ONNX model.
input_model_path = "models/pfb.onnx"

# `output_model_path` is the path where the quantized model will be saved.
output_model_path = "models/pfb_quantized.onnx"

# Static quantization in QDQ format with unsigned 8-bit activations and signed
# 8-bit weights. With `calibration_data_reader=None` the quantizer logs that it
# uses random data for calibration.
vai_q_onnx.quantize_static(
    input_model_path,
    output_model_path,
    calibration_data_reader=None,
    quant_format=vai_q_onnx.QuantFormat.QDQ,
    calibrate_method=vai_q_onnx.PowerOfTwoMethod.MinMSE,
    activation_type=vai_q_onnx.QuantType.QUInt8,
    weight_type=vai_q_onnx.QuantType.QInt8,
    enable_ipu_cnn=True,
    extra_options={'ActivationSymmetric': True}
)

print('Calibrated and quantized model saved at:', output_model_path)

INFO:vai_q_onnx.quantize:calibration_data_reader is None, using random data for calibration
INFO:vai_q_onnx.quant_utils:The input ONNX model models/pfb.onnx can create InferenceSession successfully
INFO:vai_q_onnx.quant_utils:Random input name input shape [1, 40960] type <class 'numpy.float32'> 
INFO:vai_q_onnx.quant_utils:Obtained calibration data with 1 iters
INFO:vai_q_onnx.quant_utils:The input ONNX model models/pfb.onnx can run inference successfully
INFO:vai_q_onnx.quantize:Removed initializers from input
INFO:vai_q_onnx.quantize:Loading model...
INFO:vai_q_onnx.quantize:enable_ipu_cnn is True, optimize the model for better hardware compatibility.
INFO:vai_q_onnx.quantize:Start calibration...
INFO:vai_q_onnx.quantize:Start collecting data, runtime depends on your model size and the number of calibration dataset.
INFO:vai_q_onnx.calibrate:Finding optimal threshold for each tensor using PowerOfTwoMethod.MinMSE algorithm ...
INFO:vai_q_onnx.calibrate:Use all calibration data to calc

[VAI_Q_ONNX_INFO]: Time information:
2024-06-17 11:01:21.009245
[VAI_Q_ONNX_INFO]: OS and CPU information:
                                        system --- Windows
                                          node --- ChrisMiniPC
                                       release --- 10
                                       version --- 10.0.22631
                                       machine --- AMD64
                                     processor --- AMD64 Family 25 Model 116 Stepping 1, AuthenticAMD
[VAI_Q_ONNX_INFO]: Tools version information:
                                        python --- 3.9.19
                                          onnx --- 1.16.1
                                   onnxruntime --- 1.15.1
                                    vai_q_onnx --- 1.16.0+69bc4f2
[VAI_Q_ONNX_INFO]: Quantized Configuration information:
                                   model_input --- models/pfb.onnx
                                  model_output --- models/pfb_quantized.onnx
          

Computing range: 100%|██████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 875.43tensor/s]
INFO:vai_q_onnx.qdq_quantizer:Remove QuantizeLinear & DequantizeLinear on certain operations(such as conv-relu).


Calibrated and quantized model saved at: models/pfb_quantized.onnx


In [12]:
import onnxruntime
import numpy as np
from timeit import default_timer as timer

# Specify the path to the quantized ONNX model
onnx_model_path = "models/pfb_quantized.onnx"

# Create some random input data for testing: one row of M * P * 10 float32
# samples, uniform in [-1, 1).
input_data = np.random.uniform(low=-1, high=1, size=[1,M * P * 10]).astype(np.float32)

# Enable ONNX Runtime profiling for this session.
cpu_options = onnxruntime.SessionOptions()
cpu_options.enable_profiling = True

# Create Inference Session to run the quantized model on the CPU
cpu_session = onnxruntime.InferenceSession(
    onnx_model_path,
    providers = ['CPUExecutionProvider'],
    sess_options=cpu_options,
)
# Time a single inference; session creation is not included in the measurement.
start = timer()
cpu_results = cpu_session.run(None, {'input': input_data})
cpu_total = timer() - start

# Stop profiling; as the last expression of the cell, the returned profile
# file name is displayed.
cpu_session.end_profiling()


'onnxruntime_profile__2024-06-17_11-01-34.json'

In [13]:
# Compile and run the same quantized model through the VitisAI Execution Provider.

# Point to the config file path used for the VitisAI Execution Provider
config_file_path = "vaip_config.json"

# Enable ONNX Runtime profiling for this session.
# NOTE: `aie_options` is also read as a global by `runIPU` defined earlier.
aie_options = onnxruntime.SessionOptions()
aie_options.enable_profiling = True

aie_session = onnxruntime.InferenceSession(
    onnx_model_path,
    providers = ['VitisAIExecutionProvider'],
    sess_options=aie_options,
    provider_options=[{'config_file': config_file_path}]
)

# Time a single inference on the same `input_data` used for the CPU run.
# NOTE(review): this is the first run of the session, with no warm-up (unlike
# `runIPU`), so the timing may include one-time initialization - confirm.
start = timer()
ryzen_outputs = aie_session.run(None, {'input': input_data})
aie_total = timer() - start

# Stop profiling; the returned profile file name is displayed by the cell.
aie_session.end_profiling()

'onnxruntime_profile__2024-06-17_11-01-34.json'

In [14]:
# Show the raw outputs of both runs so they can be compared by eye.
print(f"Ryzen Results: {ryzen_outputs}")
print(f"CPU Results: {cpu_results}")

# Wall-clock time of the single timed inference on each backend.
print(f"CPU Total Time: {cpu_total}")
print(f"IPU Total Time: {aie_total}")

Ryzen Results: [array([[[[ 0.015625  ,  0.04296875,  0.03515625, ..., -0.0078125 ,
           0.0078125 ,  0.015625  ]],

        [[-0.01171875,  0.        ,  0.        , ...,  0.01953125,
           0.        , -0.01171875]],

        [[-0.00390625,  0.00390625, -0.0234375 , ...,  0.0078125 ,
           0.01171875,  0.03515625]],

        ...,

        [[-0.0078125 , -0.01171875,  0.0234375 , ..., -0.01171875,
           0.0390625 , -0.02734375]],

        [[ 0.015625  ,  0.01171875,  0.015625  , ..., -0.00390625,
           0.0078125 ,  0.00390625]],

        [[-0.01953125,  0.01953125,  0.01171875, ...,  0.03125   ,
           0.00390625, -0.0234375 ]]]], dtype=float32)]
CPU Results: [array([[[[ 0.015625  ,  0.04296875,  0.03515625, ..., -0.0078125 ,
           0.0078125 ,  0.015625  ]],

        [[-0.01171875,  0.        ,  0.        , ...,  0.01953125,
           0.        , -0.01171875]],

        [[-0.00390625,  0.00390625, -0.0234375 , ...,  0.0078125 ,
           0.01171875,  

In [15]:
import time
import numpy as np
import pandas as pd
import torch



# Benchmark sweep: for FIR sizes Min_Range, Min_Range * base, ... up to
# Max_Range, time the CPU reference filter and the quantized model on the IPU,
# Maxloop times each, and export all measurements to an Excel file.
# NOTE(review): `Range_increment` and `device` are not used by the sweep below.
Range_increment = 1024
Min_Range = 8
Max_Range = 2048
base = 2
Maxloop = 50

# Placeholder for results
results = []
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Running on device: {device}")

current_range = Min_Range
while current_range <= Max_Range:
    #print("This is P: ", P)
    print("current range", current_range)
    # `x` is only used to derive `bytes_used` below.
    x = np.random.rand(current_range).astype(np.float32)
    #y = np.random.rand(current_range,current_range).astype(np.float32)

    # Reset the per-size measurements.
    elapsed_time_FFT = 0
    elapsed_time_TINA_FFT_32_bit = 0
    

    # NOTE(review): these two names are spelled "througput" and are never read;
    # the loop below assigns `throughput_*` instead.
    througput_FFT = 0
    througput_TINA_FFT_32_bit = 0
    # Export and quantize a FIR model with `current_range` taps; `runIPU`
    # loads the resulting models/FIR_quantized.onnx.
    createmodel(current_range)


    # NOTE(review): this is the size of `x` (current_range float32 values),
    # while the filtered input holds current_range * 10 samples - confirm which
    # size the per-byte figures should be based on.
    bytes_used = x.nbytes
    for i in range(Maxloop):
        x = np.random.rand(*x.shape).astype(np.float32)
        input_data = np.random.uniform(low=-1, high=1, size=[1, 1, 1, current_range * 10]).astype(np.float32)
        #print(x.shape)
        #y = np.random.rand(*y.shape).astype(np.float32)




        # Time the CPU reference (benchmarkCPU generates its own random data).
        elapsed_time_FFT = benchmarkCPU(size = current_range)


        
        # Time the quantized FIR model on the IPU.
        #torch_y = torch.from_numpy(y).to(device).float()
        #torch_y = torch_y.view( current_range, current_range)
        
        elapsed_time_TINA_FFT_32_bit = runIPU(input_data = input_data)
        

        
        """
        are_results_equal = check_results(result_np, result_cp, resultTINA_32bit_np, resultTINA_16bit_np, resultjax_np)

        if are_results_equal is False:
          print("results are not equal")
          print("original: ", result_np )
          print("cupy: ", result_cp)
          print("TINA 32bit: ", resultTINA_32bit_np)
          print("TINA 16bit: ", resultTINA_16bit_np)
          print("JAX: ", resultjax_np)
          """


        # Speedup of the IPU run relative to the CPU reference (CPU time / IPU time).
        speedup_TINA_FFT_32_bit = elapsed_time_FFT / elapsed_time_TINA_FFT_32_bit
        

        # NOTE(review): named "throughput" but computed as time / bytes
        # (seconds per byte), i.e. the inverse of a throughput - confirm intent.
        throughput_FFT = elapsed_time_FFT/bytes_used
        throughput_TINA_FFT_32_bit = elapsed_time_TINA_FFT_32_bit/bytes_used
        #print("TINA: ", elapsed_time_TINA_FFT)
        #print("cupy: ", elapsed_time_cp_FFT)

        # Append the results to the list (one row per iteration)
        results.append({
        'elapsed_time_Multiplication_numpy': elapsed_time_FFT,
        'elapsed_time_TINA_Multiplication_32_bit': elapsed_time_TINA_FFT_32_bit,
        'speedup_TINA_Multiplication_32_bit': speedup_TINA_FFT_32_bit,
        'throughput_Multiplication_FFT': throughput_FFT,
        'throughput_TINA_Multiplication_32_bit': throughput_TINA_FFT_32_bit,
        'bytes used': bytes_used
        })


    # Geometric progression of sizes: 8, 16, 32, ...
    current_range *= base
    current_range = int(current_range)
# Create a DataFrame with the results
df = pd.DataFrame(results)

# The rows are stored per iteration; no averaging over Maxloop is applied.
#df[['elapsed_time', 'elapsed_time_FFT', 'elapsed_time_cp', 'elapsed_time_cp_FFT', 'elapsed_time_TINA', 'elapsed_time_TINA_FFT']] /= Maxloop

# Export the DataFrame to an Excel file
df.to_excel('Benchmarks_FIR.xlsx', index=False)

INFO:vai_q_onnx.quantize:calibration_data_reader is None, using random data for calibration
INFO:vai_q_onnx.quant_utils:The input ONNX model models/FIR.onnx can create InferenceSession successfully
INFO:vai_q_onnx.quant_utils:Random input name input shape [1, 1, 1, 80] type <class 'numpy.float32'> 
INFO:vai_q_onnx.quant_utils:Obtained calibration data with 1 iters
INFO:vai_q_onnx.quant_utils:The input ONNX model models/FIR.onnx can run inference successfully
INFO:vai_q_onnx.quantize:Removed initializers from input
INFO:vai_q_onnx.quantize:Loading model...
INFO:vai_q_onnx.quantize:enable_ipu_cnn is True, optimize the model for better hardware compatibility.
INFO:vai_q_onnx.quantize:Start calibration...
INFO:vai_q_onnx.quantize:Start collecting data, runtime depends on your model size and the number of calibration dataset.
INFO:vai_q_onnx.calibrate:Finding optimal threshold for each tensor using PowerOfTwoMethod.MinMSE algorithm ...
INFO:vai_q_onnx.calibrate:Use all calibration data to c

Running on device: cpu
current range 8
[VAI_Q_ONNX_INFO]: Time information:
2024-06-17 11:01:44.676953
[VAI_Q_ONNX_INFO]: OS and CPU information:
                                        system --- Windows
                                          node --- ChrisMiniPC
                                       release --- 10
                                       version --- 10.0.22631
                                       machine --- AMD64
                                     processor --- AMD64 Family 25 Model 116 Stepping 1, AuthenticAMD
[VAI_Q_ONNX_INFO]: Tools version information:
                                        python --- 3.9.19
                                          onnx --- 1.16.1
                                   onnxruntime --- 1.15.1
                                    vai_q_onnx --- 1.16.0+69bc4f2
[VAI_Q_ONNX_INFO]: Quantized Configuration information:
                                   model_input --- models/FIR.onnx
                                  model_output -

Computing range: 100%|█████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 2000.62tensor/s]
INFO:vai_q_onnx.qdq_quantizer:Remove QuantizeLinear & DequantizeLinear on certain operations(such as conv-relu).


INFO:vai_q_onnx.quantize:calibration_data_reader is None, using random data for calibration
I0617 11:01:47.259201 8696 quantize.py:164] calibration_data_reader is None, using random data for calibration
INFO:vai_q_onnx.quant_utils:The input ONNX model models/FIR.onnx can create InferenceSession successfully
I0617 11:01:47.262203 8696 quant_utils.py:1232] The input ONNX model models/FIR.onnx can create InferenceSession successfully
INFO:vai_q_onnx.quant_utils:Random input name input shape [1, 1, 1, 160] type <class 'numpy.float32'> 
I0617 11:01:47.275204 8696 quant_utils.py:507] Random input name input shape [1, 1, 1, 160] type <class 'numpy.float32'> 
INFO:vai_q_onnx.quant_utils:Obtained calibration data with 1 iters
I0617 11:01:47.279205 8696 quant_utils.py:293] Obtained calibration data with 1 iters
INFO:vai_q_onnx.quant_utils:The input ONNX model models/FIR.onnx can run inference successfully
I0617 11:01:47.281669 8696 quant_utils.py:1211] The input ONNX model models/FIR.onnx can ru

current range 16
[VAI_Q_ONNX_INFO]: Time information:
2024-06-17 11:01:47.254201
[VAI_Q_ONNX_INFO]: OS and CPU information:
                                        system --- Windows
                                          node --- ChrisMiniPC
                                       release --- 10
                                       version --- 10.0.22631
                                       machine --- AMD64
                                     processor --- AMD64 Family 25 Model 116 Stepping 1, AuthenticAMD
[VAI_Q_ONNX_INFO]: Tools version information:
                                        python --- 3.9.19
                                          onnx --- 1.16.1
                                   onnxruntime --- 1.15.1
                                    vai_q_onnx --- 1.16.0+69bc4f2
[VAI_Q_ONNX_INFO]: Quantized Configuration information:
                                   model_input --- models/FIR.onnx
                                  model_output --- models/FIR_quantize

Computing range: 100%|█████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 2000.14tensor/s]
INFO:vai_q_onnx.qdq_quantizer:Remove QuantizeLinear & DequantizeLinear on certain operations(such as conv-relu).
I0617 11:01:47.332121 8696 qdq_quantizer.py:886] Remove QuantizeLinear & DequantizeLinear on certain operations(such as conv-relu).


INFO:vai_q_onnx.quantize:calibration_data_reader is None, using random data for calibration
I0617 11:01:50.751309 8696 quantize.py:164] calibration_data_reader is None, using random data for calibration
INFO:vai_q_onnx.quant_utils:The input ONNX model models/FIR.onnx can create InferenceSession successfully
I0617 11:01:50.753499 8696 quant_utils.py:1232] The input ONNX model models/FIR.onnx can create InferenceSession successfully
INFO:vai_q_onnx.quant_utils:Random input name input shape [1, 1, 1, 320] type <class 'numpy.float32'> 
I0617 11:01:50.764498 8696 quant_utils.py:507] Random input name input shape [1, 1, 1, 320] type <class 'numpy.float32'> 
INFO:vai_q_onnx.quant_utils:Obtained calibration data with 1 iters
I0617 11:01:50.766581 8696 quant_utils.py:293] Obtained calibration data with 1 iters
INFO:vai_q_onnx.quant_utils:The input ONNX model models/FIR.onnx can run inference successfully
I0617 11:01:50.768397 8696 quant_utils.py:1211] The input ONNX model models/FIR.onnx can ru

current range 32
[VAI_Q_ONNX_INFO]: Time information:
2024-06-17 11:01:50.747308
[VAI_Q_ONNX_INFO]: OS and CPU information:
                                        system --- Windows
                                          node --- ChrisMiniPC
                                       release --- 10
                                       version --- 10.0.22631
                                       machine --- AMD64
                                     processor --- AMD64 Family 25 Model 116 Stepping 1, AuthenticAMD
[VAI_Q_ONNX_INFO]: Tools version information:
                                        python --- 3.9.19
                                          onnx --- 1.16.1
                                   onnxruntime --- 1.15.1
                                    vai_q_onnx --- 1.16.0+69bc4f2
[VAI_Q_ONNX_INFO]: Quantized Configuration information:
                                   model_input --- models/FIR.onnx
                                  model_output --- models/FIR_quantize

Computing range: 100%|█████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 2001.10tensor/s]
INFO:vai_q_onnx.qdq_quantizer:Remove QuantizeLinear & DequantizeLinear on certain operations(such as conv-relu).
I0617 11:01:50.823511 8696 qdq_quantizer.py:886] Remove QuantizeLinear & DequantizeLinear on certain operations(such as conv-relu).


INFO:vai_q_onnx.quantize:calibration_data_reader is None, using random data for calibration
I0617 11:01:55.091745 8696 quantize.py:164] calibration_data_reader is None, using random data for calibration
INFO:vai_q_onnx.quant_utils:The input ONNX model models/FIR.onnx can create InferenceSession successfully
I0617 11:01:55.094748 8696 quant_utils.py:1232] The input ONNX model models/FIR.onnx can create InferenceSession successfully
INFO:vai_q_onnx.quant_utils:Random input name input shape [1, 1, 1, 640] type <class 'numpy.float32'> 
I0617 11:01:55.107745 8696 quant_utils.py:507] Random input name input shape [1, 1, 1, 640] type <class 'numpy.float32'> 
INFO:vai_q_onnx.quant_utils:Obtained calibration data with 1 iters
I0617 11:01:55.110972 8696 quant_utils.py:293] Obtained calibration data with 1 iters
INFO:vai_q_onnx.quant_utils:The input ONNX model models/FIR.onnx can run inference successfully
I0617 11:01:55.114250 8696 quant_utils.py:1211] The input ONNX model models/FIR.onnx can ru

current range 64
[VAI_Q_ONNX_INFO]: Time information:
2024-06-17 11:01:55.085748
[VAI_Q_ONNX_INFO]: OS and CPU information:
                                        system --- Windows
                                          node --- ChrisMiniPC
                                       release --- 10
                                       version --- 10.0.22631
                                       machine --- AMD64
                                     processor --- AMD64 Family 25 Model 116 Stepping 1, AuthenticAMD
[VAI_Q_ONNX_INFO]: Tools version information:
                                        python --- 3.9.19
                                          onnx --- 1.16.1
                                   onnxruntime --- 1.15.1
                                    vai_q_onnx --- 1.16.0+69bc4f2
[VAI_Q_ONNX_INFO]: Quantized Configuration information:
                                   model_input --- models/FIR.onnx
                                  model_output --- models/FIR_quantize

Computing range: 100%|█████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 2001.58tensor/s]
INFO:vai_q_onnx.qdq_quantizer:Remove QuantizeLinear & DequantizeLinear on certain operations(such as conv-relu).
I0617 11:01:55.168744 8696 qdq_quantizer.py:886] Remove QuantizeLinear & DequantizeLinear on certain operations(such as conv-relu).


INFO:vai_q_onnx.quantize:calibration_data_reader is None, using random data for calibration
I0617 11:02:00.595561 8696 quantize.py:164] calibration_data_reader is None, using random data for calibration
INFO:vai_q_onnx.quant_utils:The input ONNX model models/FIR.onnx can create InferenceSession successfully
I0617 11:02:00.597561 8696 quant_utils.py:1232] The input ONNX model models/FIR.onnx can create InferenceSession successfully
INFO:vai_q_onnx.quant_utils:Random input name input shape [1, 1, 1, 1280] type <class 'numpy.float32'> 
I0617 11:02:00.608561 8696 quant_utils.py:507] Random input name input shape [1, 1, 1, 1280] type <class 'numpy.float32'> 
INFO:vai_q_onnx.quant_utils:Obtained calibration data with 1 iters
I0617 11:02:00.612562 8696 quant_utils.py:293] Obtained calibration data with 1 iters
INFO:vai_q_onnx.quant_utils:The input ONNX model models/FIR.onnx can run inference successfully
I0617 11:02:00.615564 8696 quant_utils.py:1211] The input ONNX model models/FIR.onnx can 

current range 128
[VAI_Q_ONNX_INFO]: Time information:
2024-06-17 11:02:00.590560
[VAI_Q_ONNX_INFO]: OS and CPU information:
                                        system --- Windows
                                          node --- ChrisMiniPC
                                       release --- 10
                                       version --- 10.0.22631
                                       machine --- AMD64
                                     processor --- AMD64 Family 25 Model 116 Stepping 1, AuthenticAMD
[VAI_Q_ONNX_INFO]: Tools version information:
                                        python --- 3.9.19
                                          onnx --- 1.16.1
                                   onnxruntime --- 1.15.1
                                    vai_q_onnx --- 1.16.0+69bc4f2
[VAI_Q_ONNX_INFO]: Quantized Configuration information:
                                   model_input --- models/FIR.onnx
                                  model_output --- models/FIR_quantiz

Computing range: 100%|███████████████████████████████████████████████████████████████████████| 2/2 [00:00<?, ?tensor/s]
INFO:vai_q_onnx.qdq_quantizer:Remove QuantizeLinear & DequantizeLinear on certain operations(such as conv-relu).
I0617 11:02:00.653456 8696 qdq_quantizer.py:886] Remove QuantizeLinear & DequantizeLinear on certain operations(such as conv-relu).


INFO:vai_q_onnx.quantize:calibration_data_reader is None, using random data for calibration
I0617 11:02:08.272836 8696 quantize.py:164] calibration_data_reader is None, using random data for calibration
INFO:vai_q_onnx.quant_utils:The input ONNX model models/FIR.onnx can create InferenceSession successfully
I0617 11:02:08.276180 8696 quant_utils.py:1232] The input ONNX model models/FIR.onnx can create InferenceSession successfully
INFO:vai_q_onnx.quant_utils:Random input name input shape [1, 1, 1, 2560] type <class 'numpy.float32'> 
I0617 11:02:08.289181 8696 quant_utils.py:507] Random input name input shape [1, 1, 1, 2560] type <class 'numpy.float32'> 
INFO:vai_q_onnx.quant_utils:Obtained calibration data with 1 iters
I0617 11:02:08.293179 8696 quant_utils.py:293] Obtained calibration data with 1 iters
INFO:vai_q_onnx.quant_utils:The input ONNX model models/FIR.onnx can run inference successfully
I0617 11:02:08.295182 8696 quant_utils.py:1211] The input ONNX model models/FIR.onnx can 

current range 256
[VAI_Q_ONNX_INFO]: Time information:
2024-06-17 11:02:08.267835
[VAI_Q_ONNX_INFO]: OS and CPU information:
                                        system --- Windows
                                          node --- ChrisMiniPC
                                       release --- 10
                                       version --- 10.0.22631
                                       machine --- AMD64
                                     processor --- AMD64 Family 25 Model 116 Stepping 1, AuthenticAMD
[VAI_Q_ONNX_INFO]: Tools version information:
                                        python --- 3.9.19
                                          onnx --- 1.16.1
                                   onnxruntime --- 1.15.1
                                    vai_q_onnx --- 1.16.0+69bc4f2
[VAI_Q_ONNX_INFO]: Quantized Configuration information:
                                   model_input --- models/FIR.onnx
                                  model_output --- models/FIR_quantiz

Computing range: 100%|█████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 1999.19tensor/s]
INFO:vai_q_onnx.qdq_quantizer:Remove QuantizeLinear & DequantizeLinear on certain operations(such as conv-relu).
I0617 11:02:08.361507 8696 qdq_quantizer.py:886] Remove QuantizeLinear & DequantizeLinear on certain operations(such as conv-relu).


INFO:vai_q_onnx.quantize:calibration_data_reader is None, using random data for calibration
I0617 11:02:22.484720 8696 quantize.py:164] calibration_data_reader is None, using random data for calibration
INFO:vai_q_onnx.quant_utils:The input ONNX model models/FIR.onnx can create InferenceSession successfully
I0617 11:02:22.488723 8696 quant_utils.py:1232] The input ONNX model models/FIR.onnx can create InferenceSession successfully
INFO:vai_q_onnx.quant_utils:Random input name input shape [1, 1, 1, 5120] type <class 'numpy.float32'> 
I0617 11:02:22.501043 8696 quant_utils.py:507] Random input name input shape [1, 1, 1, 5120] type <class 'numpy.float32'> 
INFO:vai_q_onnx.quant_utils:Obtained calibration data with 1 iters
I0617 11:02:22.503551 8696 quant_utils.py:293] Obtained calibration data with 1 iters
INFO:vai_q_onnx.quant_utils:The input ONNX model models/FIR.onnx can run inference successfully
I0617 11:02:22.507553 8696 quant_utils.py:1211] The input ONNX model models/FIR.onnx can 

current range 512
[VAI_Q_ONNX_INFO]: Time information:
2024-06-17 11:02:22.479720
[VAI_Q_ONNX_INFO]: OS and CPU information:
                                        system --- Windows
                                          node --- ChrisMiniPC
                                       release --- 10
                                       version --- 10.0.22631
                                       machine --- AMD64
                                     processor --- AMD64 Family 25 Model 116 Stepping 1, AuthenticAMD
[VAI_Q_ONNX_INFO]: Tools version information:
                                        python --- 3.9.19
                                          onnx --- 1.16.1
                                   onnxruntime --- 1.15.1
                                    vai_q_onnx --- 1.16.0+69bc4f2
[VAI_Q_ONNX_INFO]: Quantized Configuration information:
                                   model_input --- models/FIR.onnx
                                  model_output --- models/FIR_quantiz

Computing range: 100%|███████████████████████████████████████████████████████████████████████| 2/2 [00:00<?, ?tensor/s]
INFO:vai_q_onnx.qdq_quantizer:Remove QuantizeLinear & DequantizeLinear on certain operations(such as conv-relu).
I0617 11:02:22.555451 8696 qdq_quantizer.py:886] Remove QuantizeLinear & DequantizeLinear on certain operations(such as conv-relu).


INFO:vai_q_onnx.quantize:calibration_data_reader is None, using random data for calibration
I0617 11:03:00.568744 8696 quantize.py:164] calibration_data_reader is None, using random data for calibration
INFO:vai_q_onnx.quant_utils:The input ONNX model models/FIR.onnx can create InferenceSession successfully
I0617 11:03:00.570747 8696 quant_utils.py:1232] The input ONNX model models/FIR.onnx can create InferenceSession successfully
INFO:vai_q_onnx.quant_utils:Random input name input shape [1, 1, 1, 10240] type <class 'numpy.float32'> 
I0617 11:03:00.581743 8696 quant_utils.py:507] Random input name input shape [1, 1, 1, 10240] type <class 'numpy.float32'> 
INFO:vai_q_onnx.quant_utils:Obtained calibration data with 1 iters
I0617 11:03:00.584174 8696 quant_utils.py:293] Obtained calibration data with 1 iters
INFO:vai_q_onnx.quant_utils:The input ONNX model models/FIR.onnx can run inference successfully
I0617 11:03:00.589219 8696 quant_utils.py:1211] The input ONNX model models/FIR.onnx ca

current range 1024
[VAI_Q_ONNX_INFO]: Time information:
2024-06-17 11:03:00.563745
[VAI_Q_ONNX_INFO]: OS and CPU information:
                                        system --- Windows
                                          node --- ChrisMiniPC
                                       release --- 10
                                       version --- 10.0.22631
                                       machine --- AMD64
                                     processor --- AMD64 Family 25 Model 116 Stepping 1, AuthenticAMD
[VAI_Q_ONNX_INFO]: Tools version information:
                                        python --- 3.9.19
                                          onnx --- 1.16.1
                                   onnxruntime --- 1.15.1
                                    vai_q_onnx --- 1.16.0+69bc4f2
[VAI_Q_ONNX_INFO]: Quantized Configuration information:
                                   model_input --- models/FIR.onnx
                                  model_output --- models/FIR_quanti

Computing range: 100%|█████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 1999.67tensor/s]
INFO:vai_q_onnx.qdq_quantizer:Remove QuantizeLinear & DequantizeLinear on certain operations(such as conv-relu).
I0617 11:03:00.638840 8696 qdq_quantizer.py:886] Remove QuantizeLinear & DequantizeLinear on certain operations(such as conv-relu).


INFO:vai_q_onnx.quantize:calibration_data_reader is None, using random data for calibration
I0617 11:05:11.703221 8696 quantize.py:164] calibration_data_reader is None, using random data for calibration
INFO:vai_q_onnx.quant_utils:The input ONNX model models/FIR.onnx can create InferenceSession successfully
I0617 11:05:11.706220 8696 quant_utils.py:1232] The input ONNX model models/FIR.onnx can create InferenceSession successfully
INFO:vai_q_onnx.quant_utils:Random input name input shape [1, 1, 1, 20480] type <class 'numpy.float32'> 
I0617 11:05:11.715627 8696 quant_utils.py:507] Random input name input shape [1, 1, 1, 20480] type <class 'numpy.float32'> 
INFO:vai_q_onnx.quant_utils:Obtained calibration data with 1 iters
I0617 11:05:11.717626 8696 quant_utils.py:293] Obtained calibration data with 1 iters
INFO:vai_q_onnx.quant_utils:The input ONNX model models/FIR.onnx can run inference successfully
I0617 11:05:11.729963 8696 quant_utils.py:1211] The input ONNX model models/FIR.onnx ca

current range 2048
[VAI_Q_ONNX_INFO]: Time information:
2024-06-17 11:05:11.698220
[VAI_Q_ONNX_INFO]: OS and CPU information:
                                        system --- Windows
                                          node --- ChrisMiniPC
                                       release --- 10
                                       version --- 10.0.22631
                                       machine --- AMD64
                                     processor --- AMD64 Family 25 Model 116 Stepping 1, AuthenticAMD
[VAI_Q_ONNX_INFO]: Tools version information:
                                        python --- 3.9.19
                                          onnx --- 1.16.1
                                   onnxruntime --- 1.15.1
                                    vai_q_onnx --- 1.16.0+69bc4f2
[VAI_Q_ONNX_INFO]: Quantized Configuration information:
                                   model_input --- models/FIR.onnx
                                  model_output --- models/FIR_quanti

Computing range: 100%|█████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 1998.24tensor/s]
INFO:vai_q_onnx.qdq_quantizer:Remove QuantizeLinear & DequantizeLinear on certain operations(such as conv-relu).
I0617 11:05:11.799440 8696 qdq_quantizer.py:886] Remove QuantizeLinear & DequantizeLinear on certain operations(such as conv-relu).
INFO:vai_q_onnx.refine:Shift cut of layer FIR.weight_DequantizeLinear_Output is 17. It exceeds range [0, 16]. Modify wpos from 7 to 6.
I0617 11:05:11.800439 8696 refine.py:210] Shift cut of layer FIR.weight_DequantizeLinear_Output is 17. It exceeds range [0, 16]. Modify wpos from 7 to 6.


In [None]:
import time
import numpy as np
import pandas as pd
import torch


# --- Sweep configuration ----------------------------------------------------
# M is reported as 'Taps' and P as 'Branches' in the results table. Each one
# is stepped from its minimum towards its maximum by its increment, which
# gives M in {1, 11, 21, 31} and P in {128, 384, 640, 896}.
M_increment = 10
P_increment = 256
Min_M = 1
Min_P = 128
Max_M = 31
Max_P = 1024
batchsize = 1
Maxloop = 30  # number of timed repetitions per (M, P) pair

# One dict per (M, P) pair; each dict becomes one row of the output table.
results = []

# createmodel, generate_win_coeffs, benchmarkCPU, runIPU and comparedspeedup
# are not defined in this cell; they must already exist in the notebook
# session when it runs.
for M in range(Min_M, Max_M + 1, M_increment):
    for P in range(Min_P, Max_P + 1, P_increment):
        createmodel(M, P)
        print("This is M: ", M, "This is P: ", P)

        # Signal length used for every array in this configuration.
        n_samples = M * P * 10
        x = np.sin((np.arange(0, n_samples) / np.pi), dtype=np.float32)
        win_coeffs = generate_win_coeffs(M, P, window_fn="hamming")

        # Starting values that comparedspeedup() folds each new measurement
        # into.
        # NOTE(review): presumably comparedspeedup keeps the better of its two
        # arguments, which would make 100 an upper bound on any reported time;
        # confirm against its definition.
        elapsed_time_FFT = 100
        elapsed_time_TINA_FFT = 100

        # Payload sizes recorded alongside the timings.
        bytes_x = x.nbytes
        coef_bytes = win_coeffs.nbytes

        for i in range(Maxloop):
            # NOTE(review): x (here) and xinput (below) are not read anywhere
            # in this cell. They are kept because the helper functions may
            # read them as notebook globals; drop them once that is ruled out.
            x = np.random.rand(*x.shape).astype(np.float32)

            # Fresh uniform random input between -1 and 1 for this repetition.
            input_data = np.random.uniform(
                low=-1, high=1, size=[1, n_samples]
            ).astype(np.float32)

            # CPU measurement.
            intermediatetime = benchmarkCPU(win_coeffs=win_coeffs, M=M, P=P)
            elapsed_time_FFT = comparedspeedup(elapsed_time_FFT, intermediatetime)

            # IPU measurement on the random input generated above.
            xinput = np.random.rand(batchsize, *x.shape)
            elapsed_time_TINA_FFT_temp = runIPU(input_data=input_data)
            elapsed_time_TINA_FFT = comparedspeedup(
                elapsed_time_TINA_FFT, elapsed_time_TINA_FFT_temp
            )

        # Ratio of the CPU result to the IPU result for this configuration.
        speedup_TINA_FFT = elapsed_time_FFT / elapsed_time_TINA_FFT

        row = {
            'Taps': M,
            'Branches': P,
            'bytes_x': bytes_x,
            'coef_bytes': coef_bytes,
            'elapsed_time_FFT_CPU': elapsed_time_FFT,
            'elapsed_time_TINA_FFT': elapsed_time_TINA_FFT,
            'speedup_TINA_FFT': speedup_TINA_FFT,
        }
        results.append(row)

# Tabulate every configuration and write the table to an Excel workbook in
# the current working directory.
df = pd.DataFrame(results)
df.to_excel('output.xlsx', index=False)

In [None]:
# NOTE: this cell is inert. Everything between the triple quotes is a single
# string literal, not executed code, so running the cell does NOT define
# SmallModel, pytorch_model, input_size or output_size. Remove the enclosing
# triple quotes to re-enable the example model.
"""
import torch

torch.manual_seed(0)

# Define model class
class SmallModel(torch.nn.Module):
    def __init__(self, input_size, output_size):
        super(SmallModel, self).__init__()
        self.fc = torch.nn.Linear(input_size, output_size)

    def forward(self, x):
        output = self.fc(x)
        return output
    

# Instantiate model and generate inputs
input_size = 10
output_size = 5
pytorch_model = SmallModel(input_size, output_size)

print(pytorch_model)
"""

### 2. Export to ONNX

The following code exports a PyTorch model (`pytorch_model`) to the ONNX (Open Neural Network Exchange) format. The ONNX file is required as input to the Vitis AI Quantizer.

In [6]:
# NOTE(review): this whole cell is one bare string literal, so nothing in it
# is executed. The text is also damaged: a `def createmodel(M, P):` function
# has been pasted into the middle of the `export_params=True,` argument, so
# the string now reads "export_para" + the function + "ms=True,".
# Because it sits inside the string, this copy of createmodel is NOT defined
# when the cell runs. If it is meant to be live code, move it into its own
# cell that runs before any cell that calls createmodel, and rejoin
# `export_params=True,` in the disabled export snippet.
"""
# Prep for ONNX export
inputs = {"x": torch.rand(input_size, input_size)}
input_names = ['input']
output_names = ['output']
dynamic_axes = {'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}
tmp_model_path = "models/pfb.onnx"

# Call export function
torch.onnx.export(
        pytorch_model,
        inputs,
        tmp_model_path,
        export_paradef createmodel(M, P):
    x = np.sin(np.arange(0, M * P * 10) / np.pi)
    win_coeffs = generate_win_coeffs(M, P, window_fn="hamming")
    
    
    
    
    x = np.random.rand(*x.shape).astype(np.float32)
    
    win_coeffs = torch.from_numpy(win_coeffs)
    
    # Timing pfb_fir_frontend_TINA_FFT
    xinput = np.random.rand(batchsize,*x.shape)
    PFB_layer = PFB_FIR_FFT(win_coeffs = win_coeffs, M = M, P = P, expected_input_size=xinput.shape[1])
    PFB_layer = PFB_layer.float()
    xinput = torch.from_numpy(xinput).float()
    tmp_model_path = "models/pfb.onnx"
    torch.onnx.export(
    PFB_layer,                     # model being run
    xinput,                 # model input (or a tuple for multiple inputs)
    tmp_model_path,                     # where to save the model
    export_params=True,            # store the trained parameter weights inside the model file
    opset_version=13,              # the ONNX version to export the model to
    input_names=['input'],         # the model's input names
    output_names=['output'],       # the model's output names
    dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}  # variable length axes
    )

    # `input_model_path` is the path to the original, unquantized ONNX model.
    input_model_path = "models/pfb.onnx"
    
    # `output_model_path` is the path where the quantized model will be saved.
    output_model_path = "models/pfb_quantized.onnx"
    
    vai_q_onnx.quantize_static(
    input_model_path,
    output_model_path,
    calibration_data_reader=None,
    quant_format=vai_q_onnx.QuantFormat.QDQ,
    calibrate_method=vai_q_onnx.PowerOfTwoMethod.MinMSE,
    activation_type=vai_q_onnx.QuantType.QUInt8,
    weight_type=vai_q_onnx.QuantType.QInt8,
    enable_ipu_cnn=True,
    extra_options={'ActivationSymmetric': True}
    )
    
ms=True,
        opset_version=13,  # Recommended opset
        input_names=input_names,
        output_names=output_names,
        dynamic_axes=dynamic_axes,
    )
"""    

'\n# Prep for ONNX export\ninputs = {"x": torch.rand(input_size, input_size)}\ninput_names = [\'input\']\noutput_names = [\'output\']\ndynamic_axes = {\'input\': {0: \'batch_size\'}, \'output\': {0: \'batch_size\'}}\ntmp_model_path = "models/pfb.onnx"\n\n# Call export function\ntorch.onnx.export(\n        pytorch_model,\n        inputs,\n        tmp_model_path,\n        export_params=True,\n        opset_version=13,  # Recommended opset\n        input_names=input_names,\n        output_names=output_names,\n        dynamic_axes=dynamic_axes,\n    )\n'

### 3. Quantize Model

Using the static quantization method provided by the Vitis AI Quantizer and providing the newly exported ONNX model, we'll quantize the model to INT8. For more information on this quantization method, see [Vitis AI ONNX Quantization](https://ryzenai.docs.amd.com/en/latest/vai_quant/vai_q_onnx.html).

### 4. Run Model

#### CPU Run

Before running the model on the IPU, we'll run it on the CPU and record the execution time for comparison with the IPU. We'll also use ONNX Runtime profiling to get more information about the inference. For more information on this, see [Profiling Tools](https://onnxruntime.ai/docs/performance/tune-performance/profiling-tools.html) from ONNX Runtime.

#### IPU Run

Now, we'll run it on the IPU and time the execution so that we can compare the results with the CPU.

Let's gather our results and see what we have