In [None]:
import torch
import torch.nn as nn
import time

# Specify device as CPU
device = torch.device("cpu")

# Define the input tensor (batch_size=1, channels=1, height=5, width=5)
input_tensor = torch.randn(1, 1, 5, 5).to(device)

# Define a convolutional layer with a 5x5 kernel and move it to the CPU
conv_layer = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=5, bias=False).to(device)

# Warm-up (important to stabilize timings)
for _ in range(10):
    _ = conv_layer(input_tensor)

# Run multiple iterations and measure time
num_iterations = 1000
start_time = time.time_ns()  # Start time in nanoseconds
for _ in range(num_iterations):
    output = conv_layer(input_tensor)
end_time = time.time_ns()  # End time in nanoseconds

# Calculate the elapsed time
elapsed_time_ns = (end_time - start_time) / num_iterations  # Average time per iteration

print(f"Output:\n{output}")
print(f"Average time taken for convolution on CPU: {elapsed_time_ns:.2f} nanoseconds")
0.2 seconds

Output:
tensor([[[[0.2952]]]], grad_fn=<ConvolutionBackward0>)
Average time taken for convolution on CPU: 42324.40 nanoseconds


In [2]:
import torch
import torch.nn.functional as F

# Given 28x28 tensor (replace with your actual tensor)
tensor_28x28 = torch.tensor([[255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
           255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
           255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
           255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
           211, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 136, 137,  54,
           211, 138,  72, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 217,  59,  44,   7,
            21,  11,   4,  11,  16,  16, 255, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 170, 170,  59,   7,  15,
            13,  53, 105,   2,   2,   8, 126, 210, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 152,  29,   0,  51, 161,
           186, 186, 215, 125, 118,   8,  19,  57, 253, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 181,  37,   0,  65, 176,
           252, 255, 224, 221, 188,  13,   7,   7, 154, 248, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 235,  85,  17,  35,  35, 126,
           255, 255, 255, 255, 252,  71,  10,  27, 180, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 196,  84,   1,  40, 222,
           254, 255, 255, 255, 250, 120,  26,  27, 123, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 255,  84,  23,   7, 123,
           199, 219, 251, 194,  67,  24,  14,  96,  96, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 244, 138,  46,   1,   7,
            37,  95, 193,  77,  67,   7,  37, 102, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 235, 118, 164,  60,   1,   0,
             0,  35,   6,   2,  16,  10,  85, 138, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 183, 116,  33,   4,  38,   9,   3,   0,
             0,   0,   0,  10,  53, 162, 253, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 114,  45,   4,  20,  15,  76,  28, 115,
           151,   0,   0,   1, 121, 224, 253, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255,  86,  86,  22,   0,  20,  96, 196, 177, 225,
           235,  73,  17,   0,  68, 173, 251, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 112,  21,   0,  99, 183, 252, 252, 244, 255,
           255, 213, 143,   0,   9,  54, 255, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 186,  21,   0,  57, 210, 255, 255, 255, 255,
           255, 230, 230,  52,   2,   9, 154, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 155,   9,   9,   0, 140, 200, 219, 255, 255,
           255, 255, 167,  41,   0,  32, 222, 222, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255,  62,  22,  12,  73, 114, 222, 213, 213,
           255, 229, 124,  56,   0,  29, 170, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 220, 148,  12,   5,   1, 121, 101,  51,
           134,  50,   1,   1,   1,  29, 155, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255,  32,  32,   9,   1,   4,   7,
            12,   1,   1,  17,  89, 194, 255, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,  24,  50, 105,
            16,  31,  16,  77, 186, 241, 255, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 105,
           180, 236, 185, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
           255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
           255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
           255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255]])

# Step 1: Add padding of 2 to the tensor
padded_tensor = F.pad(tensor_28x28, pad=(2, 2, 2, 2), mode='constant', value=0)

# Step 2: Extract the 5x5 kernel around the first pixel
# The first pixel (0, 0) in the original tensor corresponds to (2, 2) in the padded tensor
kernel_5x5 = padded_tensor[0:5, 0:5]  # Centered at (2, 2) in padded tensor

# Print the result
print("5x5 Kernel Centered on First Pixel:")
print(kernel_5x5)

5x5 Kernel Centered on First Pixel:
tensor([[  0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0],
        [  0,   0, 255, 255, 255],
        [  0,   0, 255, 255, 255],
        [  0,   0, 255, 255, 255]])


In [70]:
# Re-run the corrected script due to a reset in the execution environment

def generate_coe_file_hex_with_padding(numbers, output_file):
    """
    Generate a COE file with a concatenated hexadecimal number,
    where each number is represented as an 8-bit signed binary value,
    and the last number is a 32-bit signed binary value with padding in MSB and the value in the LSB.

    Parameters:
        numbers (list of int): The input array of signed numbers.
        output_file (str): The name of the output COE file.
    """
    if not isinstance(numbers, list) or not all(isinstance(n, int) for n in numbers):
        raise ValueError("Input must be a list of signed integers.")

    if len(numbers) < 2:
        raise ValueError("Input list must contain at least two numbers.")

    binary_values = []

    # Process all numbers except the last as 8-bit signed binary
    for num in numbers[:-1]:
        if num < -128 or num > 127:
            raise ValueError(f"Number {num} exceeds the range of an 8-bit signed integer (-128 to 127).")

        # Convert to 8-bit signed binary (2's complement)
        binary = f"{num & 0xFF:08b}"  # Mask with 0xFF to ensure 8 bits
        binary_values.append(binary)

    # Process the last number as 32-bit signed binary with MSB padding and LSB value
    last_num = numbers[-1]
    if last_num < -2**31 or last_num > 2**31 - 1:
        raise ValueError(f"Last number {last_num} exceeds the range of a 32-bit signed integer (-2^31 to 2^31-1).")

    # Convert to 32-bit signed binary (2's complement), right-aligned
    last_binary = f"{last_num & 0xFFFFFFFF:032b}"  # Mask with 0xFFFFFFFF to ensure 32 bits

    # Combine all binary values
    concatenated_binary = "".join(binary_values) + last_binary

    # Convert binary string to hexadecimal
    concatenated_hex = f"{int(concatenated_binary, 2):X}".zfill(len(concatenated_binary) // 4)

    # Write to COE file
    # with open(output_file, "w") as coe_file:
        # coe_file.write("memory_initialization_radix=16;\n")
        # coe_file.write("memory_initialization_vector=\n")
        # coe_file.write(f"{concatenated_hex};\n")

    return concatenated_hex

# Input array of signed integers
numbers_1 = [
     42,   26,  -33,    0,  -15,   28,   18,   34,  -22,  -77,   14,   70,
          20, -103,  -28,  -10,   33,  -22,  -61,   -5,    3,   10,  -33,  -53,
          54, 6205  # Correctly handling the last number as 32-bit signed
]

numbers_2 = [
    -12,  -4, -32,  16, -13, -13,  21,  31,  16,  12,  17,  16,  -9,  12,
         17, -27,  20,  33,  22,  32,  17, -18,   4,  32, -26, -1411  # Correctly handling the last number as 32-bit signed
]
numbers_3 = [
    -15,   32,   55,   70,   68,   26,   35,   86,   58,   35,   42,   43,
          12,   -3,   -6,  -26,  -32,  -55,  -79,  -86,  -49,  -24, -102,  -94,
         -21, 2945  # Correctly handling the last number as 32-bit signed
]
numbers_4 = [
    -12,  -82, -126, -109,  -53,  -21,  -59,   14,  -27,  -36,   20,   57,
          26,   44,  -13,    5,   64,   60,   49,   10,   35,   25,   30,   28,
          46, 3288 # Correctly handling the last number as 32-bit signed
]
numbers_5 = [
    -5,  31,  22,  40,  21, -53,  33,  10,  33,  21, -45, -95, -25,  43,
        -42, -80, -34,  49,  52, -28, -79,  23, -32, -24,  11, 12748  # Correctly handling the last number as 32-bit signed
]
numbers_6 = [
    -69, -58,  -2,  18,  74,  13, -82, -78, -35,  60,  65,  34, -79, -96,
          3,  13,  36,  36, -86, -70,  39,  53,  74,  36, -44, 10969  # Correctly handling the last number as 32-bit signed
]

# Generate COE file in hexadecimal with the last number in the LSB and MSB padding
print(f'{generate_coe_file_hex_with_padding(numbers_1, "output_weights.coe")} channel: 1')
print(f'{generate_coe_file_hex_with_padding(numbers_2, "output_weights.coe")} channel: 2')
print(f'{generate_coe_file_hex_with_padding(numbers_3, "output_weights.coe")} channel: 3')
print(f'{generate_coe_file_hex_with_padding(numbers_4, "output_weights.coe")} channel: 4')
print(f'{generate_coe_file_hex_with_padding(numbers_5, "output_weights.coe")} channel: 5')
print(f'{generate_coe_file_hex_with_padding(numbers_6, "output_weights.coe")} channel: 6')


2A1ADF00F11C1222EAB30E461499E4F621EAC3FB030ADFCB360000183D channel: 1
F4FCE010F3F3151F100C1110F70C11E51421162011EE0420E6FFFFFA7D channel: 2
F1203746441A23563A232A2B0CFDFAE6E0C9B1AACFE89AA2EB00000B81 channel: 3
F4AE8293CBEBC50EE5DC14391A2CF305403C310A23191E1C2E00000CD8 channel: 4
FB1F162815CB210A2115D3A1E72BD6B0DE3134E4B117E0E80B000031CC channel: 5
BBC6FE124A0DAEB2DD3C4122B1A0030D2424AABA27354A24D400002AD9 channel: 6


In [73]:
import torch

# Quantized input tensor (5x5 region in uint8)
input_tensor = torch.tensor([
    [  0,   0,   0,   0, 0],
    [ 0,   0,   0,   0, 0],
    [   127,   127, 127,   0, 0],
    [  127,   127, 127,   0, 0],
    [127,   127, 127,   0, 0],
], dtype=torch.uint8)  # Shape: (5, 5)

# Quantized weights (5x5 kernel in int8)
weights_tensor = torch.tensor([
    [42,   26,  -33,    0,  -15],
    [ 28,   18,   34,  -22,  -77],
    [14,   70, 20, -103,  -28],
    [  -10,   33,  -22,  -61,   -5],
    [3,   10,  -33,  -53, 54],
], dtype=torch.int8)  # Shape: (5, 5)

# Quantization parameters
input_scale = 0.0078
input_zero_point = 127

output_scale = 0.0186  # Scale for input activations
output_zero_point = 0  # Zero-point for input activations

weight_scale = 0.004553669132292271  # Scale for weights
weight_zero_point = 0  # Zero-point for weights

bias_float = 0.2204  # Bias in floating-point
effective_scale = input_scale * weight_scale # Scale for the output
bias_quant = (bias_float / effective_scale)# Bias in quantized form
print("Bias quant (Quantized):", round(bias_quant))

# Step 2: Perform elementwise multiplication and summation
conv_sum = torch.sum((input_tensor) * (weights_tensor - weight_zero_point)).item()

# Step 3: Add the bias (in floating-point)
conv_sum_with_bias = conv_sum + bias_quant

# Step 4: Quantize the result back to 8-bit (uint8)
M = effective_scale / output_scale
output_quantized = output_zero_point + M * conv_sum_with_bias

# Step 5: Clamp to the valid uint8 range [0, 255]s
output_quantized = max(0, min(255, output_quantized))

# Display results
print("Convolution Result for Single Pixel:")
print("Output (With Bias):", conv_sum_with_bias)
print("Quantized Output (8-bit):", output_quantized)
print("M scale:", M)


Bias quant (Quantized): 6205
Convolution Result for Single Pixel:
Output (With Bias): 17000.196169398954
Quantized Output (8-bit): 32.463628742373196
M scale: 0.0019096031845096618
