In [36]:
import torch
import torch.nn as nn
import time

# Specify device as CPU
device = torch.device("cpu")

# Define the input tensor (batch_size=1, channels=1, height=5, width=5)
input_tensor = torch.randn(1, 1, 5, 5).to(device)

# Define a convolutional layer with a 5x5 kernel and move it to the CPU
conv_layer = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=5, bias=False).to(device)

# Warm-up (important to stabilize timings)
for _ in range(10):
    _ = conv_layer(input_tensor)

# Run multiple iterations and measure time
num_iterations = 1000
start_time = time.time_ns()  # Start time in nanoseconds
for _ in range(num_iterations):
    output = conv_layer(input_tensor)
end_time = time.time_ns()  # End time in nanoseconds

# Calculate the elapsed time
elapsed_time_ns = (end_time - start_time) / num_iterations  # Average time per iteration

print(f"Output:\n{output}")
print(f"Average time taken for convolution on CPU: {elapsed_time_ns:.2f} nanoseconds")

Output:
tensor([[[[0.1541]]]], grad_fn=<ConvolutionBackward0>)
Average time taken for convolution on CPU: 43033.40 nanoseconds


In [23]:
import torch
import torch.nn.functional as F

# Given 28x28 tensor (replace with your actual tensor)
tensor_28x28 = torch.tensor([[255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
           255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
           255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
           255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
           211, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 136, 137,  54,
           211, 138,  72, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 217,  59,  44,   7,
            21,  11,   4,  11,  16,  16, 255, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 170, 170,  59,   7,  15,
            13,  53, 105,   2,   2,   8, 126, 210, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 152,  29,   0,  51, 161,
           186, 186, 215, 125, 118,   8,  19,  57, 253, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 181,  37,   0,  65, 176,
           252, 255, 224, 221, 188,  13,   7,   7, 154, 248, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 235,  85,  17,  35,  35, 126,
           255, 255, 255, 255, 252,  71,  10,  27, 180, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 196,  84,   1,  40, 222,
           254, 255, 255, 255, 250, 120,  26,  27, 123, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 255,  84,  23,   7, 123,
           199, 219, 251, 194,  67,  24,  14,  96,  96, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 244, 138,  46,   1,   7,
            37,  95, 193,  77,  67,   7,  37, 102, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 235, 118, 164,  60,   1,   0,
             0,  35,   6,   2,  16,  10,  85, 138, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 183, 116,  33,   4,  38,   9,   3,   0,
             0,   0,   0,  10,  53, 162, 253, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 114,  45,   4,  20,  15,  76,  28, 115,
           151,   0,   0,   1, 121, 224, 253, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255,  86,  86,  22,   0,  20,  96, 196, 177, 225,
           235,  73,  17,   0,  68, 173, 251, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 112,  21,   0,  99, 183, 252, 252, 244, 255,
           255, 213, 143,   0,   9,  54, 255, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 186,  21,   0,  57, 210, 255, 255, 255, 255,
           255, 230, 230,  52,   2,   9, 154, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 155,   9,   9,   0, 140, 200, 219, 255, 255,
           255, 255, 167,  41,   0,  32, 222, 222, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255,  62,  22,  12,  73, 114, 222, 213, 213,
           255, 229, 124,  56,   0,  29, 170, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 220, 148,  12,   5,   1, 121, 101,  51,
           134,  50,   1,   1,   1,  29, 155, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255,  32,  32,   9,   1,   4,   7,
            12,   1,   1,  17,  89, 194, 255, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,  24,  50, 105,
            16,  31,  16,  77, 186, 241, 255, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 105,
           180, 236, 185, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
           255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
           255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
           255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255]])

# Step 1: Add padding of 2 to the tensor
padded_tensor = F.pad(tensor_28x28, pad=(2, 2, 2, 2), mode='constant', value=0)

# Step 2: Extract the 5x5 kernel around the first pixel
# The first pixel (0, 0) in the original tensor corresponds to (2, 2) in the padded tensor
kernel_5x5 = padded_tensor[0:5, 27:32]  # Centered at (2, 2) in padded tensor

# Print the result
print("5x5 Kernel Centered on First Pixel:")
print(kernel_5x5)

5x5 Kernel Centered on First Pixel:
tensor([[  0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0],
        [255, 255, 255,   0,   0],
        [255, 255, 255,   0,   0],
        [255, 255, 255,   0,   0]])


not working!

In [8]:
import torch

# Quantized input tensor (5x5 region in uint8)
xq = torch.tensor([
    [  0,   0,   0,   0, 0],
    [ 0,   0,   0,   0, 0],
    [   0,   0, 254, 254, 254],
    [  0,   0,254, 254, 254],
    [0,   0, 254, 254, 254],
], dtype=torch.uint8)  # Shape: (5, 5)

# Quantized weights (5x5 kernel in int8)
wq = torch.tensor([
    [-9,   48,   20,   54,  -16],
    [ 74,   72,   17,   34,  -11],
    [-15,   13,-3,  -18,   42],
    [   -98,  -65,   13,  -37,    7],
    [-84, -127,  -31,   17,25],
], dtype=torch.int8)  # Shape: (5, 5)

# Quantization parameters
input_scale = 0.0078
input_zero_point = 127

output_scale = 0.0108  # Scale for input activations
output_zero_point = 0  # Zero-point for input activations

weight_scale = 0.004025917034596205  # Scale for weights
weight_zero_point = 0  # Zero-point for weights

bias_float = 0.2684  # Bias in floating-point
effective_scale = input_scale * weight_scale # Scale for the output

xq_signed = xq.to(torch.int16) - input_zero_point  # Shift to zero-centered

z = torch.sum(xq_signed.to(torch.int8) * wq.to(torch.int8)).item()

bias_q  = round(bias_float / effective_scale) # Bias in quantized form
print("Bias quant (Quantized):", bias_q)

z_int = z + bias_q

# Step 4: Quantize the result back to 8-bit (uint8)
M = effective_scale / output_scale
z_out = round(z_int * M - output_zero_point)  # Quantized output

# Step 5: Clamp to the valid uint8 range [0, 255]s
output_quantized = max(0, min(255, z_out))

# Display results
print("Output (With Bias):", z_int)
print("Quantized Output (8-bit):", output_quantized)
print("M scale:", M)


Bias quant (Quantized): 8547
Output (With Bias): 8951
Quantized Output (8-bit): 26
M scale: 0.0029076067472083695


create coe files for conv1,2

In [4]:
def generate_coe_file_hex_with_padding(numbers1, numbers2, numbers3, numbers4, numbers5, numbers6, numbers7, numbers8, numbers9, numbers10, numbers11, numbers12,output_file):
    """
    Generate a COE file with two concatenated hexadecimal numbers.
    Each list is processed as follows:
      - All but the last number are represented as 8-bit signed binary values (2's complement).
      - The last number is represented as a 32-bit signed binary value (2's complement).

    The output file will have:
      memory_initialization_radix=16;
      memory_initialization_vector=
      <hex_for_1_array> <hex_for_2_array> <hex_for_3_array> <hex_for_4_array> <hex_for_5_array> <hex_for_6_array>;

    Parameters:
        numbers1 (list of int): The first input array of signed integers.
        numbers2 (list of int): The second input array of signed integers.
        numbers3 (list of int): The second input array of signed integers.
        numbers4 (list of int): The second input array of signed integers.
        numbers5 (list of int): The second input array of signed integers.
        numbers6 (list of int): The second input array of signed integers.
        output_file (str): The name of the output COE file.
    """
    def process_numbers(numbers):
        if not isinstance(numbers, list) or not all(isinstance(n, int) for n in numbers):
            raise ValueError("Each input must be a list of signed integers.")

        if len(numbers) < 2:
            raise ValueError("Each input list must contain at least two numbers.")

        binary_values = []

        # Process all numbers except the last as 8-bit signed binary
        for num in numbers[:-1]:
            if num < -128 or num > 127:
                raise ValueError(f"Number {num} exceeds the range of an 8-bit signed integer (-128 to 127).")
            binary = f"{num & 0xFF:08b}"  # 8-bit 2's complement
            binary_values.append(binary)

        # Process the last number as 32-bit signed binary
        last_num = numbers[-1]
        if last_num < -2**31 or last_num > 2**31 - 1:
            raise ValueError(f"Last number {last_num} exceeds the range of a 32-bit signed integer.")
        last_binary = f"{last_num & 0xFFFFFFFF:032b}"

        # Combine all binary values
        concatenated_binary = "".join(binary_values) + last_binary

        # Convert binary string to hexadecimal
        concatenated_hex = f"{int(concatenated_binary, 2):X}".zfill(len(concatenated_binary) // 4)
        return concatenated_hex

    # Process both arrays
    hex1 = process_numbers(numbers1)
    hex2 = process_numbers(numbers2)
    hex3 = process_numbers(numbers3)
    hex4 = process_numbers(numbers4)
    hex5 = process_numbers(numbers5)
    hex6 = process_numbers(numbers6)
    hex7 = process_numbers(numbers7)
    hex8 = process_numbers(numbers8)
    hex9 = process_numbers(numbers9)
    hex10 = process_numbers(numbers10)
    hex11 = process_numbers(numbers11)
    hex12 = process_numbers(numbers12)


    # Write to COE file
    with open(output_file, "w") as coe_file:
        coe_file.write("memory_initialization_radix=16;\n")
        coe_file.write("memory_initialization_vector=\n")
        coe_file.write(f"{hex1} {hex2} {hex3} {hex4} {hex5} {hex6} {hex7} {hex8} {hex9} {hex10} {hex11} {hex12};\n")

    return hex1, hex2, hex3, hex4, hex5, hex6, hex7, hex8, hex9, hex10, hex11, hex12

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
from tqdm import tqdm
import matplotlib.pyplot as plt
import random
import csv
import torch.quantization as quant
import warnings
from PIL import Image

warnings.filterwarnings(
    "ignore",
    message=".*weights_only=False.*",  # Match the specific warning message
    category=FutureWarning
)

def save_tensor_to_csv_as_matrix(tensor, filename):
    """
    Save a PyTorch tensor to a CSV file, formatted as matrices for each channel.
    Each matrix represents the spatial dimensions (Height x Width) for a channel,
    with rows separated by commas.

    Parameters:
        tensor (torch.Tensor): The tensor to save (e.g., Batch x Channels x Height x Width).
        filename (str): The name of the CSV file to create.
    """
    # Handle quantized tensors
    if tensor.is_quantized:
        tensor = tensor.int_repr()  # Extract the integer representation

    # Convert the tensor to a NumPy array
    numpy_array = tensor.detach().cpu().numpy()

    # Ensure the tensor has 4 dimensions: [Batch, Channels, Height, Width]
    if len(numpy_array.shape) != 4:
        raise ValueError("Tensor must have 4 dimensions (Batch x Channels x Height x Width).")

    batch_size, num_channels, height, width = numpy_array.shape

    if batch_size != 1:
        raise ValueError("Only batch size of 1 is supported.")

    # Open the file and write matrices for each channel
    with open(filename, mode="w") as file:
        for channel in range(num_channels):
            file.write(f"Channel {channel + 1}:\n")  # Channel header
            for i, row in enumerate(numpy_array[0, channel]):  # Iterate over height (rows)
                file.write(f"{list(row)}")  # Format each row as a list
                if i < height - 1:
                    file.write(",\n")  # Add a comma between rows
                else:
                    file.write("\n")  # No comma after the last row
            file.write("\n")  # Add a newline between channels

    print(f"Tensor saved to {filename} in matrix format.")

class LeNet5(nn.Module):
    def __init__(self, num_classes=33):
        super(LeNet5, self).__init__()
        self.quant = torch.ao.quantization.QuantStub()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=2)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5, stride=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(120, 84)
        self.relu4 = nn.ReLU()
        self.fc3 = nn.Linear(84, num_classes)
        self.dequant = torch.ao.quantization.DeQuantStub()
        self.print_activation = False  # Add a flag to control printing

    def forward(self, x):
        x = self.quant(x)
        if self.print_activation:
            # print("quant output shape:", x)
            # save_tensor_to_csv_as_matrix(x, "image_output.csv")
            pass
        x = self.conv1(x)
        x = self.relu1(x)
        if self.print_activation:
            # print("Conv1 output shape:", x.int_repr())
            # save_tensor_to_csv_as_matrix(x, "relu1_output.csv")
            pass
        x = self.pool1(x)
        if self.print_activation:
            # print("Conv1 output shape:", x.int_repr())
            # save_tensor_to_csv_as_matrix(x, "pool1_output.csv")
            pass
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)
        x = x.reshape(-1, 16 * 5 * 5)
        x = self.fc1(x)
        x = self.relu3(x)
        x = self.fc2(x)
        x = self.relu4(x)
        x = self.fc3(x)
        x = self.dequant(x)
        return x

def load_quantized_model_and_labels(model_path, num_classes, device):
    # Re-create the float model
    model = LeNet5(num_classes=num_classes)
    model.eval()

    # Set the same QAT configuration as before
    custom_qconfig = quant.QConfig(
        activation=quant.FakeQuantize.with_args(observer=quant.MinMaxObserver, quant_min=0, quant_max=255, dtype=torch.quint8, qscheme=torch.per_tensor_affine),
        weight=quant.FakeQuantize.with_args(observer=quant.MinMaxObserver, quant_min=-128, quant_max=127, dtype=torch.qint8, qscheme=torch.per_tensor_symmetric)
    )

    # Apply the custom QConfig to the model
    model.qconfig = custom_qconfig

    # Fuse modules just like during training
    model_fused = torch.ao.quantization.fuse_modules(
        model,
        [['conv1', 'relu1'], ['conv2', 'relu2'], ['fc1', 'relu3'], ['fc2', 'relu4']]
    )

    # Prepare for QAT (simulates the same steps taken during training)
    model_prepared = torch.ao.quantization.prepare_qat(model_fused.train(), inplace=True)

    # Switch to eval mode before feeding dummy data
    model_prepared.eval()

    dummy_input = torch.randn(1, 1, 28, 28).to(device)
    with torch.no_grad():
        _ = model_prepared(dummy_input)  # Populates the observers

    # Now convert the model to quantized form after observers have data
    model_int8 = torch.ao.quantization.convert(model_prepared.eval())

    # Load the saved quantized model weights and labels
    checkpoint = torch.load(model_path, map_location=device)
    model_int8.load_state_dict(checkpoint['model_state_dict'])

    # Check if 'labels_mapping' exists in the checkpoint
    labels_mapping = checkpoint.get('labels_mapping', None)  # Default to None if key doesn't exist

    # Move to device and set to eval mode
    model_int8.to(device)
    model_int8.eval()

    if labels_mapping is not None:
        return model_int8, labels_mapping
    else:
        return model_int8, None

In [6]:
def create_coe_files_convs(path, num_classes):
    # Example usage (ensure you have defined test_loader and dataset.int_to_char):
    model_path = path  # Path to your saved quantized model
    device = torch.device("cpu")
      # Update with your number of classes

    # Input name, Activation Name
    names_list = [['conv1','conv2']]
    for layer in range(len(names_list)):
        # Model parameters
        model_int8, labels  = load_quantized_model_and_labels(model_path, num_classes, device)
        input_scale = model_int8.state_dict()[f'{names_list[layer][0]}.scale']
        input_zero_point = model_int8.state_dict()[f'{names_list[layer][0]}.zero_point']
        weight_scale = model_int8.state_dict()[f'{names_list[layer][1]}.weight'].q_scale()
        weight_zero_point = model_int8.state_dict()[f'{names_list[layer][1]}.weight'].q_zero_point()
        effective_scale = input_scale * weight_scale # Scale for the output

        # Creating weights and Biases Lists
        active_weights = model_int8.state_dict()[f'{names_list[layer][1]}.weight'].int_repr()
        active_biases = model_int8.state_dict()[f'{names_list[layer][1]}.bias']
        active_biases_list = active_biases.tolist()
        q_biases = []
        for i in range(len(active_biases_list)):
            q_value = float(active_biases_list[i]) / effective_scale
            q_biases.append(round(float(q_value)))

        parameters_list = []
        # Print weights for each channel with separation
        num_output_channels = active_weights.shape[0]  # Number of channels in conv1
        num_input_channels = active_weights.shape[1]
        print(active_weights.size())
        for output_channel in range(num_output_channels):
            # Convert the first 25 weights to a list
                for input_channel in range(num_input_channels):

                    channel_list = active_weights[output_channel][input_channel].reshape(-1)[:25].tolist()
                    channel_list.append(q_biases[output_channel])  # Append the bias of the current channel
                    parameters_list.append(channel_list)

        print(f'Num of Channels in Layer {layer+2} is: {len(parameters_list)}')
        # print(len(parameters_list))
        counter = 0
        for i in range(0,len(parameters_list)-1,12):
            # print(parameters_list[i], i)

            print(f'{generate_coe_file_hex_with_padding(parameters_list[i], parameters_list[i+1],parameters_list[i+2],parameters_list[i+3],parameters_list[i+4],parameters_list[i+5],parameters_list[i+6], parameters_list[i+7],parameters_list[i+8],parameters_list[i+9],parameters_list[i+10],parameters_list[i+11],f'Layer{layer+2}_Channel{counter+1}_{counter+2}.coe')} Channel: {i+1}')
            counter = counter + 2

create_coe_files_convs('lenetV5.pth', 33)

torch.Size([16, 6, 5, 5])
Num of Channels in Layer 2 is: 96
('04142732151018222C1F1E1A2226032310181EFD0710042108FFFFFE8D', 'F8FD07F4F0F7F5FEF1F9F8F7F4F2F5F904F7FE01F0FBF60503FFFFFE8D', '0E0E04FAFD020C0807E90605F5DBF210EEF3F317FCFB020B03FFFFFE8D', '02F601090408FEECF6FE0EF402F7F206F20007FD05FC080813FFFFFE8D', '0B2A2519120F2528181310211D121D0D251218FEF812FE05FDFFFFFE8D', '141101120AF4FE090B00FBFAFB15080BFC010C1003FDF21411FFFFFE8D', '03F4F9CFE706D1D0EDE5D2B1EF07EAE3EA1111011F12030F05FFFFFD1A', 'F7F3EDF2FBF6F8030805090609FBFEFAF8F5FAFAFEFDF0FBFAFFFFFD1A', '17180B00F50BFE190C05F60710090F2B2B1F131709FE02F600FFFFFD1A', '01F70B0901133B382B1416372D14EE09ECF1F1DC0F0A07F7EAFFFFFD1A', 'FFECD7C8E009F3DED20B0ADCD5DF0711F508F7FE1028160D11FFFFFD1A', '0D0C191FEB0815130D05ED00110BF313FFFFEDFFE8F901F1F4FFFFFD1A') Channel: 1
('B29ECEC0B7D5CCCABCA10903F0D29B0E1F0903B91B162D16AC000003FF', '0303FE01060407FB08FF050000FC030105FBF80701FA0BF805000003FF', 'D1E3B8C9C51212F7EEC821250FFEE5081504F2E2F7090A0DF9000003FF

In [2]:
numbers = range(0,120)  # Replace the ellipsis with the actual numbers or generate them
prefix = "to_signed("    # The text you want before the number
suffix = ", 8),"   # The text you want after the number

# Generate the new list with text added on both sides of each number
modified_numbers = [f"{prefix} {number} {suffix}" for number in numbers]

# Print or use the modified numbers as needed
print(modified_numbers)


['to_signed( 0 , 8),', 'to_signed( 1 , 8),', 'to_signed( 2 , 8),', 'to_signed( 3 , 8),', 'to_signed( 4 , 8),', 'to_signed( 5 , 8),', 'to_signed( 6 , 8),', 'to_signed( 7 , 8),', 'to_signed( 8 , 8),', 'to_signed( 9 , 8),', 'to_signed( 10 , 8),', 'to_signed( 11 , 8),', 'to_signed( 12 , 8),', 'to_signed( 13 , 8),', 'to_signed( 14 , 8),', 'to_signed( 15 , 8),', 'to_signed( 16 , 8),', 'to_signed( 17 , 8),', 'to_signed( 18 , 8),', 'to_signed( 19 , 8),', 'to_signed( 20 , 8),', 'to_signed( 21 , 8),', 'to_signed( 22 , 8),', 'to_signed( 23 , 8),', 'to_signed( 24 , 8),', 'to_signed( 25 , 8),', 'to_signed( 26 , 8),', 'to_signed( 27 , 8),', 'to_signed( 28 , 8),', 'to_signed( 29 , 8),', 'to_signed( 30 , 8),', 'to_signed( 31 , 8),', 'to_signed( 32 , 8),', 'to_signed( 33 , 8),', 'to_signed( 34 , 8),', 'to_signed( 35 , 8),', 'to_signed( 36 , 8),', 'to_signed( 37 , 8),', 'to_signed( 38 , 8),', 'to_signed( 39 , 8),', 'to_signed( 40 , 8),', 'to_signed( 41 , 8),', 'to_signed( 42 , 8),', 'to_signed( 43 , 8),

In [3]:
numbers = [-10, 21, 26, -106, -7, -40, 28, 43, 18, -29, -73, 5, 54, 38, 32, -106, 26, 82, 83, -125, -40, 10, 103, -25, -89]  # Example numbers

# The function call format
formatted_numbers = [f"    to_signed({number:4d}, 8)," for number in numbers]

# Print each formatted line
for line in formatted_numbers:
    print(line)


    to_signed( -10, 8),
    to_signed(  21, 8),
    to_signed(  26, 8),
    to_signed(-106, 8),
    to_signed(  -7, 8),
    to_signed( -40, 8),
    to_signed(  28, 8),
    to_signed(  43, 8),
    to_signed(  18, 8),
    to_signed( -29, 8),
    to_signed( -73, 8),
    to_signed(   5, 8),
    to_signed(  54, 8),
    to_signed(  38, 8),
    to_signed(  32, 8),
    to_signed(-106, 8),
    to_signed(  26, 8),
    to_signed(  82, 8),
    to_signed(  83, 8),
    to_signed(-125, 8),
    to_signed( -40, 8),
    to_signed(  10, 8),
    to_signed( 103, 8),
    to_signed( -25, 8),
    to_signed( -89, 8),


In [1]:
import os
import shutil
import random

def copy_sample_images(
    source_directory,
    destination_directory='image_testing',
    images_per_folder=15
):
    """
    Copies a sample of images (default 15) from each label folder in the source_directory
    into a new 'image_testing' folder with matching label subfolders.
    """

    # Ensure the destination top-level folder exists
    os.makedirs(destination_directory, exist_ok=True)

    # Iterate over each label folder in the source
    for label_folder in os.listdir(source_directory):
        label_path = os.path.join(source_directory, label_folder)

        # Ensure we only process directories
        if os.path.isdir(label_path):
            # Create the corresponding label folder under 'image_testing'
            dest_label_path = os.path.join(destination_directory, label_folder)
            os.makedirs(dest_label_path, exist_ok=True)

            # List all image files in the label folder
            images = [
                f for f in os.listdir(label_path)
                if f.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp', '.gif'))
            ]

            # Sort or randomize the image list
            images.sort()
            # For a random sample instead of the first 15, uncomment the next line:
            random.shuffle(images)

            # Select the first N images (default 15)
            selected_images = images[:images_per_folder]

            # Copy selected images
            for img in selected_images:
                source_img = os.path.join(label_path, img)
                dest_img = os.path.join(dest_label_path, img)
                shutil.copyfile(source_img, dest_img)

    print("Sample images have been copied successfully.")


if __name__ == "__main__":
    # Update the path to your 33-label-folder directory
    source_directory = "D:/pynq/FinalProject/dataset"
    # Optionally, update the destination directory name if desired
    destination_directory = "image_testing"

    copy_sample_images(source_directory, destination_directory, images_per_folder=100)


Sample images have been copied successfully.


In [10]:
import os
import torch
import torch.nn as nn
import time

# ------------------------------------------------------------------------------
# 1. Set environment variables BEFORE importing torch or any NumPy/Scipy libs
# ------------------------------------------------------------------------------
os.environ["OMP_NUM_THREADS"] = "1"   # Controls number of threads for OpenMP
os.environ["MKL_NUM_THREADS"] = "1"   # Controls number of threads for MKL (used by NumPy/PyTorch)
os.environ["NUMEXPR_NUM_THREADS"] = "1"  # If you use numexpr
os.environ["OPENBLAS_NUM_THREADS"] = "1" # If you use OpenBLAS
os.environ["VECLIB_MAXIMUM_THREADS"] = "1" # If you use Apple's Accelerate/vecLib

# ------------------------------------------------------------------------------
# 2. Set the number of threads within PyTorch
# ------------------------------------------------------------------------------
torch.set_num_threads(1)
# torch.set_num_interop_threads(1)

class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=2)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5, stride=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(16 * 5 * 5, 64)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(64, 33)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)
        x = x.reshape(-1, 16 * 5 * 5)
        x = self.fc1(x)
        x = self.relu3(x)
        x = self.fc2(x)
        return x


# Force CPU for timing simplicity
device = torch.device("cpu")

# Instantiate the model and move to CPU
model = SimpleCNN().to(device)

# Switch to evaluation mode
model.eval()

runs = 1000
sum_total_time = 0

with torch.no_grad():
    for _ in range(runs):
        # Create a new random input for each run
        dummy_input = torch.randn((1, 1, 28, 28), device=device, dtype=torch.float32)

        start_time = time.time()
        _ = model(dummy_input)
        end_time = time.time()
        duration = end_time - start_time

        sum_total_time += end_time - start_time

avg_time_per_run = sum_total_time / runs
print(f"Average inference time over {runs} runs: {avg_time_per_run * 1000} ms")


Average inference time over 1000 runs: 0.2627084255218506 ms


In [41]:
# --- Batch colour inversion ---------------------------------------------------
# 1. Change SRC_DIR below to the folder that contains your images (or sub-folders)
# 2. Set INPLACE = True  if you want to overwrite the originals  (⚠ irreversible)
#    Set INPLACE = False if you want all inverted images written to SRC_DIR+"_inverted"
# 3. Run the cell

from pathlib import Path
from PIL import Image, ImageOps
from tqdm.auto import tqdm   # progress bar

# ------------------------------------------------------------------ settings --
SRC_DIR  = r"D:/FinalProject/dataset/="   # <----- put your folder here
INPLACE  = False                          # True = overwrite originals
RECURSE  = True                           # descend into sub-folders too?

# List of accepted image extensions (lowercase)
VALID_EXTS = {".png", ".jpg", ".jpeg", ".bmp", ".tif", ".tiff", ".webp"}

# ----------------------------------------------------------------- functions --
def invert_image_file(src_path: Path, dst_path: Path):
    """Load one image, invert colours, save to dst_path."""
    with Image.open(src_path) as im:
        if im.mode not in ("L", "RGB"):
            im = im.convert("RGB")
        inv = ImageOps.invert(im)
        dst_path.parent.mkdir(parents=True, exist_ok=True)
        inv.save(dst_path)

# -------------------------------------------------------------------- run ----
src_path = Path(SRC_DIR).expanduser().resolve()
dst_root = src_path if INPLACE else src_path.with_name(src_path.name + "_inverted")

pattern  = "**/*" if RECURSE else "*"
files    = [p for p in src_path.glob(pattern) if p.suffix.lower() in VALID_EXTS]

if not files:
    print("⚠️  No images found – check path or extensions.")
else:
    for f in tqdm(files, desc="Inverting", unit="img"):
        dst = f if INPLACE else dst_root / f.relative_to(src_path)
        invert_image_file(f, dst)

    print("✅  Done.  Inverted", len(files), "images.")


Inverting: 100%|██████████| 634/634 [00:00<00:00, 1642.66img/s]

✅  Done.  Inverted 634 images.





In [21]:
"""
Pad every image in IN_DIR with a uniform black border.

Works for white glyphs on a black background.
If your colours are inverted, set INVERT = True.
"""

from pathlib import Path
import cv2          # pip install opencv-python
import numpy as np

# ─── USER SETTINGS ────────────────────────────────────────────────────────────
IN_DIR   = Path(r"D:/FinalProject/dataset/pi")   # ← CHANGE ME
PAD      = 15        # pixels of padding you want on each side
MAKE_SQUARE = True  # keep aspect ratio but pad until width == height
RESIZE_TO   = (28, 28)      # set to None to skip resizing
INVERT      = True         # True if your glyphs are black on white
THICKEN = 2      # how many pixels to grow each stroke
# ──────────────────────────────────────────────────────────────────────────────

OUT_DIR = IN_DIR.parent / "padded"
OUT_DIR.mkdir(exist_ok=True)

def pad_one(img_gray: np.ndarray) -> np.ndarray:
    """
    1.  Crop to the glyph
    2.  Thicken the strokes via morphological dilation
    3.  Paste into a centred black canvas (with padding / optional square)
    4.  Optionally resize
    """
    # ensure glyph is white (255) on black (0)
    fg = 255 - img_gray if INVERT else img_gray

    # binary mask of the glyph
    _, mask = cv2.threshold(fg, 0, 255,
                            cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # ─── 2. THICKEN ───────────────────────────────────────────────────────────
    if THICKEN > 0:
        k = 2 * THICKEN + 1                       # e.g. 2-px → 5×5 kernel
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (k, k))
        mask = cv2.dilate(mask, kernel, iterations=1)

    ys, xs = np.where(mask)
    if ys.size == 0:                              # empty -> return original
        return img_gray

    # crop to tight bounding box
    y0, x0, y1, x1 = ys.min(), xs.min(), ys.max(), xs.max()
    glyph = mask[y0:y1 + 1, x0:x1 + 1]            # binary after thickening
    glyph = (glyph > 0).astype(np.uint8) * 255    # back to 0/255

    # ─── 3. centre on a canvas with padding ──────────────────────────────────
    h, w = glyph.shape
    if MAKE_SQUARE:
        side = max(h, w) + 2 * PAD
        canvas = np.zeros((side, side), dtype=np.uint8)
        y_off = (side - h) // 2
        x_off = (side - w) // 2
    else:
        canvas = np.zeros((h + 2 * PAD, w + 2 * PAD), dtype=np.uint8)
        y_off = PAD
        x_off = PAD

    canvas[y_off:y_off + h, x_off:x_off + w] = glyph

    # ─── 4. resize if requested ──────────────────────────────────────────────
    if RESIZE_TO is not None:
        canvas = cv2.resize(canvas, RESIZE_TO, interpolation=cv2.INTER_AREA)

    return canvas


# ─── MAIN LOOP ────────────────────────────────────────────────────────────────
img_exts = ("*.png", "*.jpg", "*.jpeg", "*.bmp")
for ext in img_exts:
    for img_path in IN_DIR.glob(ext):
        gray = cv2.imread(str(img_path), cv2.IMREAD_GRAYSCALE)
        if gray is None:
            print(f"⚠️  Skipping {img_path.name}: not an image")
            continue

        padded_img = pad_one(gray)
        cv2.imwrite(str(OUT_DIR / img_path.name), padded_img)

print(f"✅ Done! Padded images saved to: {OUT_DIR.resolve()}")


✅ Done! Padded images saved to: D:\FinalProject\dataset\padded


In [24]:
from pathlib import Path
import pandas as pd, numpy as np
from PIL import Image
from tqdm import tqdm               # progress bar – optional

CSV_PATH  = Path("D:/FinalProject/dataset/digits.csv")   # 699 MB file
DEST_ROOT = Path("D:/FinalProject/dataset/AZ_images")                  # will hold A…Z sub-folders

# Map 0-25 ➜ 'A'-'Z'
label_to_char = {i: chr(i + 65) for i in range(26)}

# read in manageable chunks (keeps RAM <3 GB even on 16 GB laptops)
for chunk in pd.read_csv(CSV_PATH, header=None, chunksize=50_000):
    labels  = chunk.iloc[:, 0].astype(int).to_numpy()
    pixels  = chunk.iloc[:, 1:].to_numpy(dtype=np.uint8)

    for idx, (lab, flat) in enumerate(zip(labels, pixels)):
        img = flat.reshape(28, 28)           # 784 ➜ 28 × 28 matrix
        char_dir = DEST_ROOT / label_to_char[lab]
        char_dir.mkdir(parents=True, exist_ok=True)
        Image.fromarray(img, mode="L").save(char_dir / f"{lab}_{idx:06d}.png")