# Setup

In [1]:
from google.colab import drive
drive.mount('/content/drive')

from torchvision.models import resnet18, ResNet18_Weights
import random
import copy
import matplotlib.pyplot as plt
import torch.nn as nn
import numpy as np
import torch
import torch.optim as optim
import torchvision
from torchvision.transforms import Resize
import torchvision.transforms as transforms
import torchvision.models as models
import torch.quantization
import torch.nn.functional as F
from torch.utils.data import random_split
from torch.utils.data import DataLoader
import pandas as pd
import time

# use GPU if available
if torch.cuda.is_available():
        device = torch.device("cuda")
        print("GPU is available and being used.")
else:
        device = torch.device("cpu")
        print("GPU is not available, using CPU instead.")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Mounted at /content/drive
GPU is not available, using CPU instead.
Using device: cpu


# ResNet18

In [2]:
class BasicBlock(nn.Module):
    expansion = 1  # No expansion in BasicBlock

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.stride = stride

        # First convolutional layer
        self.conv1 = nn.Conv2d(
            in_channels, out_channels,
            kernel_size=kernel_size, stride=stride, padding=padding, bias=False
        )

        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        # Second convolutional layer
        self.conv2 = nn.Conv2d(
            out_channels, out_channels,
            kernel_size=kernel_size, stride=1, padding=padding, bias=False
        )
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Downsample layer for shortcut connection (if needed)
        self.downsample = downsample

    def forward(self, x):
        identity = x  # Save the input tensor for the shortcut

        # First layer
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        # Second layer
        out = self.conv2(out)
        out = self.bn2(out)

        # Apply downsampling to the identity if necessary
        if self.downsample is not None:
            identity = self.downsample(x)

        # Add the identity (shortcut connection)
        out += identity
        out = self.relu(out)

        return out

In [3]:
class ResNet18(nn.Module):
    def __init__(self, num_classes=1000):
        super(ResNet18, self).__init__()

        # Initial Convolution and Max Pool
        self.conv1 = nn.Conv2d(
            in_channels=3, out_channels=64,
            kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Define layers using your BasicBlock
        self.layer1 = self._make_layer(64, 64, 2, stride=1)
        self.layer2 = self._make_layer(64, 128, 2, stride=2)
        self.layer3 = self._make_layer(128, 256, 2, stride=2)
        self.layer4 = self._make_layer(256, 512, 2, stride=2)


        # Adaptive Average Pooling
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        # Fully connected layer
        self.fc = nn.Linear(512 * BasicBlock.expansion, num_classes)

        # Initialize weights
        self._initialize_weights()

    def _make_layer(self, in_channels, out_channels, blocks, stride):
        downsample = None
        if stride != 1 or in_channels != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

        layers = []
        layers.append(BasicBlock(in_channels, out_channels, stride=stride, downsample=downsample))
        for _ in range(1, blocks):
            layers.append(BasicBlock(out_channels, out_channels))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

In [4]:
def fixed_point_quantize_weights(weights, total_bits, int_bits):
    frac_bits = total_bits - int_bits
    delta = 2 ** (-frac_bits)
    max_val = (2 ** (total_bits - 1) - 1) * delta
    min_val = -2 ** (total_bits - 1) * delta

    q_weights = torch.clamp(torch.round(weights / delta), min_val / delta, max_val / delta) * delta
    return q_weights

In [5]:
class QuantizedConv2d(nn.Conv2d):
    def __init__(self, *args, total_bits=8, weight_int_bits=2, input_int_bits=2, output_int_bits=2, **kwargs):
        super(QuantizedConv2d, self).__init__(*args, **kwargs)
        self.total_bits = total_bits
        self.weight_int_bits = weight_int_bits
        self.input_int_bits = input_int_bits
        self.output_int_bits = output_int_bits
    def forward(self, input):
        # quantize input
        quantized_input = fixed_point_quantize_weights(input, self.total_bits, self.input_int_bits)
        # quantize weights
        original_weights = self.weight.data
        quantized_weights = fixed_point_quantize_weights(original_weights, self.total_bits, self.weight_int_bits)
        output = F.conv2d(quantized_input, quantized_weights, self.bias, self.stride,
                          self.padding, self.dilation, self.groups)
        # quantize output
        quantized_output = fixed_point_quantize_weights(output, self.total_bits, self.output_int_bits)
        return quantized_output

In [6]:
## quantize conv
def quantize_conv2d(model, total_bits, weight_int_bits, input_int_bits, output_int_bits):
    for name, m in model.named_children():
        if isinstance(m, nn.Conv2d):
            new_layer = QuantizedConv2d(
                in_channels=m.in_channels,
                out_channels=m.out_channels,
                kernel_size=m.kernel_size,
                stride=m.stride,
                padding=m.padding,
                dilation=m.dilation,
                groups=m.groups,
                bias=(m.bias is not None),
                total_bits=total_bits,
                weight_int_bits=weight_int_bits,
                input_int_bits=input_int_bits,
                output_int_bits=output_int_bits
            )
            new_layer.weight.data = fixed_point_quantize_weights(m.weight.data.clone(), total_bits, weight_int_bits)
            if m.bias is not None:
                new_layer.bias.data = fixed_point_quantize_weights(m.bias.data.clone(), total_bits, weight_int_bits)

            setattr(model, name, new_layer)
        elif len(list(m.children())) > 0:
            quantize_conv2d(m, total_bits, weight_int_bits, input_int_bits, output_int_bits)

In [7]:
model = ResNet18(num_classes=100)
quantized_model = copy.deepcopy(model)

weight_int_bits = 2
input_int_bits = 3
output_int_bits = 3

quantize_conv2d(quantized_model, 8, weight_int_bits, input_int_bits, output_int_bits)
quantized_model = quantized_model.to(device)

# Save Weights

In [8]:
# Function to save tensor to binary file
def save_tensor_to_bin(tensor, filename):
    # Ensure the tensor is on CPU and convert to numpy array
    np_array = tensor.cpu().numpy()
    # Flatten the array to store it as a one-dimensional binary
    np_array.tofile(filename)

# Load the model
checkpoint_path = '/content/drive/My Drive/Colab Notebooks/checkpoints/quantized_checkpoint.pth'
checkpoint = torch.load(checkpoint_path, map_location=torch.device('cpu'))

# Load model state dict
model = quantized_model
model.load_state_dict(checkpoint['model_state_dict'])

# Iterate through the model's layers and save weights to .bin files
save_dir = '/content/drive/My Drive/Colab Notebooks/bin_files/'
for name, param in model.named_parameters():
    if 'weight' in name:  # Check if the parameter is a weight matrix
        # Construct the full file path
        filename = f"{save_dir}{name.replace('.', '_')}.bin"
        save_tensor_to_bin(param.data, filename)

  checkpoint = torch.load(checkpoint_path, map_location=torch.device('cpu'))


In [9]:
def save_bn_params_to_bin(bn_layer, filename, eps=1e-5):
    """
    Combines running_var and gamma into mult_factor and saves BN params.

    Args:
        bn_layer: The Batch Normalization layer.
        filename: The path to save the binary file.
        eps: A small value added to running_var for numerical stability.
    """
    # Calculate mult_factor
    mult_factor = bn_layer.weight / torch.sqrt(bn_layer.running_var + eps)

    # Stack parameters in the desired order
    params = torch.stack([
        bn_layer.running_mean,
        mult_factor,
        bn_layer.bias
    ])

    # Save to binary file
    np_array = params.cpu().detach().numpy()
    np_array.tofile(filename)

def save_all_bn_params(model, save_dir):
    """
    Iterates through the model and saves parameters for all BN layers.

    Args:
        model: The PyTorch model.
        save_dir: The directory to save the binary files.
    """
    for name, module in model.named_modules():
        if isinstance(module, nn.BatchNorm2d):  # Check if the module is a BN layer
            filename = f"{save_dir}{name.replace('.', '_')}_combined.bin"
            save_bn_params_to_bin(module, filename)

# Example usage:
save_dir = '/content/drive/My Drive/Colab Notebooks/bin_files/'
save_all_bn_params(model, save_dir)

# Other Test

In [12]:
import torch
import torch.nn as nn

# Create a batch of inputs (e.g., batch size 4, channels 3, height and width 5)
x = torch.randn(4, 3, 5, 5)

# Initialize the BatchNorm layer
bn = nn.BatchNorm2d(3)

# Apply PyTorch BatchNorm
bn_out = bn(x)

# Calculate mean and variance manually
mean = x.mean([0, 2, 3], keepdim=True)
var = x.var([0, 2, 3], keepdim=True, unbiased=False)
epsilon = 1e-5

# Normalize manually
y_norm = (x - mean) / torch.sqrt(var + epsilon)

# Apply gamma and beta (initialized to 1 and 0 in BatchNorm)
gamma = bn.weight.view(1, 3, 1, 1)
beta = bn.bias.view(1, 3, 1, 1)
manual_out = y_norm * gamma + beta

# Compare outputs
print("PyTorch BatchNorm Output:", bn_out)
print("Manual Calculation Output:", manual_out)


PyTorch BatchNorm Output: tensor([[[[-0.7487, -1.5081, -0.9805, -1.1135, -1.2354],
          [-0.4173,  0.8342, -0.4882, -1.5744,  1.1677],
          [ 0.3980, -0.7128,  2.4608,  1.0370, -0.4750],
          [ 0.3292, -2.1019, -0.1768, -0.1718, -0.5120],
          [-1.7837,  0.0476,  0.3870, -1.1638, -0.3408]],

         [[ 0.7610,  0.9965,  0.7050,  0.9551,  1.3295],
          [-0.1368, -0.7595, -0.1605,  0.3042, -1.4841],
          [ 0.0178, -0.1560,  1.4304, -0.6366,  0.7241],
          [ 0.9487,  1.9683, -0.5929,  0.1596,  1.8587],
          [-1.6693,  1.7225,  0.6456,  1.1036, -0.3665]],

         [[-0.5643, -1.3252,  1.4667,  0.4837,  0.6148],
          [-0.9203, -0.3524, -0.1319,  0.0644, -0.8730],
          [-2.1496,  0.9718, -0.3025, -1.0742, -0.5703],
          [ 1.5937, -1.4201,  0.8394,  1.0297, -0.4593],
          [-0.1068, -0.9915,  0.1236, -0.0887, -0.5852]]],


        [[[-0.1510, -1.0703, -0.2825,  0.5577, -0.1359],
          [-0.4731,  0.5029, -0.8611, -2.0631, -0.7074

In [16]:
# Define the size of the arrays
size = 24

# Create sample arrays
array_1 = np.array([idx * 16 for idx in range(size)], dtype=np.int16)  # Sequential numbers scaled by 16
array_2 = np.array([(5 - idx) * 16 for idx in range(size)], dtype=np.int16)  # (5 - idx) scaled by 16

# Save arrays to binary files
array_1.tofile(f"{save_dir}array_1.bin")
array_2.tofile(f"{save_dir}array_2.bin")

In [12]:
weights_int8 = np.fromfile('/content/drive/My Drive/Colab Notebooks/bin_files/conv1_weight.bin', dtype=np.int8)

print(weights_int8.min(), weights_int8.max())

-128 112
