In [94]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import datetime
import numpy as np

In [81]:
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [82]:
train_data = torchvision.datasets.CIFAR10(root='../data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(train_data, batch_size=4, shuffle=True, num_workers=2)

test_data = torchvision.datasets.CIFAR10(root='../data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(test_data, batch_size=4, shuffle=False, num_workers=2)


classes = ('Airplane', 'Car', 'Bird', 'Cat', 'Deer', 'Dog', 'Frog', 'Horse', 'Ship', 'Truck')

Files already downloaded and verified
Files already downloaded and verified


In [83]:
alexnet = torch.hub.load('pytorch/vision:v0.10.0', 'alexnet', pretrained=True)

alexnet.classifier[4] = nn.Linear(4096,1024)
alexnet.classifier[6] = nn.Linear(1024,10)


alexnet.load_state_dict(torch.load('model_20240603_151633_final_frozen_alexnet'))
alexnet.eval()

Using cache found in C:\Users\Elijah/.cache\torch\hub\pytorch_vision_v0.10.0


AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [79]:
# alexnet.features[0].weight.dtype

torch.float32

In [91]:
# # quantize_requantize(alexnet.features[0].weight.detach(), torch.float32, torch.int8)
# type(torch.finfo(torch.float32).max)

float

In [95]:
def get_minmax(dt):
    try:
        max = torch.finfo(dt).max
        min = torch.finfo(dt).min
    except:
        max = torch.iinfo(dt).max
        min = torch.iinfo(dt).min
    return min,max
    
def quantize_requantize(mat, initial_dt, target_dt):
    i_bounds = (torch.min(mat), torch.max(mat))
    t_bounds = get_minmax(target_dt)
    S = np.float128(t_bounds[1] - t_bounds[0]) / np.float128(i_bounds[1] - i_bounds[0])
    print(S)
    # mat.apply_(lambda x: (x * S))
    mat *= S
    mat = mat.to(target_dt)
    # print(mat)
    mat = mat.to(initial_dt)
    mat /= S
    # mat.apply_(lambda x: (x / S))
    return mat

In [13]:
# def get_minmax(dt):
#     try:
#         max = torch.finfo(dt).max
#         min = torch.finfo(dt).min
#     except:
#         max = torch.iinfo(dt).max
#         min = torch.iinfo(dt).min
#     return min,max
    
# def calc_sf(mat, initial_dt, target_dt): 
#     i_bounds = (torch.min(mat), torch.max(mat))
#     t_bounds = get_minmax(target_dt)
#     S = (t_bounds[1] - t_bounds[0]) / (i_bounds[1] - i_bounds[0])
#     return S

# def block_linear_quantization(mat: torch.tensor , block_size, initial_dt, target_dt, orig_shape = None):
#     if orig_shape == None:
#         orig_shape = mat.shape
#     if len(mat.shape) > 2:
#         for i in range(mat.shape[0]):
#             mat[i] = block_linear_quantization(mat[i], block_size, initial_dt, target_dt, orig_shape)
#     else:
#         # test without need for padding
#         start_ix = 0
#         start_iy = 0
#         # Padding operation
#         if orig_shape[-2] % block_size != 0:
#             start_ix -= orig_shape[-2] % block_size
#         if orig_shape[-1] % block_size != 0:
#             start_iy -= orig_shape[-1] % block_size
#         for x in range(start_ix,mat.shape[-2]):
#             for y in range(start_iy,mat.shape[-1]):
#                 submat = mat[x:x + block_size, y:y + block_size]
#                 S = calc_sf(submat,torch.float32, torch.int8)
#                 mat[x:x + block_size, y:y + block_size] = submat

# Per Tensor Quantization

In [30]:
for layer in [*alexnet.features,*alexnet.classifier]:
    try:
        layer.weight = nn.parameter.Parameter(quantize_requantize(layer.weight.detach(), torch.float32, torch.int8))
        print(layer.weight)
    except (TypeError, AttributeError):
        pass


Parameter containing:
tensor([[[[ 1.1863e-01,  9.4048e-02,  9.5411e-02,  ...,  5.5816e-02,
            2.1566e-02,  4.9946e-02],
          [ 7.4867e-02,  3.8940e-02,  5.2960e-02,  ...,  2.5707e-02,
           -1.1294e-02,  4.1403e-03],
          [ 7.5417e-02,  3.8757e-02,  5.4925e-02,  ...,  4.3578e-02,
            1.0220e-02,  1.3233e-02],
          ...,
          [ 9.3131e-02,  1.0372e-01,  6.7529e-02,  ..., -2.0275e-01,
           -1.2838e-01, -1.1218e-01],
          [ 4.3526e-02,  6.4909e-02,  3.6162e-02,  ..., -2.0246e-01,
           -1.1375e-01, -1.0718e-01],
          [ 4.7352e-02,  6.2524e-02,  2.4737e-02,  ..., -1.1842e-01,
           -9.5542e-02, -8.3881e-02]],

         [[-7.2613e-02, -5.7991e-02, -8.0658e-02,  ..., -6.0271e-04,
           -2.5287e-02,  2.5471e-02],
          [-6.9023e-02, -6.7555e-02, -7.6360e-02,  ..., -3.9569e-03,
           -3.0397e-02,  1.0456e-02],
          [-9.9499e-02, -8.5584e-02, -1.0519e-01,  ..., -2.6571e-02,
           -2.2772e-02,  6.6298e-03]

In [31]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print(device)

alexnet.to(device)

cuda:0


AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [32]:
#Testing Accuracy
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = alexnet(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 83 %


# Row-wise quantization
If you ran the per tensor quantization, you should re-initialize the model

In [96]:
count = 0
for layer in [*alexnet.features,*alexnet.classifier]:
    count += 1
    # try:
    if len(layer.weight.shape) == 4:
        weights = layer.weight.detach()
        print(f'Layer {count}')# weights shape pre-quantization: {weights.shape}\nWeights: {weights}')
        for filter in range(0, weights.shape[0]):
            # print(f'Filter num {filter}')
            for channel in range(0, weights.shape[1]):
                for row in range(0, weights.shape[2]):
                    weights[filter,channel,row] = quantize_requantize(weights[filter,channel,row], torch.float32, torch.int8)
                # for row in range(0,weights.shape[2]):
                #     weights[filter,channel, row] = quantize_dequantize_dt(weights[filter,channel,row])
                # print(f'Finish window')
        # print(f'Layer {count} weights shape post-quantization: {weights.shape}\nWeights: {weights}')
        # layer.weight = nn.parameter.Parameter(weights)
        print(f'Layer {count} weights shape post-quantization: {weights.shape}\nWeights: {weights}')
        layer.weight = nn.parameter.Parameter(weights)
    else:
        weights = layer.weight.detach()
        print(f'Layer {count}')# weights shape pre-quantization: {layer.weight.shape}\nWeights: {weights}')
        for row in range(0, weights.shape[0]):
            weights[row] = quantize_requantize(weights[row], torch.float32, torch.int8)
        # for row in tqdm(range(0,weights.shape[0])):
        #     weights[row] = quantize_dequantize_dt(weights[row])
        layer.weight = nn.parameter.Parameter(weights)
        # print(f'Layer {count} weights shape post-quantization: {layer.weight.shape}\nWeights: {weights}')
        # print(layer.weight)
    # except (TypeError, AttributeError):
    #     pass

Layer 1


AttributeError: module 'numpy' has no attribute 'float128'

In [64]:
#Instantiating CUDA device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

#Verifying CUDA
print(device)

#Move the input and alexnet to GPU for speed if available
alexnet.to(device)

cuda:0


AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [65]:
#Testing Accuracy
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = alexnet(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 10 %


---

In [36]:
# alexnet.features[3].weight

Parameter containing:
tensor([[[[  0.,   0.,   3.,  -2.,   0.],
          [  1.,   0.,   7.,  -6.,   0.],
          [  1.,  -8.,   1.,  -6.,   1.],
          [ -1.,  -4.,  -3.,   0.,  -1.],
          [ -2.,   0.,   2.,   0.,  -1.]],

         [[  0.,  -1.,  -3.,   0.,   0.],
          [ -1.,   1.,   0.,   0.,   0.],
          [ -2.,   3.,   0.,  -1.,   0.],
          [ -4.,   0.,   1.,  -1.,   0.],
          [  0.,  -2.,  -1.,  -1.,   1.]],

         [[ -1.,  -5.,  -7.,   2.,   4.],
          [ -1.,  -8.,  -2.,   3.,   3.],
          [ -1.,   2.,  15.,   3.,  -4.],
          [  1.,   8.,   2., -10.,  -3.],
          [  3.,   4.,   0.,  -6.,   0.]],

         ...,

         [[ -1.,   1.,   2.,  -1.,   0.],
          [ -1.,   1.,   6.,  -3.,  -4.],
          [ -3.,  -6.,   7.,   0.,  -5.],
          [  0.,   2.,   4.,   3.,  -1.],
          [  0.,   0.,  -2.,  -1.,  -1.]],

         [[ -2.,  -3.,   3.,   1.,  -5.],
          [ -2.,  -4.,   6.,   2.,   1.],
          [  0.,  -1.,  -5.,  -

---

In [None]:
def 

In [None]:
def quantize_layer()

In [None]:
def quantize_model(model):
    

In [13]:
torch.float32.__attributes__

AttributeError: 'torch.dtype' object has no attribute '__attributes__'

In [14]:
method_list = [func for func in dir(torch.float32) if callable(getattr(torch.float32, func))]

In [22]:
torch.iinfo(torch.int8).max

127

torch.dtype

In [163]:
I = (torch.eye(3) * 1000) @ 

SyntaxError: invalid syntax (3045845854.py, line 1)

In [164]:
r = (torch.rand((3,3)) * 1000000).round()
r

tensor([[884515., 876362., 208928.],
        [504534.,  17749., 983694.],
        [374258., 446598., 475318.]])

In [165]:
qr = quantize_requantize(r.clone().detach(),torch.float32, torch.int8)
qr

tensor(0.0003)
tensor([[ -23,  -25,   55],
        [-123,    4,    3],
        [  98,  117,  125]], dtype=torch.int8)


tensor([[ -87124.4531,  -94700.4844,  208341.0781],
        [-465926.4062,   15152.0781,   11364.0586],
        [ 371225.9062,  443198.2812,  473502.4375]])

In [166]:
r

tensor([[884515., 876362., 208928.],
        [504534.,  17749., 983694.],
        [374258., 446598., 475318.]])

In [167]:
mse = ((qr - r)**2).mean(axis=None)
mse

tensor(4.1937e+11)

In [196]:
import datetime
timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
model_path = 'model_{}_{}'.format(timestamp, "quantizationattempt")
torch.save(alexnet.state_dict(), model_path)

---

# Model Size Change

In [97]:
def estimate_model_size_change(alexnet, bit_length):
    data_type_sizes = []
    abs_max_sizes = []
    count = 0
    bits_in_fp32 = 8 + 23
    for layer in [*alexnet.features,*alexnet.classifier]:
        count += 1 
        # curr_layer_path = curr_path / f'layer{count}.npy' 
        curr_data_size = 0
        curr_abs_max_size = 0
        try:
            data_type_sizes.append(0)
            abs_max_sizes.append(0)
            if len(layer.weight.shape) == 4:
                weights = layer.weight.detach()
                # print(f'Layer {count}')# weights shape pre-quantization: {weights.shape}\nWeights: {weights}')
                for filter in range(0, weights.shape[0]):
                    # print(f'Filter num {filter}')
                    for channel in range(0, weights.shape[1]):
                        # print(f'Channel num {channel}')
                        # print(layer.weight[filter,channel])
                        data_type_sizes[-1] += weights[filter,channel].numel()
                        abs_max_sizes[-1] += weights[filter,channel].shape[0]
                        # for row in range(0,weights.shape[2]):
                        #     weights[filter,channel, row] = quantize_dequantize_dt(weights[filter,channel,row])
                        # print(f'Finish window')
                # print(f'Layer {count} weights shape post-quantization: {weights.shape}\nWeights: {weights}')
                # layer.weight = nn.parameter.Parameter(weights)
                # print(f'Layer {count} weights shape post-quantization: {weights.shape}\nWeights: {weights}')
                # layer.weight = nn.parameter.Parameter(weights)
            else:
                weights = layer.weight.detach()
                # print(f'Layer {count}')# weights shape pre-quantization: {layer.weight.shape}\nWeights: {weights}')
                data_type_sizes[-1] += weights.numel()
                abs_max_sizes[-1] += weights.shape[0]
                # print(weights.shape)
                # for row in tqdm(range(0,weights.shape[0])):
                #     weights[row] = quantize_dequantize_dt(weights[row])
                # layer.weight = nn.parameter.Parameter(weights)
                # print(f'Layer {count} weights shape post-quantization: {layer.weight.shape}\nWeights: {weights}')
                # print(layer.weight)
        except (TypeError, AttributeError):
            pass
    return {'data_type_counts': np.array(data_type_sizes), 'data_type_sizes': np.array(data_type_sizes) * bit_length, 'abs_max_counts': np.array(abs_max_sizes), 'abs_max_sizes': np.array(abs_max_sizes) * bits_in_fp32,
           'data_type_sizes_original': np.array(data_type_sizes) * bits_in_fp32}

In [98]:
def bits_to_mb(bits):
    return bits / 8000000

In [99]:
def compute_model_size(model, bit_len):
    bits_in_fp32 = 8 + 23
    results = estimate_model_size_change(model, bit_len)
    data_type_size = results['data_type_sizes'].sum()
    abs_max_size = results['abs_max_sizes'].sum()
    dt_counts = results['data_type_counts'].sum()
    display(Markdown(f'## {bit_len} bits'))
    print(f'{data_type_size} bits to represent the {bit_len} quantized dt and {abs_max_size} bits to represent the maxes')
    print(f'{bits_to_mb(data_type_size)} mb to represent the {bit_len} quantized dt and {bits_to_mb(abs_max_size)} mb to represent the maxes')
    print(f'\nOriginal Model Size: {bits_to_mb( dt_counts * bits_in_fp32)} MB')
    print(f'Quantized Model Size: {bits_to_mb( data_type_size +abs_max_size)} MB')
    print(f'This is a {bits_to_mb( dt_counts * bits_in_fp32) / (bits_to_mb(data_type_size)+bits_to_mb(abs_max_size))}x decrease in size')
    return (bits_to_mb(data_type_size)+bits_to_mb(abs_max_size)), bits_to_mb(data_type_size),bits_to_mb(abs_max_size)

In [100]:
def flatten_model(model):
    flattened_model = torch.tensor([])
    for layer in [*model.features,*model.classifier]:
        try:
            weights = layer.weight.detach()
            flattened_model = torch.concatenate((flattened_model, weights.flatten()))
        except (TypeError, AttributeError):
            pass
    return flattened_model

In [102]:
model = torch.hub.load('pytorch/vision:v0.10.0', 'alexnet', pretrained=True)
    
model.classifier[4] = nn.Linear(4096,1024)
model.classifier[6] = nn.Linear(1024,10)

model.load_state_dict(torch.load(f'./model_20240603_151633_final_frozen_alexnet',map_location=device))
model.eval()

flattened_alexnet = flatten_model(alexnet)
print(flattened_alexnet.shape[0] * 8, bits_to_mb(flattened_alexnet.shape[0] * 8))
print(flattened_alexnet.shape[0] * 16, bits_to_mb(flattened_alexnet.shape[0] * 16))

Using cache found in C:\Users\Elijah/.cache\torch\hub\pytorch_vision_v0.10.0


355374592 44.421824
710749184 88.843648
