In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import datetime
from matplotlib import pyplot as plt
import numpy as np
from tqdm import tqdm

from pathlib import Path
import pickle
from IPython.display import display, Markdown, Latex

# Initialize Model

In [None]:
# Bit lengths for which quantized checkpoints and count files are loaded below,
# listed from widest to narrowest.
bit_lengths = [8,7,6,5,4,3]


In [None]:
# Presumably the directory for saved models — TODO confirm.
# NOTE(review): this name is rebound to a checkpoint path *string* in the MSE
# cell further down; a distinct name there would avoid the type change.
model_path = Path('./models')

In [None]:
# Output directory for figures; in the visible cells it is referenced only by a
# commented-out plt.savefig call.
fig_path = Path('./figures')

In [None]:
# Preprocessing applied to every image: resize to 256, center-crop to 224x224,
# convert to a tensor, then normalize each channel with the mean/std below
# (these are the commonly used ImageNet channel statistics).
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [None]:
# CIFAR-10 train and test splits, stored under ../data (downloaded if needed),
# with `transform` applied to each image.
train_data = torchvision.datasets.CIFAR10(root='../data', train=True, download=True, transform=transform)
test_data = torchvision.datasets.CIFAR10(root='../data', train=False, download=True, transform=transform)

# Batches of 4 images loaded by 2 worker processes; only the training loader shuffles.
trainloader = torch.utils.data.DataLoader(train_data, batch_size=4, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(test_data, batch_size=4, shuffle=False, num_workers=2)

# Human-readable class names; assumed to follow CIFAR-10's label index order — TODO confirm.
classes = ('Airplane', 'Car', 'Bird', 'Cat', 'Deer', 'Dog', 'Frog', 'Horse', 'Ship', 'Truck')

In [None]:
# Use the first CUDA GPU when one is available, otherwise the CPU.
# NOTE(review): `device` is overwritten with 'cpu' in the MSE cell further down,
# which changes the map_location used by any later create_model call.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
import torchvision.models as models
# Directory that holds the quantized checkpoints (dt_quantized_model_bl_<n>)
# and the count pickles (dt_counts_bl_<n>.pkl) read by the cells below.
path = "Automated_Output"
def create_model(bit_length):
    """Build the modified VGG16 and load the quantized weights for ``bit_length``.

    The classifier's layers 4 and 6 are replaced by ``Linear(4096, 1024)`` and
    ``Linear(1024, 10)`` before the state dict stored at
    ``{path}/dt_quantized_model_bl_{bit_length}`` is loaded (mapped onto the
    module-level ``device``).

    Returns:
        The loaded network, switched to eval mode.
    """
    checkpoint_file = f'{path}/dt_quantized_model_bl_{bit_length}'
    # Alternative checkpoint used previously: 'model_20240610_113550_DT_Quantized_NEW'

    net = models.vgg16(pretrained=False)
    net.classifier[4] = nn.Linear(4096, 1024)
    net.classifier[6] = nn.Linear(1024, 10)

    state = torch.load(checkpoint_file, map_location=device)
    net.load_state_dict(state)

    # eval() returns the module itself, so this hands back `net` in eval mode.
    return net.eval()

In [None]:
# Accuracy (%) keyed by bit length. The values are hard-coded — presumably
# copied from an earlier evaluation run; TODO confirm the source.
# NOTE(review): the keys (7..2) do not match bit_lengths (8..3), and 3 and 2
# carry the same value — verify before plotting.
accuracies = {7: 85.22, 6: 85.12, 5: 85.06, 4: 84.87, 3: 83.51, 2: 83.51}#, 1: 25.24}

In [None]:
# plt.plot(list(accuracies.keys()), list(accuracies.values()))
# plt.gca().invert_xaxis()
# plt.ylabel('Accuracy (%)')
# plt.xlabel('Number of bits')
# plt.title('vgg16 Accuracy vs Number of DT Bits')
# plt.savefig(fig_path / 'vgg16_accuracy_v_bits.png')

---

In [None]:
# Read the pickled count record for every bit length (same order as
# `bit_lengths`) and echo each one as it is loaded.
# NOTE: pickle.load can execute arbitrary code; only use it on files produced
# by this project.
counts = []
for bit_length in bit_lengths:
    counts_file = f'{path}/dt_counts_bl_{bit_length}.pkl'
    with open(counts_file, 'rb') as handle:
        loaded_counts = pickle.load(handle)
    counts.append(loaded_counts)
    print(f'{bit_length} bit exponent counts: {loaded_counts}')

In [None]:
# One loaded quantized model per bit length, keyed by that bit length.
my_models = {bit_length: create_model(bit_length) for bit_length in bit_lengths}

In [None]:
def num_unique_vals(model):
    """Return the shape tuple ``(n,)`` of the distinct values in ``model.classifier[4].weight``.

    The weight is detached and copied to the CPU before the NumPy conversion so
    the call also works when the model lives on a GPU.
    """
    layer_weights = model.classifier[4].weight.detach().cpu().numpy()
    return np.unique(layer_weights.flatten()).shape


# The loaded networks live in the `my_models` dict; `models` is the
# torchvision.models module and cannot be indexed by bit length.
for bit_length in bit_lengths:
    print(f'Number of unique values for {bit_length} exponent dt quantization: {num_unique_vals(my_models[bit_length])}')

# Size Estimation

In [None]:
def estimate_model_size_change(alexnet, bit_length):
    """Count, per layer, the bits needed for quantized weights and their abs-max values.

    Every entry of ``alexnet.features`` followed by every entry of
    ``alexnet.classifier`` produces one slot in each returned array; layers
    without a usable ``weight`` contribute 0.

    * 4-D weights ``(filters, channels, rows, cols)``: one quantized value per
      element and one abs-max per kernel row, i.e. ``filters * channels * rows``.
    * Any other weight: one quantized value per element and ``weight.shape[0]``
      abs-max values.

    Args:
        alexnet: Model exposing iterable ``features`` and ``classifier``
            (the parameter name is historical; nothing here is AlexNet-specific).
        bit_length: Number of bits charged per quantized weight.

    Returns:
        dict of NumPy arrays with keys ``data_type_counts``, ``data_type_sizes``
        (counts * ``bit_length``), ``abs_max_counts``, ``abs_max_sizes`` and
        ``data_type_sizes_original`` (both scaled by ``bits_in_fp32``).
    """
    # NOTE(review): an IEEE-754 float32 occupies 32 bits (sign + 8 + 23). 31 may be
    # deliberate for the non-negative abs-max values, but the same constant also
    # sizes the original weights — confirm before relying on the reported ratio.
    bits_in_fp32 = 8 + 23

    data_type_counts = []
    abs_max_counts = []
    for layer in [*alexnet.features, *alexnet.classifier]:
        try:
            weights = layer.weight.detach()
        except (TypeError, AttributeError):
            # Activation / pooling / dropout layers (or weight=None) hold no weights.
            data_type_counts.append(0)
            abs_max_counts.append(0)
            continue

        dims = weights.shape
        data_type_counts.append(weights.numel())
        if len(dims) == 4:
            # Closed form of "kernel rows summed over every (filter, channel) pair";
            # avoids a Python loop over filters * channels tensor slices.
            abs_max_counts.append(dims[0] * dims[1] * dims[2])
        else:
            abs_max_counts.append(dims[0])

    value_counts = np.array(data_type_counts)
    max_counts = np.array(abs_max_counts)
    return {'data_type_counts': value_counts,
            'data_type_sizes': value_counts * bit_length,
            'abs_max_counts': max_counts,
            'abs_max_sizes': max_counts * bits_in_fp32,
            'data_type_sizes_original': value_counts * bits_in_fp32}

In [None]:
def bits_to_mb(bits):
    """Convert a bit count to megabytes (8 bits per byte, 10**6 bytes per MB)."""
    bits_per_megabyte = 8000000
    return bits / bits_per_megabyte

In [None]:
def compute_model_size(model, bit_len):
    """Print a size report for ``model`` at ``bit_len`` bits and return the quantized size in MB.

    The per-layer figures from ``estimate_model_size_change`` are summed; the
    report shows the bits/MB used by the quantized values and by the abs-max
    values, the original size, and the resulting size ratio.

    Returns:
        Quantized values plus abs-max values, in megabytes.
    """
    # NOTE(review): float32 is 32 bits wide; 8 + 23 omits the sign bit — confirm intent.
    bits_in_fp32 = 8 + 23

    report = estimate_model_size_change(model, bit_len)
    quantized_bits = report['data_type_sizes'].sum()
    max_bits = report['abs_max_sizes'].sum()
    weight_count = report['data_type_counts'].sum()

    quantized_mb = bits_to_mb(quantized_bits)
    max_mb = bits_to_mb(max_bits)
    original_mb = bits_to_mb(weight_count * bits_in_fp32)
    compressed_mb = quantized_mb + max_mb

    display(Markdown(f'## {bit_len} bits'))
    print(f'{quantized_bits} bits to represent the {bit_len} quantized dt and {max_bits} bits to represent the maxes')
    print(f'{quantized_mb} mb to represent the {bit_len} quantized dt and {max_mb} mb to represent the maxes')
    print(f'\nOriginal Model Size: {original_mb} MB')
    print(f'Quantized Model Size: {bits_to_mb(quantized_bits + max_bits)} MB')
    print(f'This is a {original_mb / compressed_mb}x decrease in size')
    return compressed_mb

In [None]:
# Quantized size in MB for every bit length (same order as `bit_lengths`);
# a horizontal rule separates the individual reports.
compressed_sizes = []
for bit_length in bit_lengths:
    quantized_model = create_model(bit_length)
    size_mb = compute_model_size(quantized_model, bit_length)
    compressed_sizes.append(size_mb)
    display(Markdown('---'))

In [None]:
# Quantized weight size against bit length, with the x-axis reversed so the
# bit count decreases from left to right.
fig, ax = plt.subplots()
ax.plot(bit_lengths, compressed_sizes)
ax.invert_xaxis()
ax.set_xlabel('Number of Bits')
ax.set_ylabel('VGG16 Weights Size (MB)')
ax.set_title('VGG16 Weights Size vs Number of Bits')

# MSE Computation

In [None]:
# Flatten and Concatenate Entire Network

In [None]:
def flatten_model(model):
    """Concatenate every layer's weights into one flat 1-D tensor.

    Layers are visited in the order ``model.features`` then ``model.classifier``;
    layers without a usable ``weight`` are skipped. Biases are not included.

    Returns:
        1-D tensor of all weights; an empty tensor when no layer has weights.
    """
    # Seed with an empty tensor so a model without weights still yields an
    # empty result instead of an error from concatenating nothing.
    pieces = [torch.tensor([])]
    for layer in [*model.features, *model.classifier]:
        try:
            pieces.append(layer.weight.detach().flatten())
        except (TypeError, AttributeError):
            # Activation / pooling / dropout layers carry no weight tensor.
            continue
    # Concatenate once; growing the result layer by layer recopies everything
    # accumulated so far on every iteration.
    return torch.cat(pieces)

In [None]:
import torchvision.models as models

# Reference (non-quantized) network: the same modified VGG16 architecture that
# create_model builds, loaded from the fine-tuned checkpoint.
# NOTE: this rebinds the module-level `device` and `model_path` names.
device = 'cpu'
vgg16 = models.vgg16(pretrained=False)
model_path = 'checkpoints/finetuned_vgg16_9'
vgg16.classifier[4] = nn.Linear(4096,1024)
vgg16.classifier[6] = nn.Linear(1024,10)
# map_location lets a checkpoint saved from a GPU load on a CPU-only machine,
# matching how create_model loads the quantized checkpoints.
vgg16.load_state_dict(torch.load(model_path, map_location=device))
vgg16.to(device)

# The reference weights do not change between iterations, so flatten them once.
flattened_vgg16 = flatten_model(vgg16)
model_size = flattened_vgg16.shape[0]

# Mean squared error between the reference weights and each quantized model's
# weights; every line is printed immediately and again as one summary at the end.
output = ''
for bit_length in bit_lengths:
    model = create_model(bit_length)
    flattened_quantized_vgg16 = flatten_model(model)
    MSE = torch.sum(torch.pow(flattened_vgg16 - flattened_quantized_vgg16, 2)) / model_size
    # MAE = torch.sum(torch.abs(flattened_vgg16 - flattened_quantized_vgg16)) / model_size
    print(f'{bit_length} model MSE: {MSE}')
    output += f'{bit_length} model MSE: {MSE}\n'
print(output)