# Post Hoc Quantisation of RNFLT Models with HuggingFace

In [1]:
import os
import argparse
import random
import time
import json

import numpy as np
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import *
from torch.optim import *
import torch.nn.functional as F

from sklearn.metrics import *
from sklearn.model_selection import KFold

import sys
sys.path.append('.')

from src.modules import *
from src.data_handler import *
from src import logger
from src.class_balanced_loss import *
from typing import NamedTuple
from torchvision.models import efficientnet as efn

from train_glaucoma_fair_fin_hf import train, validation, Identity_Info, quantifiable_efficientnet

from fairlearn.metrics import *

# Shared Identity_Info instance; later cells hand it to validation()/test()
# through the `identity_Info` keyword.
imb_info = Identity_Info()

In [2]:
# --- Task head, loss and device ---------------------------------------------
out_dim = 1
criterion = nn.BCEWithLogitsLoss()
predictor_head = nn.Sigmoid()
in_feat_to_final = 1280
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# --- Fair_Identity_Normalizer hyper-parameters and run settings -------------
fin_mu = 0.01
fin_sigma = 1.
fin_momentum = 0.3
modality_types = 'rnflt'
task = 'cls'
model_type = 'efficientnet'  # 'resnext' or 'quant' or 'efficientnet'
normalise_data = False

# --- Checkpoint selection ---------------------------------------------------
# Keyed by (model family, normalise_data). The efficientnet checkpoint is the
# same for both normalisation settings. Any model_type other than 'resnext' or
# 'efficientnet' resolves to the 'quant' checkpoints.
_efficientnet_ckpt = 'results/crosssectional_rnflt_fin_race_ablation_of_sigma/fullysup_efficientnet_rnflt_Taskcls_lr5e-5_bz6_normdata0_9764_auc0.8553/best_weights.pth'
_checkpoints = {
    ('resnext', True): 'results/crosssectional_rnflt_fin_race_ablation_of_sigma/fullysup_resnext_rnflt_Taskcls_lr5e-5_bz6_normdata1_2468_auc0.8516/best_weights.pth',
    ('resnext', False): 'results/crosssectional_rnflt_fin_race_ablation_of_sigma/fullysup_resnext_rnflt_Taskcls_lr5e-5_bz6_865_auc0.8510/last_weights.pth',
    ('efficientnet', True): _efficientnet_ckpt,
    ('efficientnet', False): _efficientnet_ckpt,
    ('quant', True): 'results/crosssectional_rnflt_fin_race_ablation_of_sigma/fullysup_quant_rnflt_Taskcls_lr5e-5_bz6_normdata1_21_auc0.8450/best_weights.pth',
    ('quant', False): 'results/crosssectional_rnflt_fin_race_ablation_of_sigma/fullysup_quant_rnflt_Taskcls_lr5e-5_bz6_9354_auc0.8495/best_weights.pth',
}
_family = model_type if model_type in ('resnext', 'efficientnet') else 'quant'
pretrained_weights = _checkpoints[(_family, bool(normalise_data))]

# --- Normalisation layer placed between backbone features and the head ------
# The leading 3 is passed positionally; presumably the number of identity
# groups (the evaluation output reports three per-attribute AUCs) -- TODO confirm.
ag_norm = Fair_Identity_Normalizer(
    3,
    dim=in_feat_to_final,
    mu=fin_mu,
    sigma=fin_sigma,
    momentum=fin_momentum,
)
in_dim = 1
# model = quantifiable_efficientnet(width_mult=1.0, depth_mult=1.0, weights=EfficientNet_B1_Weights.IMAGENET1K_V2)# create_model(model_type=model_type, in_dim=in_dim, out_dim=out_dim, include_final=False)
# model = create_model(model_type=model_type, in_dim=in_dim, out_dim=out_dim, include_final=False)
# final_layer = nn.Linear(in_features=in_feat_to_final, out_features=out_dim, bias=False)
# model = nn.Sequential(model, ag_norm, final_layer)
# model = model.to(device)
# 
# checkpoint = torch.load(pretrained_weights)
# 
# start_epoch = checkpoint['epoch'] + 1
# model.load_state_dict(checkpoint['model_state_dict'])

In [3]:
data_dir = "../quant_notes/data_cmpr"
image_size = 200
attribute_type = 'race'

# depth is 3 for the resnext model and 1 for every other model type
# (presumably the number of input channels -- TODO confirm against EyeFair).
_input_depth = 3 if model_type == "resnext" else 1

# Dataset built from the "train" sub-folder of data_dir.
trn_dataset = EyeFair(
    os.path.join(data_dir, "train"),
    depth=_input_depth,
    modality_type=modality_types,
    task=task,
    resolution=image_size,
    attribute_type=attribute_type,
    normalise_data=normalise_data,
)


min: -31.9900, max: 2.2700


In [4]:
batch_size = 6

# NOTE(review): despite its name, this loader iterates `trn_dataset`, i.e. the
# "train" folder. Confirm that evaluating/calibrating on that split is intended.
# shuffle=False and drop_last=False: every sample is visited once, in a fixed order.
validation_dataset_loader = torch.utils.data.DataLoader(
    trn_dataset,
    batch_size=batch_size,
    shuffle=False,
    pin_memory=True,
    drop_last=False,
)

In [5]:
def test(model, criterion, optimizer, data_loader, epoch, identity_Info=None, _device='cuda'):
    """Run ``validation`` on ``data_loader`` and return the second element of its result.

    Args:
        model: Model to evaluate.
        criterion: Loss passed through to ``validation``.
        optimizer: Accepted for signature parity only; ``None`` is always
            forwarded to ``validation`` in its place.
        data_loader: Loader to evaluate on.
        epoch: Epoch number forwarded to ``validation``.
        identity_Info: Identity-info object forwarded to ``validation``. When
            ``None``, the notebook-level ``imb_info`` is used.
        _device: Device forwarded to ``validation``.

    Returns:
        ``validation(...)[1]``. This appears to be an accuracy-like scalar
        (TODO confirm against ``validation`` in train_glaucoma_fair_fin_hf).
    """
    # Fall back to the shared notebook instance so calls that omit the keyword
    # keep working.
    if identity_Info is None:
        identity_Info = imb_info
    # Forward the caller's loader and epoch instead of the notebook globals.
    res = validation(model, criterion, None, data_loader, epoch, identity_Info=identity_Info, _device=_device)
    return res[1]

In [8]:
# Evaluate the current `model` on validation_dataset_loader.
# NOTE(review): `model` must already exist in the kernel; the cells above this
# one only contain commented-out model construction.
test(model, criterion, None, validation_dataset_loader, 10, identity_Info=imb_info, _device=device)

cuda


RuntimeError: Could not infer dtype of numpy.float32

In [6]:
def print_size_of_model(model):
    """Print the on-disk size, in megabytes, of ``model.state_dict()``.

    The state dict is serialized with ``torch.save`` into a private scratch
    directory, which is removed afterwards even if saving fails. Any existing
    ``temp.p`` in the working directory is therefore left untouched.

    Args:
        model: Any object exposing ``state_dict()`` (e.g. an ``nn.Module``).

    Returns:
        None. The size is printed as ``Size (MB): <value>``.
    """
    import tempfile  # local import keeps this cell self-contained

    with tempfile.TemporaryDirectory() as scratch_dir:
        # Keep the file name "temp.p" so the serialized archive matches what
        # the previous working-directory version produced.
        tmp_path = os.path.join(scratch_dir, "temp.p")
        torch.save(model.state_dict(), tmp_path)
        print('Size (MB):', os.path.getsize(tmp_path)/1e6)

# NOTE(review): requires `model` to be defined by an earlier cell; on a fresh
# kernel this call raises NameError.
print_size_of_model(model)

NameError: name 'model' is not defined

In [24]:
from optimum.quanto import qint8, quantize

# Quantize the weights of `model` to int8 with optimum-quanto. The return value
# is not used; later cells keep using the same `model` object.
quantize(model, weights=qint8)

In [25]:
# Serialized size right after quantize().
print_size_of_model(model)

Size (MB): 26.575981


##########################

In [2]:
# Show the installed `accelerate` version (shell command run from the notebook).
!pip freeze | grep accelerate

accelerate==0.31.0


In [24]:
def load_full_model(model_path):
    """Assemble ``wrapper -> norm -> linear head`` from a saved checkpoint directory.

    Args:
        model_path: Path given to ``AutoConfig.from_pretrained`` and
            ``EfficientNetWrapper.from_pretrained``.

    Returns:
        ``nn.Sequential(wrapper, BatchNorm1d, Linear)``. The feature width comes
        from ``wrapper.backbone.config.hidden_size`` and the output width from
        ``config.num_labels``.

    NOTE(review): only the wrapper is loaded from ``model_path``. The norm layer
    and the bias-free linear head are constructed fresh here, and
    ``nn.BatchNorm1d`` is a stand-in for the real ``ag_norm`` (original author's
    note: "Assuming this is what ag_norm is"). Confirm before relying on the
    returned model's predictions.
    """
    hf_config = AutoConfig.from_pretrained(model_path)
    backbone_wrapper = EfficientNetWrapper.from_pretrained(model_path)

    feature_dim = backbone_wrapper.backbone.config.hidden_size
    n_outputs = hf_config.num_labels

    norm_layer = nn.BatchNorm1d(feature_dim)
    head = nn.Linear(in_features=feature_dim, out_features=n_outputs, bias=False)
    return nn.Sequential(backbone_wrapper, norm_layer, head)

In [7]:
import torch
import transformers
from transformers import AutoImageProcessor, AutoModelForImageClassification, QuantoConfig
# from optimum.quanto import qint8, quantize

# Saved model directory, relative to the repository root. This matches the
# `results/...` convention used for `pretrained_weights` and avoids a
# machine-specific absolute home-directory path.
# Assumes the kernel's working directory is the repository root, as the
# `sys.path.append('.')` / `from src...` imports already do.
model_path = "results/crosssectional_rnflt_fin_race_ablation_of_sigma/fullysup_efficientnet_rnflt_Taskcls_lr5e-5_bz6_normdata0_5694_auc0.7924/best_model"

# processor = AutoImageProcessor.from_pretrained(model_path)

# quantization_config = QuantoConfig(weights="int8")
# 
# quantized_model = EfficientNetWrapper.from_pretrained(
#     model_path,
#     device_map="cuda:0",
#     quantization_config=quantization_config
# )

In [8]:
from transformers import AutoModel, AutoConfig
from src.modules import EfficientNetWrapper
from safetensors.torch import load_file

# Rebuild the wrapper from its saved config, then load the safetensors weights.
# NOTE(review): `device='cuda'` is an extra keyword handed to
# AutoConfig.from_pretrained. It does not place the model: the first device
# print below still reports the construction-time device. Confirm whether
# EfficientNetWrapper reads it from the config before removing it.
config = AutoConfig.from_pretrained(model_path, device='cuda')

model = EfficientNetWrapper(config)

state_dict = load_file(os.path.join(model_path, "model.safetensors"))
model.load_state_dict(state_dict)

model.eval()
print(model.device)
print(torch.cuda.is_available())
# Use the notebook-wide `device` (CUDA when available, else CPU) rather than a
# hardcoded 'cuda', so the cell also runs on CPU-only hosts and agrees with the
# `_device=device` passed to test().
model.to(device)
print(model.device)

print_size_of_model(model)

cpu
True
cuda:0
Size (MB): 26.515317


In [9]:
# Baseline: evaluate the freshly loaded model before any quantization step.
test(model, criterion, None, validation_dataset_loader, 10, identity_Info=imb_info, _device=device)

cuda
test <==== epcoh 10 loss: 0.5644 auc: 0.8953
0-attr auc: 0.9090
1-attr auc: 0.8935
2-attr auc: 0.8816


0.7585714285714286

In [10]:
from optimum.quanto import quantize, qint8

In [11]:
# Quantize the weights of `model` to int8 with optimum-quanto. The return value
# is not used; the following cells keep working on the same `model` object.
quantize(model, weights=qint8)

In [12]:
# Serialized size after quantize() but before freeze().
print_size_of_model(model)

Size (MB): 26.609645


In [13]:
# Evaluate after quantize(), before freeze().
test(model, criterion, None, validation_dataset_loader, 10, identity_Info=imb_info, _device=device)

cuda
test <==== epcoh 10 loss: 0.5641 auc: 0.8946
0-attr auc: 0.9087
1-attr auc: 0.8929
2-attr auc: 0.8803


0.7604761904761905

In [14]:
from optimum.quanto import freeze

In [15]:
# Freeze the quantized model (optimum-quanto). The return value is not used;
# the size print that follows measures the same `model` object.
freeze(model)

In [16]:
# Serialized size after freeze().
print_size_of_model(model)

Size (MB): 7.554601


In [17]:
# Evaluate the frozen, quantized model.
test(model, criterion, None, validation_dataset_loader, 10, identity_Info=imb_info, _device=device)

cuda
test <==== epcoh 10 loss: 0.5641 auc: 0.8946
0-attr auc: 0.9087
1-attr auc: 0.8929
2-attr auc: 0.8803


0.7604761904761905

In [3]:
# Environment report: CUDA availability, name of GPU 0, and library versions.
cuda_ok = torch.cuda.is_available()
gpu_name = torch.cuda.get_device_name(0)
print(cuda_ok)
print(gpu_name)
print(f"Torch version: {torch.__version__}")
print(f"Transformers version: {transformers.__version__}")


True
NVIDIA GeForce RTX 4060 Ti
Torch version: 2.3.1
Transformers version: 4.42.0.dev0


In [16]:
# Show metadata for the installed `bitsandbytes` package (shell command).
!pip show bitsandbytes

Name: bitsandbytes
Version: 0.43.1
Summary: k-bit optimizers and matrix multiplication routines.
Home-page: https://github.com/TimDettmers/bitsandbytes
Author: Tim Dettmers
Author-email: dettmers@cs.washington.edu
License: MIT
Location: /home/platelminto/miniconda3/envs/harvard_gf/lib/python3.10/site-packages
Requires: numpy, torch
Required-by: 


In [7]:
from copy import deepcopy

import torch.ao.quantization
# Work on a CPU copy so the float `model` is left untouched.
# This cell indexes the copy as a 3-element container: [0], [1], [2]
# (presumably backbone, normaliser, final layer -- TODO confirm).
qmodel = deepcopy(model).to('cpu')

if model_type != 'resnext':
    # Wrap the first stage so its input is quantized and its output dequantized.
    qmodel[0] = torch.ao.quantization.QuantWrapper(qmodel[0])
else:
    # The resnext model exposes its own fusion hook instead.
    qmodel[0].fuse_model(is_qat=False)

# NOTE(review): `.v` is a flag on the middle module; its meaning is not visible
# in this notebook.
qmodel[1].v = False
# qmodel = torch.ao.quantization.fuse_modules(model, ['conv2', 'bn2'])
qmodel[2] = torch.ao.quantization.QuantWrapper(qmodel[2])

# Default per-channel qconfig; printed for inspection.
qmodel.qconfig = torch.ao.quantization.default_per_channel_qconfig
print(qmodel.qconfig)


QConfig(activation=functools.partial(<class 'torch.ao.quantization.observer.MinMaxObserver'>, quant_min=0, quant_max=127){}, weight=functools.partial(<class 'torch.ao.quantization.observer.PerChannelMinMaxObserver'>, dtype=torch.qint8, qscheme=torch.per_channel_symmetric){})


In [8]:
import torch.ao.quantization


qmodel.eval().to("cpu")

# Observers: moving-average min/max, symmetric per-tensor for activations and
# symmetric per-channel qint8 for weights.
activation_observer = torch.ao.quantization.MovingAverageMinMaxObserver.with_args(
    qscheme=torch.per_tensor_symmetric
)
weight_observer = torch.ao.quantization.MovingAveragePerChannelMinMaxObserver.with_args(
    qscheme=torch.per_channel_symmetric, dtype=torch.qint8
)
qconf = torch.ao.quantization.QConfig(
    activation=activation_observer,
    weight=weight_observer,
)
qmodel.qconfig = qconf
print(qmodel.qconfig)

# Insert the observers in place.
torch.ao.quantization.prepare(qmodel, inplace=True)

# Calibration: one full pass of validation() over the loader on CPU. The result
# is kept in `res` for later inspection.
res = validation(
    qmodel,
    criterion,
    None,
    validation_dataset_loader,
    10,
    identity_Info=imb_info,
    _device="cpu",
)
qmodel[1].v = False

# Swap the observed modules for their quantized counterparts, then report size.
torch.ao.quantization.convert(qmodel, inplace=True)
print_size_of_model(qmodel)

QConfig(activation=functools.partial(<class 'torch.ao.quantization.observer.MovingAverageMinMaxObserver'>, qscheme=torch.per_tensor_symmetric){}, weight=functools.partial(<class 'torch.ao.quantization.observer.MovingAveragePerChannelMinMaxObserver'>, qscheme=torch.per_channel_symmetric, dtype=torch.qint8){})
cpu
test <==== epcoh 10 loss: 249.9945 auc: 0.8568
0-attr auc: 0.8765
1-attr auc: 0.8508
2-attr auc: 0.8280
Size (MB): 8.068754


In [9]:
# Pull just the first batch from the loader and move its three tensors to
# `device` for inspection.
with torch.no_grad():
    for i, first_batch in enumerate(validation_dataset_loader):
        x, target, attr = (tensor.to(device) for tensor in first_batch)
        break

x.shape, target, attr

(torch.Size([6, 1, 200, 200]),
 tensor([1., 1., 1., 1., 1., 0.], device='cuda:0'),
 tensor([1, 1, 1, 1, 1, 2], device='cuda:0', dtype=torch.int32))

In [11]:
# Evaluate the converted (quantized) qmodel on CPU.
# NOTE(review): the recorded run of this cell failed with NotImplementedError
# for 'aten::silu.out' on the QuantizedCPU backend, so `res` may still hold the
# calibration-pass result.
res = validation(qmodel, criterion, None, validation_dataset_loader, 10, identity_Info=imb_info, _device=torch.device('cpu'))
# next(model.parameters()).is_cuda

cpu


NotImplementedError: Could not run 'aten::silu.out' with arguments from the 'QuantizedCPU' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'aten::silu.out' is only available for these backends: [CPU, CUDA, Meta, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradHIP, AutogradXLA, AutogradMPS, AutogradIPU, AutogradXPU, AutogradHPU, AutogradVE, AutogradLazy, AutogradMTIA, AutogradPrivateUse1, AutogradPrivateUse2, AutogradPrivateUse3, AutogradMeta, AutogradNestedTensor, Tracer, AutocastCPU, AutocastCUDA, FuncTorchBatched, BatchedNestedTensor, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PreDispatch, PythonDispatcher].

CPU: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/build/aten/src/ATen/RegisterCPU.cpp:31419 [kernel]
CUDA: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/build/aten/src/ATen/RegisterCUDA.cpp:44504 [kernel]
Meta: registered at /dev/null:241 [kernel]
BackendSelect: fallthrough registered at /opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/core/BackendSelectFallbackKernel.cpp:3 [backend fallback]
Python: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/core/PythonFallbackKernel.cpp:154 [backend fallback]
FuncTorchDynamicLayerBackMode: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/functorch/DynamicLayer.cpp:497 [backend fallback]
Functionalize: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/build/aten/src/ATen/RegisterFunctionalization_1.cpp:25813 [kernel]
Named: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/core/NamedRegistrations.cpp:7 [backend fallback]
Conjugate: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/ConjugateFallback.cpp:17 [backend fallback]
Negative: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/native/NegateFallback.cpp:18 [backend fallback]
ZeroTensor: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/ZeroTensorFallback.cpp:86 [backend fallback]
ADInplaceOrView: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/torch/csrc/autograd/generated/ADInplaceOrViewType_1.cpp:5369 [kernel]
AutogradOther: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/torch/csrc/autograd/generated/VariableType_3.cpp:18859 [autograd kernel]
AutogradCPU: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/torch/csrc/autograd/generated/VariableType_3.cpp:18859 [autograd kernel]
AutogradCUDA: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/torch/csrc/autograd/generated/VariableType_3.cpp:18859 [autograd kernel]
AutogradHIP: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/torch/csrc/autograd/generated/VariableType_3.cpp:18859 [autograd kernel]
AutogradXLA: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/torch/csrc/autograd/generated/VariableType_3.cpp:18859 [autograd kernel]
AutogradMPS: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/torch/csrc/autograd/generated/VariableType_3.cpp:18859 [autograd kernel]
AutogradIPU: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/torch/csrc/autograd/generated/VariableType_3.cpp:18859 [autograd kernel]
AutogradXPU: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/torch/csrc/autograd/generated/VariableType_3.cpp:18859 [autograd kernel]
AutogradHPU: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/torch/csrc/autograd/generated/VariableType_3.cpp:18859 [autograd kernel]
AutogradVE: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/torch/csrc/autograd/generated/VariableType_3.cpp:18859 [autograd kernel]
AutogradLazy: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/torch/csrc/autograd/generated/VariableType_3.cpp:18859 [autograd kernel]
AutogradMTIA: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/torch/csrc/autograd/generated/VariableType_3.cpp:18859 [autograd kernel]
AutogradPrivateUse1: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/torch/csrc/autograd/generated/VariableType_3.cpp:18859 [autograd kernel]
AutogradPrivateUse2: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/torch/csrc/autograd/generated/VariableType_3.cpp:18859 [autograd kernel]
AutogradPrivateUse3: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/torch/csrc/autograd/generated/VariableType_3.cpp:18859 [autograd kernel]
AutogradMeta: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/torch/csrc/autograd/generated/VariableType_3.cpp:18859 [autograd kernel]
AutogradNestedTensor: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/torch/csrc/autograd/generated/VariableType_3.cpp:18859 [autograd kernel]
Tracer: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/torch/csrc/autograd/generated/TraceType_3.cpp:14745 [kernel]
AutocastCPU: fallthrough registered at /opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/autocast_mode.cpp:378 [backend fallback]
AutocastCUDA: fallthrough registered at /opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/autocast_mode.cpp:244 [backend fallback]
FuncTorchBatched: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:731 [backend fallback]
BatchedNestedTensor: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:758 [backend fallback]
FuncTorchVmapMode: fallthrough registered at /opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/functorch/VmapModeRegistrations.cpp:27 [backend fallback]
Batched: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/LegacyBatchingRegistrations.cpp:1075 [backend fallback]
VmapMode: fallthrough registered at /opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/VmapModeRegistrations.cpp:33 [backend fallback]
FuncTorchGradWrapper: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/functorch/TensorWrapper.cpp:202 [backend fallback]
PythonTLSSnapshot: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/core/PythonFallbackKernel.cpp:162 [backend fallback]
FuncTorchDynamicLayerFrontMode: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/functorch/DynamicLayer.cpp:493 [backend fallback]
PreDispatch: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/core/PythonFallbackKernel.cpp:166 [backend fallback]
PythonDispatcher: registered at /opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/core/PythonFallbackKernel.cpp:158 [backend fallback]


In [17]:
# Second element of the most recent validation() result held in `res`.
res[1]

0.5157142857142857

In [18]:
# model
# Display the module tree of the converted model.
qmodel

Sequential(
  (0): QuantWrapper(
    (quant): Quantize(scale=tensor([0.0499]), zero_point=tensor([128]), dtype=torch.quint8)
    (dequant): DeQuantize()
    (module): EfficientNet(
      (features): Sequential(
        (0): Conv2dNormActivation(
          (0): QuantizedConv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), scale=0.06287065893411636, zero_point=128, padding=(1, 1), bias=False)
          (1): QuantizedBatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): QuantizedHardswish()
        )
        (1): Sequential(
          (0): QuantizableMBConv(
            (block): Sequential(
              (0): Conv2dNormActivation(
                (0): QuantizedConv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), scale=3.2027080059051514, zero_point=128, padding=(1, 1), groups=32, bias=False)
                (1): QuantizedBatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
                (2): QuantizedHardswish()
              

In [13]:
# NOTE(review): in the recorded run this raised AttributeError; the `qconfig`
# attribute was no longer present on the Sequential at that point (presumably
# removed by convert() -- TODO confirm).
qmodel.qconfig

AttributeError: 'Sequential' object has no attribute 'qconfig'