# Post Hoc Quantisation of RNFLT Models

In [1]:
import os
import argparse
import random
import time
import json

import numpy as np
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import *
from torch.optim import *
import torch.nn.functional as F

from sklearn.metrics import *
from sklearn.model_selection import KFold

import sys
sys.path.append('.')

from src.modules import *
from src.data_handler import *
from src import logger
from src.class_balanced_loss import *
from typing import NamedTuple
from torchvision.models import efficientnet as efn

from train_glaucoma_fair_fin import train, validation, Identity_Info, quantifiable_efficientnet

from fairlearn.metrics import *

# Identity_Info is imported from train_glaucoma_fair_fin; this instance is
# handed to validation() through its `identity_Info` keyword in later cells.
imb_info = Identity_Info()

In [2]:
# --- Task head / loss configuration -------------------------------------
out_dim = 1
criterion = nn.BCEWithLogitsLoss()
predictor_head = nn.Sigmoid()
in_feat_to_final = 1280
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# --- Fair identity normalisation hyper-parameters -----------------------
fin_mu = 0.01
fin_sigma = 1.
fin_momentum = 0.3

model_type = 'efficientnet'
modality_types = 'rnflt'
task = 'cls'

# Checkpoint to evaluate. Other runs that were tried from the same results
# directory (only the last assignment ever took effect, so they are listed
# here for reference instead of being assigned and overwritten):
#   fullysup_efficientnet_rnflt_Taskcls_lr5e-5_bz6_564_auc0.8569/last_weights.pth
#   fullysup_efficientnet_rnflt_Taskcls_lr5e-5_bz6_564_auc0.8569/best_weights.pth
#   fullysup_quant_rnflt_Taskcls_lr5e-5_bz6_4442_auc0.7311/best_weights.pth
pretrained_weights = 'results/crosssectional_rnflt_fin_race_ablation_of_sigma/fullysup_quant_rnflt_Taskcls_lr5e-5_bz6_9354_auc0.8495/best_weights.pth'

# First positional argument is presumably the number of identity groups
# (three for `race`) -- TODO confirm against Fair_Identity_Normalizer.
ag_norm = Fair_Identity_Normalizer(
    3,
    dim=in_feat_to_final,
    mu=fin_mu,
    sigma=fin_sigma,
    momentum=fin_momentum,
)
in_dim = 1

# Network layout: backbone -> identity normaliser -> bias-free linear head.
# The Sequential indices (0, 1, 2) are relied on by the quantisation cells.
# Alternative backbone construction that was tried:
#   quantifiable_efficientnet(width_mult=1.0, depth_mult=1.0, weights=EfficientNet_B1_Weights.IMAGENET1K_V2)
#   create_model(model_type=model_type, in_dim=in_dim, out_dim=out_dim, include_final=False)
backbone = quantifiable_efficientnet(width_mult=1.0, depth_mult=1.1)
final_layer = nn.Linear(in_features=in_feat_to_final, out_features=out_dim, bias=False)
model = nn.Sequential(backbone, ag_norm, final_layer)
model = model.to(device)

# map_location keeps the load working on a CPU-only machine even when the
# checkpoint was written from a CUDA process (the `device` fallback above
# explicitly allows running without a GPU).
checkpoint = torch.load(pretrained_weights, map_location=device)

start_epoch = checkpoint['epoch'] + 1
model.load_state_dict(checkpoint['model_state_dict'])
efnm = create_model(model_type=model_type, in_dim=in_dim, out_dim=out_dim, include_final=False)
# Optimizer / scaler / scheduler state is intentionally not restored here:
# optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
# scaler.load_state_dict(checkpoint['scaler_state_dict'])
# scheduler.load_state_dict(checkpoint['scheduler_state_dict'])

In [3]:
# Dataset location and loading options.
data_dir = "../quant_notes/data_cmpr"
image_size = 200
attribute_type = 'race'

# Build the EyeFair dataset from the "train" sub-directory of data_dir.
train_split_dir = os.path.join(data_dir, "train")
trn_dataset = EyeFair(
    train_split_dir,
    modality_type=modality_types,
    task=task,
    resolution=image_size,
    attribute_type=attribute_type,
)


min: -31.9900, max: 2.2700


In [20]:
batch_size = 6

# NOTE: despite its name, this loader iterates over `trn_dataset`.
# Deterministic order (no shuffling) and the final partial batch is kept.
validation_dataset_loader = torch.utils.data.DataLoader(
    trn_dataset,
    batch_size=batch_size,
    shuffle=False,
    drop_last=False,
    pin_memory=True,
)

In [21]:
# Evaluate the un-quantised model on `device`. The literal 10 appears to be
# the epoch number used only for the log line -- TODO confirm in validation().
res = validation(
    model,
    criterion,
    None,
    validation_dataset_loader,
    10,
    identity_Info=imb_info,
    _device=device,
)
# Display element 1 of the returned result.
res[1]

cuda
test <==== epcoh 10 loss: 1.6386 auc: 0.8474
0-attr auc: 0.8725
1-attr auc: 0.8414
2-attr auc: 0.8169


0.7657142857142857

In [6]:
def print_size_of_model(model):
    """Print the serialized size of ``model.state_dict()`` in megabytes.

    The state dict is written with ``torch.save`` into a private temporary
    directory, so an unrelated ``temp.p`` in the working directory is never
    overwritten or deleted, and the scratch file is cleaned up even if
    saving or measuring raises.

    Args:
        model: Any object exposing ``state_dict()`` (e.g. an ``nn.Module``).

    Prints:
        ``Size (MB): <bytes / 1e6>``
    """
    import tempfile

    with tempfile.TemporaryDirectory() as scratch_dir:
        # Keep the original basename so the produced archive is unchanged.
        scratch_path = os.path.join(scratch_dir, "temp.p")
        torch.save(model.state_dict(), scratch_path)
        print('Size (MB):', os.path.getsize(scratch_path)/1e6)

# Report the serialized state_dict size of `model`.
print_size_of_model(model)

Size (MB): 26.498741


In [37]:
        # self.quant = QuantStub()
        # self.dequant = DeQuantStub()
from copy import deepcopy

import torch.ao.quantization

# Quantise a private copy so the float `model` stays untouched.
qmodel = deepcopy(model)

# Switch off the `v` flag on the normaliser (index 1) for the calibration pass;
# NOTE(review): its exact meaning is defined in Fair_Identity_Normalizer.
qmodel[1].v = False

# Wrap the backbone (index 0) and the linear head (index 2) with QuantWrapper.
# The normaliser in between is left unwrapped. No fuse_modules() call is made,
# and manual QuantStub/DeQuantStub placement is not used.
for wrapped_idx in (0, 2):
    qmodel[wrapped_idx] = torch.ao.quantization.QuantWrapper(qmodel[wrapped_idx])

In [38]:
import torch.ao.quantization

# Move the wrapped copy to CPU in eval mode before preparing it.
qmodel.eval().to("cpu")

# Observer factories: symmetric per-tensor moving-average min/max for
# activations, symmetric per-channel moving-average min/max (qint8) for weights.
activation_observer = torch.ao.quantization.MovingAverageMinMaxObserver.with_args(
    qscheme=torch.per_tensor_symmetric
)
weight_observer = torch.ao.quantization.MovingAveragePerChannelMinMaxObserver.with_args(
    qscheme=torch.per_channel_symmetric, dtype=torch.qint8
)
qconf = torch.ao.quantization.QConfig(
    activation=activation_observer,
    weight=weight_observer,
)
qmodel.qconfig = qconf
print(qmodel.qconfig)

# Step 1: prepare the model in place.
torch.ao.quantization.prepare(qmodel, inplace=True)

# Step 2: calibrate by running one validation pass on CPU.
res = validation(
    qmodel, criterion, None, validation_dataset_loader, 10,
    identity_Info=imb_info, _device="cpu",
)

# Re-enable the normaliser's `v` flag now that calibration is done.
qmodel[1].v = True

# Step 3: convert the prepared model in place.
torch.ao.quantization.convert(qmodel, inplace=True)
print_size_of_model(qmodel)

QConfig(activation=functools.partial(<class 'torch.ao.quantization.observer.MovingAverageMinMaxObserver'>, qscheme=torch.per_tensor_symmetric){}, weight=functools.partial(<class 'torch.ao.quantization.observer.MovingAveragePerChannelMinMaxObserver'>, qscheme=torch.per_channel_symmetric, dtype=torch.qint8){})
cpu
test <==== epcoh 10 loss: 1.6385 auc: 0.8474
0-attr auc: 0.8725
1-attr auc: 0.8414
2-attr auc: 0.8169




Size (MB): 8.143738


In [None]:
# Pull only the first batch from the loader and move its three tensors
# (input, target, attribute) onto `device`, then stop iterating.
with torch.no_grad():
    for i, first_batch in enumerate(validation_dataset_loader):
        x, target, attr = (tensor.to(device) for tensor in first_batch)
        break

# Show the input shape alongside the raw targets and attributes.
x.shape, target, attr

In [39]:
# Evaluate the converted (quantised) model; it must run on CPU.
cpu_device = torch.device('cpu')
res = validation(
    qmodel,
    criterion,
    None,
    validation_dataset_loader,
    10,
    identity_Info=imb_info,
    _device=cpu_device,
)
# Debug aid: next(model.parameters()).is_cuda

cpu
tensor([[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 1.1041, 1.1041,  ..., 0.0000, 1.1041, 0.0000],
        [1.1041, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 3.3123, 1.1041,  ..., 0.0000, 1.1041, 0.0000],
        [0.0000, 2.2082, 4.4165,  ..., 0.0000, 2.2082, 1.1041],
        [1.1041, 1.1041, 1.1041,  ..., 0.0000, 0.0000, 0.0000]])
tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 5.5206,  2.2082,  5.5206,  ...,  2.2082,  1.1041,  2.2082],
        [ 3.3123,  5.5206, 11.0411,  ...,  1.1041,  1.1041,  5.5206],
        [ 0.0000,  1.1041,  2.2082,  ...,  0.0000,  0.0000,  1.1041],
        [ 1.1041,  1.1041,  6.6247,  ...,  4.4165,  3.3123,  2.2082]])
tensor([[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 4.4165,  ..., 0.0000, 2.2082, 2.2082],
        [3.3123, 3.3123, 7.7288,  ..., 2.2082, 2.2082, 5.5206]

KeyboardInterrupt: 

In [26]:
# Element 1 of the result returned by the most recently executed validation() call.
res[1]

0.5114285714285715

In [22]:
# Display the module tree of qmodel (swap in `model` to inspect the original network).
qmodel

Sequential(
  (0): QuantWrapper(
    (quant): Quantize(scale=tensor([2.5769]), zero_point=tensor([128]), dtype=torch.quint8)
    (dequant): DeQuantize()
    (module): EfficientNet(
      (features): Sequential(
        (0): Conv2dNormActivation(
          (0): QuantizedConv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), scale=3.3178462982177734, zero_point=128, padding=(1, 1), bias=False)
          (1): QuantizedBatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): QuantizedHardswish()
        )
        (1): Sequential(
          (0): QuantizableMBConv(
            (block): Sequential(
              (0): Conv2dNormActivation(
                (0): QuantizedConv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), scale=3.739513635635376, zero_point=128, padding=(1, 1), groups=32, bias=False)
                (1): QuantizedBatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
                (2): QuantizedHardswish()
              )


In [None]:
# Show the QConfig currently attached to qmodel.
qmodel.qconfig