# Post Hoc Quantisation of RNFLT Models

In [1]:
import os
import argparse
import random
import time
import json

import numpy as np
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import *
from torch.optim import *
import torch.nn.functional as F

from sklearn.metrics import *
from sklearn.model_selection import KFold

import sys
sys.path.append('.')

from src.modules import *
from src.data_handler import *
from src import logger
from src.class_balanced_loss import *
from typing import NamedTuple
from torchvision.models import efficientnet as efn

from train_glaucoma_fair_fin import train, validation, Identity_Info, quantifiable_efficientnet

from fairlearn.metrics import *

# Single Identity_Info instance, handed to every validation() call in this notebook.
imb_info = Identity_Info()

In [15]:
# --- Task / head configuration ------------------------------------------------
out_dim = 1
criterion = nn.BCEWithLogitsLoss()
predictor_head = nn.Sigmoid()
in_feat_to_final = 1280  # width of the features fed to the normalizer and final layer
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# --- Fair_Identity_Normalizer hyper-parameters --------------------------------
fin_mu = 0.01
fin_sigma = 1.
fin_momentum = 0.3

# --- Backbone / data selection ------------------------------------------------
model_type = 'resnext'  # or quant
modality_types = 'rnflt'
task = 'cls'

# --- Checkpoint to quantise ---------------------------------------------------
# Only the last assignment ever took effect; the earlier candidates are kept as
# commented alternatives so they can be swapped in by hand (remember to change
# `model_type` accordingly).
# pretrained_weights = 'results/crosssectional_rnflt_fin_race_ablation_of_sigma/fullysup_efficientnet_rnflt_Taskcls_lr5e-5_bz6_564_auc0.8569/last_weights.pth'
# pretrained_weights = 'results/crosssectional_rnflt_fin_race_ablation_of_sigma/fullysup_efficientnet_rnflt_Taskcls_lr5e-5_bz6_564_auc0.8569/best_weights.pth'
# pretrained_weights = 'results/crosssectional_rnflt_fin_race_ablation_of_sigma/fullysup_quant_rnflt_Taskcls_lr5e-5_bz6_4442_auc0.7311/best_weights.pth'
# pretrained_weights = 'results/crosssectional_rnflt_fin_race_ablation_of_sigma/fullysup_quant_rnflt_Taskcls_lr5e-5_bz6_9354_auc0.8495/best_weights.pth'
pretrained_weights = 'results/crosssectional_rnflt_fin_race_ablation_of_sigma/fullysup_resnext_rnflt_Taskcls_lr5e-5_bz6_7315_auc0.8513/last_weights.pth'

# --- Model assembly: backbone -> identity normalizer -> linear head -----------
# NOTE(review): the leading positional 3 is presumably the number of identity
# groups (the evaluation output reports attrs 0, 1 and 2) -- confirm against
# Fair_Identity_Normalizer's signature.
ag_norm = Fair_Identity_Normalizer(
    3,
    dim=in_feat_to_final,
    mu=fin_mu,
    sigma=fin_sigma,
    momentum=fin_momentum,
)
in_dim = 1
# model = quantifiable_efficientnet(width_mult=1.0, depth_mult=1.0, weights=EfficientNet_B1_Weights.IMAGENET1K_V2)# create_model(model_type=model_type, in_dim=in_dim, out_dim=out_dim, include_final=False)
model = create_model(model_type=model_type, in_dim=in_dim, out_dim=out_dim, include_final=False)
final_layer = nn.Linear(in_features=in_feat_to_final, out_features=out_dim, bias=False)
model = nn.Sequential(model, ag_norm, final_layer)
model = model.to(device)

# --- Restore trained weights --------------------------------------------------
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# from a GPU run can still be loaded on a CPU-only machine.
checkpoint = torch.load(pretrained_weights, map_location=device)

start_epoch = checkpoint['epoch'] + 1
model.load_state_dict(checkpoint['model_state_dict'])
# Only the model weights are restored; the training-state lines below are left
# disabled because this notebook never resumes training.
# efnm = create_model(model_type=model_type, in_dim=in_dim, out_dim=out_dim, include_final=False)
# optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
# scaler.load_state_dict(checkpoint['scaler_state_dict'])
# scheduler.load_state_dict(checkpoint['scheduler_state_dict'])

<All keys matched successfully>

In [16]:
# Dataset location and preprocessing settings.
data_dir = "../quant_notes/data_cmpr"
image_size = 200
attribute_type = 'race'

# Build the dataset from the "train" sub-directory of `data_dir`.
train_dir = os.path.join(data_dir, "train")
trn_dataset = EyeFair(
    train_dir,
    modality_type=modality_types,
    task=task,
    resolution=image_size,
    attribute_type=attribute_type,
    depth=3,
)


min: -31.9900, max: 2.2700


In [17]:
batch_size = 6
# NOTE(review): despite its name, this loader iterates `trn_dataset` (built from
# the "train" directory), so metrics computed through it are training-set metrics.
# Samples are served in dataset order and the final partial batch is kept.
validation_dataset_loader = torch.utils.data.DataLoader(
    trn_dataset,
    batch_size=batch_size,
    shuffle=False,
    pin_memory=True,
    drop_last=False,
)

In [18]:
# Evaluate the float model on `device` before any quantisation is applied.
# NOTE(review): the positional 10 appears to be an epoch label used only for
# logging (the output prints "epcoh 10") -- confirm against validation().
res = validation(
    model,
    criterion,
    None,
    validation_dataset_loader,
    10,
    identity_Info=imb_info,
    _device=device,
)
res[1]

cuda
test <==== epcoh 10 loss: 1.7404 auc: 0.9903
0-attr auc: 0.9974
1-attr auc: 0.9956
2-attr auc: 0.9763


0.9871428571428571

In [19]:
def print_size_of_model(model):
    """Print the on-disk size, in megabytes, of ``model``'s serialized state dict.

    The state dict is written with ``torch.save`` into a private temporary
    directory, measured, and removed again. Using a scratch directory (rather
    than a fixed ``temp.p`` in the working directory) means an existing file
    of that name is never overwritten or deleted, and nothing is left behind
    if serialization fails part-way.

    Args:
        model: Object exposing ``state_dict()`` (e.g. an ``nn.Module``).

    Returns:
        None. The size is reported on stdout as ``Size (MB): <value>``.
    """
    import tempfile  # local import: only this helper needs it

    with tempfile.TemporaryDirectory() as scratch_dir:
        # Keep the original file name so the serialized archive is unchanged.
        scratch_path = os.path.join(scratch_dir, "temp.p")
        torch.save(model.state_dict(), scratch_path)
        size_mb = os.path.getsize(scratch_path) / 1e6
    print('Size (MB):', size_mb)

# Serialized size of the float model, as a baseline for the quantised model.
print_size_of_model(model)

Size (MB): 337.143069


In [25]:
        # self.quant = QuantStub()
        # self.dequant = DeQuantStub()
from copy import deepcopy

import torch.ao.quantization

# Work on a copy so the float `model` stays available for comparison.
qmodel = deepcopy(model)

# Conv+BN(+ReLU) fusion for post-training quantisation is only valid in eval
# mode. Set it explicitly rather than relying on the mode an earlier cell
# happened to leave `model` in.
qmodel.eval()
qmodel[0].fuse_model(is_qat=False)

# NOTE(review): `v` is a flag on the Fair_Identity_Normalizer whose meaning is
# not visible in this notebook -- confirm what it toggles.
qmodel[1].v = False

# Wrap only the final linear layer in QuantStub/DeQuantStub.
# NOTE(review): the backbone (qmodel[0]) is deliberately not wrapped here;
# presumably it carries its own quant/dequant stubs -- confirm.
qmodel[2] = torch.ao.quantization.QuantWrapper(qmodel[2])

In [26]:
# Post-training static quantisation of `qmodel`: attach a qconfig, prepare
# (insert observers), run data through the model to calibrate, then convert.
# Every step mutates `qmodel` in place, so the statement order matters.
import torch.ao.quantization


# Inference mode, on CPU, for the quantisation workflow below.
qmodel.eval().to('cpu')
# Alternative hand-written qconfig (moving-average observers), currently unused:
# qconf = torch.quantization.QConfig(
#     activation=torch.quantization.MovingAverageMinMaxObserver.with_args(
#         qscheme=torch.per_tensor_symmetric
#     ),
#     weight=torch.quantization.MovingAveragePerChannelMinMaxObserver.with_args(
#         qscheme=torch.per_channel_symmetric, dtype=torch.qint8
#     ),
# )  # torch.ao.quantization.default_per_channel_qconfig.weight)
# The qconfig is set on the root Sequential only.
qmodel.qconfig = torch.ao.quantization.default_per_channel_qconfig
print(qmodel.qconfig)
torch.ao.quantization.prepare(qmodel, inplace=True)

# Calibrate here
# The regular validation() pass doubles as the calibration run; it iterates
# the same loader used for evaluation elsewhere in this notebook.
res = validation(
    qmodel,
    criterion,
    None,
    validation_dataset_loader,
    10,
    identity_Info=imb_info,
    _device='cpu',
)
# NOTE(review): `v` is flipped to True only after calibration and before
# conversion; its meaning is not visible here -- confirm the intent.
qmodel[1].v = True
# Convert here
torch.ao.quantization.convert(qmodel, inplace=True)
# Report the serialized size of the converted model.
print_size_of_model(qmodel)

QConfig(activation=functools.partial(<class 'torch.ao.quantization.observer.MinMaxObserver'>, quant_min=0, quant_max=127){}, weight=functools.partial(<class 'torch.ao.quantization.observer.PerChannelMinMaxObserver'>, dtype=torch.qint8, qscheme=torch.per_channel_symmetric){})
cpu
test <==== epcoh 10 loss: 1.7404 auc: 0.9903
0-attr auc: 0.9974
1-attr auc: 0.9956
2-attr auc: 0.9763
Size (MB): 86.105431


In [9]:
# Pull the first batch from the loader and move it to `device` for inspection.
with torch.no_grad():
    for i, batch in enumerate(validation_dataset_loader):
        x, target, attr = (tensor.to(device) for tensor in batch)
        break

x.shape, target, attr

(torch.Size([6, 3, 200, 200]),
 tensor([1., 1., 1., 1., 0., 1.], device='cuda:0'),
 tensor([2, 1, 0, 1, 1, 0], device='cuda:0', dtype=torch.int32))

In [28]:
# Evaluate the converted (quantised) model on CPU with the same loader that
# was used for the float model.
# NOTE(review): in the recorded run the AUC falls from 0.9903 (float) to 0.4568
# after conversion, so the quantised model is not usable as-is -- investigate
# before relying on the size reduction.
qmodel[1].v = False
res = validation(
    qmodel,
    criterion,
    None,
    validation_dataset_loader,
    10,
    identity_Info=imb_info,
    _device=torch.device('cpu'),
)

res[1]

cpu
test <==== epcoh 10 loss: 6.2672 auc: 0.4568
0-attr auc: 0.4493
1-attr auc: 0.3831
2-attr auc: 0.4487


0.46523809523809523

In [13]:
# Display the second element of the most recent validation() result.
res[1]

0.5071428571428571

In [14]:
# Display the module tree of the quantised model
# (evaluate `model` instead to inspect the float one).
# model
qmodel

Sequential(
  (0): QuantizableResNet(
    (conv1): QuantizedConvReLU2d(3, 64, kernel_size=(7, 7), stride=(2, 2), scale=1.8434251546859741, zero_point=0, padding=(3, 3))
    (bn1): Identity()
    (relu): Identity()
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): QuantizableBottleneck(
        (conv1): QuantizedConvReLU2d(64, 256, kernel_size=(1, 1), stride=(1, 1), scale=0.9331411123275757, zero_point=0)
        (bn1): Identity()
        (conv2): QuantizedConvReLU2d(256, 256, kernel_size=(3, 3), stride=(1, 1), scale=1.5554529428482056, zero_point=0, padding=(1, 1), groups=64)
        (bn2): Identity()
        (conv3): QuantizedConv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), scale=2.313227415084839, zero_point=74)
        (bn3): Identity()
        (relu): ReLU()
        (downsample): Sequential(
          (0): QuantizedConv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), scale=4.313520431518555, zero_point=85

In [None]:
# Display the qconfig attached to the root of `qmodel`.
qmodel.qconfig