# Post Hoc Quantisation of RNLFT Models with HuggingFace

In [1]:
import os
import argparse
import random
import time
import json

import numpy as np
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import *
from torch.optim import *
import torch.nn.functional as F

from sklearn.metrics import *
from sklearn.model_selection import KFold

import sys
sys.path.append('.')

from src.modules import *
from src.data_handler import *
from src import logger
from src.class_balanced_loss import *
from typing import NamedTuple
from torchvision.models import efficientnet as efn

from train_glaucoma_fair_fin_hf import train, validation, Identity_Info, quantifiable_efficientnet

from fairlearn.metrics import *

imb_info = Identity_Info()

In [2]:
out_dim = 1
criterion = nn.BCEWithLogitsLoss()
predictor_head = nn.Sigmoid()
in_feat_to_final = 1280
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

fin_mu = 0.01
fin_sigma = 1.
fin_momentum = 0.3
modality_types = 'rnflt'
task = 'cls'
model_type = 'efficientnet'
normalise_data = False
data_dir = "../quant_notes/data_cmpr"
image_size = 200
attribute_type = 'race'  # CHANGE THIS FOR DIFF MODELS

In [3]:
trn_dataset = EyeFair(
    os.path.join(data_dir, "train"),
    depth=3 if model_type == "resnext" else 1,
    modality_type=modality_types,
    task=task,
    resolution=image_size,
    attribute_type=attribute_type,
    normalise_data=normalise_data
)

min: -31.9900, max: 2.2700


In [4]:
batch_size = 6
validation_dataset_loader = torch.utils.data.DataLoader(trn_dataset, batch_size=batch_size, shuffle=False, pin_memory=True, drop_last=False)

In [5]:
def test(model, criterion, optimizer, data_loader, epoch, identity_Info=None, _device='cuda'):
    res = validation(model, criterion, None, validation_dataset_loader, 10, identity_Info=imb_info, _device=_device)
    return res[1]

In [6]:
def print_size_of_model(model):
    torch.save(model.state_dict(), "temp.p")
    print('Size (MB):', os.path.getsize("temp.p")/1e6)
    os.remove('temp.p')

##########################

In [7]:
!pip freeze | grep accelerate

accelerate==0.31.0


In [31]:
import torch
import transformers
from transformers import AutoImageProcessor, AutoModelForImageClassification, QuantoConfig
# from optimum.quanto import qint8, quantize

model_paths_race = [
"/home/platelminto/Documents/uu/dev/hcml/groupassingment/Harvard-GF-Quantization/results/crosssectional_rnflt_fin_race_ablation_of_sigma/fullysup_efficientnet_rnflt_Taskcls_lr5e-5_bz6_normdata0_6838_auc0.8511",
"/home/platelminto/Documents/uu/dev/hcml/groupassingment/Harvard-GF-Quantization/results/crosssectional_rnflt_fin_race_ablation_of_sigma/fullysup_efficientnet_rnflt_Taskcls_lr5e-5_bz6_normdata0_8030_auc0.8426",
"/home/platelminto/Documents/uu/dev/hcml/groupassingment/Harvard-GF-Quantization/results/crosssectional_rnflt_fin_race_ablation_of_sigma/fullysup_efficientnet_rnflt_Taskcls_lr5e-5_bz6_normdata0_6057_auc0.8351",
"/home/platelminto/Documents/uu/dev/hcml/groupassingment/Harvard-GF-Quantization/results/crosssectional_rnflt_fin_race_ablation_of_sigma/fullysup_efficientnet_rnflt_Taskcls_lr5e-5_bz6_normdata0_99_auc0.8382",
"/home/platelminto/Documents/uu/dev/hcml/groupassingment/Harvard-GF-Quantization/results/crosssectional_rnflt_fin_race_ablation_of_sigma/fullysup_efficientnet_rnflt_Taskcls_lr5e-5_bz6_normdata0_5677_auc0.8419"
]

model_paths_gender = [
"/home/platelminto/Documents/uu/dev/hcml/groupassingment/Harvard-GF-Quantization/results/crosssectional_rnflt_fin_gender_ablation_of_sigma/fullysup_efficientnet_rnflt_Taskcls_lr5e-5_bz6_normdata0_1023_auc0.8284",
"/home/platelminto/Documents/uu/dev/hcml/groupassingment/Harvard-GF-Quantization/results/crosssectional_rnflt_fin_gender_ablation_of_sigma/fullysup_efficientnet_rnflt_Taskcls_lr5e-5_bz6_normdata0_2358_auc0.8323",
"/home/platelminto/Documents/uu/dev/hcml/groupassingment/Harvard-GF-Quantization/results/crosssectional_rnflt_fin_gender_ablation_of_sigma/fullysup_efficientnet_rnflt_Taskcls_lr5e-5_bz6_normdata0_3086_auc0.8368",
"/home/platelminto/Documents/uu/dev/hcml/groupassingment/Harvard-GF-Quantization/results/crosssectional_rnflt_fin_gender_ablation_of_sigma/fullysup_efficientnet_rnflt_Taskcls_lr5e-5_bz6_normdata0_3290_auc0.8479",
"/home/platelminto/Documents/uu/dev/hcml/groupassingment/Harvard-GF-Quantization/results/crosssectional_rnflt_fin_gender_ablation_of_sigma/fullysup_efficientnet_rnflt_Taskcls_lr5e-5_bz6_normdata0_6044_auc0.8258"
]

if attribute_type == "race":
    model_paths = model_paths_race
elif attribute_type == "gender":
    model_paths = model_paths_gender
else:
    raise ValueError("Invalid attribute type")

actual_model_paths = []

for model_path in model_paths:
    actual_model_paths.append(model_path + "/best_model")
    # actual_model_paths.append(model_path + "/model_epoch_9")

model_paths = actual_model_paths
model_paths.reverse()

In [32]:
from transformers import AutoModel, AutoConfig
from src.modules import EfficientNetWrapper
from safetensors.torch import load_file

models = []

for model_path in model_paths:
    config = AutoConfig.from_pretrained(model_path, device='cuda')
    
    model = EfficientNetWrapper(config)
    
    state_dict = load_file(model_path + "/model.safetensors")
    model.load_state_dict(state_dict)
    
    model.eval()
    model.to('cuda')
    
    models.append(model)

In [33]:
model.dtype

torch.float32

In [34]:
for model in models[-1:]:
    test(model, criterion, None, validation_dataset_loader, 10, identity_Info=imb_info, _device=device)

cuda:0
test <==== epcoh 10 loss: 0.5278 auc: 0.8512
0-attr auc: 0.8754
1-attr auc: 0.8065
2-attr auc: 0.8512


In [35]:
from optimum.quanto import quantize, qint8, qint4, qint2, QTensor

In [36]:
for model in models:
    quantize(model, weights=qint2)

In [37]:
test(models[-1], criterion, None, validation_dataset_loader, 10, identity_Info=imb_info, _device=device)

cuda:0
test <==== epcoh 10 loss: 26633.8418 auc: 0.5398
0-attr auc: 0.5275
1-attr auc: 0.5426
2-attr auc: 0.5041


0.5455555555555556

In [20]:
from optimum.quanto import freeze

In [17]:
for model in models:
    freeze(model)

In [55]:
print_size_of_model(model)

Size (MB): 26.609645


In [56]:
model = models[-1]

In [38]:
# QAT, don't freeze above if u want to do this.
for model in models:
    model.train()
    scaler = None #torch.cuda.amp.GradScaler()
    
    optimizer = AdamW(model.parameters(), lr=5e-5, betas=(0.0, 0.1), weight_decay=6e-5)
    
    scheduler = StepLR(optimizer, step_size=30, gamma=0.1)
    
    trn_dataset = EyeFair(os.path.join("../quant_notes/data_cmpr/", 'train'), normalise_data=False, modality_type="rnflt", task="cls", resolution=200, attribute_type="gender", depth=1)
    for epoch in range(4):
        loss_batch = []
        top1_accuracy_batch = []
        preds = []
        gts = []
        attrs = []
    
        train_dataset_loader = torch.utils.data.DataLoader(
            trn_dataset, batch_size=6, shuffle=True,
            num_workers=8, pin_memory=True, drop_last=True)
    
        for batch_idx, (input, target, attr) in enumerate(train_dataset_loader):
            input = input.to(device)
            target = target.to(device)
            attr = attr.to(device)
            
            optimizer.zero_grad()
        
            pred = model(input, attr)
            
            if isinstance(pred, QTensor):
                pred = pred.dequantize()
            
            pred = pred.squeeze(1)
        
            loss = criterion(pred, target)
            
            pred_prob = torch.sigmoid(pred.detach())
            preds.append(pred_prob.detach().cpu().numpy())
            gts.append(target.detach().cpu().numpy())
            attrs.append(attr.detach().cpu().numpy())
        
            loss_batch.append(loss.item())
            
            top1_accuracy = accuracy(pred.detach().cpu().numpy(), target.detach().cpu().numpy(), topk=(1,))
            top1_accuracy_batch.append(top1_accuracy)
        
            loss.backward()
            optimizer.step()
        
        preds = np.concatenate(preds, axis=0)
        gts = np.concatenate(gts, axis=0)
        attrs = np.concatenate(attrs, axis=0).astype(int)
        cur_auc = auc_score(preds, gts)
        acc = accuracy(preds, gts, topk=(1,))
        
        print(f"train ====> epoch {epoch} loss: {np.mean(loss_batch):.4f} auc: {cur_auc:.4f}")
    model.eval()

min: -31.9900, max: 2.2700
train ====> epoch 0 loss: 0.5854 auc: 0.7621
train ====> epoch 1 loss: 0.5502 auc: 0.8018
train ====> epoch 2 loss: 0.5253 auc: 0.8180
train ====> epoch 3 loss: 0.4962 auc: 0.8439
min: -31.9900, max: 2.2700
train ====> epoch 0 loss: 0.5904 auc: 0.7568
train ====> epoch 1 loss: 0.5658 auc: 0.7798
train ====> epoch 2 loss: 0.5304 auc: 0.8113
train ====> epoch 3 loss: 0.5018 auc: 0.8353
min: -31.9900, max: 2.2700
train ====> epoch 0 loss: 0.6035 auc: 0.7441
train ====> epoch 1 loss: 0.5424 auc: 0.7970
train ====> epoch 2 loss: 0.5315 auc: 0.8097
train ====> epoch 3 loss: 0.5118 auc: 0.8244
min: -31.9900, max: 2.2700
train ====> epoch 0 loss: 0.5870 auc: 0.7514
train ====> epoch 1 loss: 0.5364 auc: 0.8030
train ====> epoch 2 loss: 0.5186 auc: 0.8201
train ====> epoch 3 loss: 0.5038 auc: 0.8324
min: -31.9900, max: 2.2700
train ====> epoch 0 loss: 0.5981 auc: 0.7555
train ====> epoch 1 loss: 0.5430 auc: 0.8028
train ====> epoch 2 loss: 0.5155 auc: 0.8238
train ====

In [39]:
test(model, criterion, None, validation_dataset_loader, 10, identity_Info=imb_info, _device=device)

cuda:0
test <==== epcoh 10 loss: 0.5704 auc: 0.8295
0-attr auc: 0.8460
1-attr auc: 0.7834
2-attr auc: 0.8600


0.7211111111111111

In [40]:
print_size_of_model(model)

Size (MB): 26.609645


In [41]:
freeze(model)

In [42]:
print_size_of_model(model)

Size (MB): 3.057125


In [43]:
import torch
import os
import numpy as np
from src.modules import forward_model_with_fin
from src.data_handler import EyeFair

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

def final_metrics(model, validation_dataset_loader, attribute_type, number_of_classes):
    # model.eval()
    
    preds = []
    gts = []
    attrs = []

    with torch.no_grad():
        for i, (input, target, attr) in enumerate(validation_dataset_loader):
            input = input.to(device)
            target = target.to(device)
            attr = attr.to(device)
            
            pred = forward_model_with_fin(model, input, attr)
            pred = pred.squeeze(1)

            pred_prob = torch.sigmoid(pred.detach())
            preds.append(pred_prob.detach().cpu().numpy())
            gts.append(target.detach().cpu().numpy())
            attrs.append(attr.detach().cpu().numpy())

    preds = np.concatenate(preds, axis=0)
    gts = np.concatenate(gts, axis=0)
    attrs = np.concatenate(attrs, axis=0).astype(int)
    
    acc = accuracy(preds, gts, topk=(1,))
    auc = auc_score(preds, gts)
    es_acc = equity_scaled_accuracy(preds, gts, attrs)
    es_auc = equity_scaled_AUC(preds, gts, attrs)

    pred_labels = (preds >= 0.5).astype(float)
    dpd = demographic_parity_difference(gts, pred_labels, sensitive_features=attrs)
    eod = equalized_odds_difference(gts, pred_labels, sensitive_features=attrs)

    auc_groups = []
    for attr_value in range(number_of_classes):
        mask = attrs == attr_value
        auc_groups.append(auc_score(preds[mask], gts[mask]))

    if attribute_type == "race":
        return es_acc, acc, es_auc, auc, auc_groups[0], auc_groups[1], auc_groups[2], dpd, eod
    elif attribute_type == "gender":
        return es_acc, acc, es_auc, auc, auc_groups[0], auc_groups[1], dpd, eod


in_dim = 1
out_dim = 1
extra_info = {
    'in_feat_to_final': 1280,
    'normalization_type': 'fin',
    'ag_norm_params': {"num_attr": 3, "dim": 1280, "mu": 0.01, "sigma": 1., "momentum": 0.3}
}

In [44]:
# Prepare the validation dataset and loader
validation_dataset = EyeFair('../quant_notes/data_cmpr/test', normalise_data=False, modality_type='rnflt', task='cls', resolution=200, attribute_type=attribute_type)
validation_dataset_loader = torch.utils.data.DataLoader(validation_dataset, batch_size=256, shuffle=False, num_workers=8, pin_memory=True)

min: -31.1600, max: 2.5300


In [45]:
for model in models:
    # Perform validation and get metrics
    metrics = final_metrics(model, validation_dataset_loader, attribute_type, number_of_classes=3 if attribute_type == "race" else 2)
    
    # Print the metrics
    print("\t".join(f"{metric:.4f}" for metric in metrics))

0.6952	0.7478	0.7516	0.8236	0.8652	0.7722	0.8207	0.2133	0.2639
0.6741	0.7100	0.7362	0.8065	0.8380	0.7530	0.8169	0.1867	0.2646
0.5891	0.6389	0.7557	0.7949	0.8227	0.7764	0.7892	0.0900	0.1245
0.6657	0.7367	0.7392	0.8193	0.8343	0.7639	0.8573	0.1267	0.1172
0.6964	0.7211	0.7588	0.8295	0.8460	0.7834	0.8600	0.1567	0.1868


In [109]:
for model_name in model_paths:
    print(model_name)

/home/platelminto/Documents/uu/dev/hcml/groupassingment/Harvard-GF-Quantization/results/crosssectional_rnflt_fin_gender_ablation_of_sigma/fullysup_efficientnet_rnflt_Taskcls_lr5e-5_bz6_normdata0_6044_auc0.8258/model_epoch_9
/home/platelminto/Documents/uu/dev/hcml/groupassingment/Harvard-GF-Quantization/results/crosssectional_rnflt_fin_gender_ablation_of_sigma/fullysup_efficientnet_rnflt_Taskcls_lr5e-5_bz6_normdata0_6044_auc0.8258/best_model
/home/platelminto/Documents/uu/dev/hcml/groupassingment/Harvard-GF-Quantization/results/crosssectional_rnflt_fin_gender_ablation_of_sigma/fullysup_efficientnet_rnflt_Taskcls_lr5e-5_bz6_normdata0_3290_auc0.8479/model_epoch_9
/home/platelminto/Documents/uu/dev/hcml/groupassingment/Harvard-GF-Quantization/results/crosssectional_rnflt_fin_gender_ablation_of_sigma/fullysup_efficientnet_rnflt_Taskcls_lr5e-5_bz6_normdata0_3290_auc0.8479/best_model
/home/platelminto/Documents/uu/dev/hcml/groupassingment/Harvard-GF-Quantization/results/crosssectional_rnflt_f

In [3]:
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))
print("Torch version:", torch.__version__)
print("Transformers version:", transformers.__version__)


True
NVIDIA GeForce RTX 4060 Ti
Torch version: 2.3.1
Transformers version: 4.42.0.dev0


In [16]:
!pip show bitsandbytes

Name: bitsandbytes
Version: 0.43.1
Summary: k-bit optimizers and matrix multiplication routines.
Home-page: https://github.com/TimDettmers/bitsandbytes
Author: Tim Dettmers
Author-email: dettmers@cs.washington.edu
License: MIT
Location: /home/platelminto/miniconda3/envs/harvard_gf/lib/python3.10/site-packages
Requires: numpy, torch
Required-by: 
