> **NOTE**: you should run `openvino_benchmark.ipynb` notebook first that will generate `openvino_fp32` folder with the exported models that are used for quantization.

In [26]:
import os
import torch

from PIL import Image
import open_clip

In [27]:
name = "ViT-B-16" #-plus-240
pretrained = "laion400m_e32"
model, train_transform, eval_transform = open_clip.create_model_and_transforms(name, pretrained=pretrained)

In [28]:
from pathlib import Path

source_dir = "openvino_fp32"
ouptut_dir = "openvino_int8"
if not os.path.exists(ouptut_dir):
    os.makedirs(ouptut_dir)
with open(Path(ouptut_dir) / "model_index.txt", 'w') as fd:
    fd.write(f"{name},{pretrained}\n")

In [29]:
tokenizer = open_clip.get_tokenizer(name)

image = eval_transform(Image.open("../../docs/CLIP.png")).unsqueeze(0)
text = tokenizer(["a diagram", "a dog", "a cat"])

### Prepare data for optimization

In [30]:
import random
from io import BytesIO
import requests
import numpy as np

def get_pil_from_url(url):
    response = requests.get(url)
    image = Image.open(BytesIO(response.content))
    return image.convert("RGB")

def check_text_data(data):
    if isinstance(data, str):
        return True
    if isinstance(data, list):
        return all(isinstance(x, str) for x in data)
    return False    

def laion2B_preprocess_train(examples, train_transforms, tokenize_captions, image_column="url", text_column="caption"):
    url = examples[image_column]
    try:
        image = get_pil_from_url(url)
        if not check_text_data(examples[text_column]):
            raise ValueError("Text data is not valid")
    except Exception:
        print(f"Can't load image from url: {url}")
        return None

    examples["pixel_values"] = train_transforms(image)
    examples["text"] = tokenize_captions(examples)
    return examples

def tokenize_captions(examples, is_train=True):
    caption_column = "caption"
    captions = []
    caption = examples[caption_column]
    if isinstance(caption, str):
        captions.append(caption)
    elif isinstance(caption, (list, np.ndarray)):
        # take a random caption if there are multiple
        captions.append(random.choice(caption) if is_train else caption[0])
    else:
        raise ValueError(f"Caption column `{caption_column}` should contain either strings or lists of strings.")
    input_ids = tokenizer(captions[0])[0]
    return input_ids

In [31]:
from datasets import load_dataset

max_train_samples = 10000
dataset = load_dataset("laion/laion400m", streaming=True)
train_dataset = dataset["train"].shuffle(seed=42, buffer_size=max_train_samples)

In [32]:
cast_dtype = model.transformer.get_cast_dtype()

def collate_fn(examples):
    examples = [laion2B_preprocess_train(example, train_transform, tokenize_captions) for example in examples]
    if not any(examples):
        return None
    pixel_values = torch.stack([example["pixel_values"] for example in examples])
    pixel_values = pixel_values.to(memory_format=torch.contiguous_format).float()
    
    input_ids = torch.stack([example["text"] for example in examples])
    return {
        "pixel_values": pixel_values,
        "input_ids": input_ids
    }

In [33]:
import itertools
from tqdm.notebook import tqdm

def prepare_calibration_data(dataloader, init_steps):
    data = []

    print(f"Fetching {init_steps} for the initialization...")
    for _, batch in tqdm(zip(range(init_steps), itertools.islice(dataloader, 0, init_steps))):
        with torch.no_grad():
            # Convert images to latent space
            if batch:
                data.append(
                    (
                        batch["pixel_values"].to("cpu"),
                        batch["input_ids"].to("cpu")
                    )
                )
    return data

In [34]:
batch_size = 1
dataloader_num_workers = 4
dataloader = torch.utils.data.DataLoader(
        train_dataset, collate_fn=collate_fn, batch_size=batch_size, num_workers=dataloader_num_workers
    )

In [35]:
opt_init_steps = 500
calibration_data = prepare_calibration_data(dataloader, opt_init_steps)

Fetching 500 for the initialization...


0it [00:00, ?it/s]

Can't load image from url: https://s3.amazonaws.com/whataspace/space_pictures/pictures/000/033/180/fullwidth/_DSC0146.jpg?1591610094
Can't load image from url: https://i0.wp.com/des.gearbest.com/uploads/pdm-desc-pic/Electronic/image/2016/11/25/1480056293210706.jpg?w=960
Can't load image from url: http://cdn3.static-homes.com/cgi-bin/readimage/9eb0ae4fa92b5dfd09b03c9e3dc997c5_1_resizeto_193x143x1
Can't load image from url: https://img.shellporn.com/spcs/thumbs/155/299_hotel_wang_.jpg
Can't load image from url: https://sslh.ulximg.com/image/740x493/cover/1533483761_ac8b9fbc89ca57a7269ec0c7f5947094.jpg/2143af354a3c53d14969369d2c6cbc04/1533483761_cc5f2e1e234c809fa4408488e0b19e4d.jpg
Can't load image from url: http://resizing.flixster.com/wp7S_BA23xLXC2iPn_Ozyuck-m8=/320x455/dkpu1ddg7pbsk.cloudfront.net/movie/26/93/269324_ori.jpg
Can't load image from url: http://dyn1.heritagestatic.com/lf?set=path%5B9%2F5%2F5%2F4%2F9554311%5D%2Csizedata%5B220x350%5D&call=url%5Bfile%3Aproduct.chain%5D
Can't

### Quantize Image Encoder

In [48]:
import nncf
from nncf.quantization.advanced_parameters  import AdvancedQuantizationParameters, AdvancedBiasCorrectionParameters
from nncf.scopes import IgnoredScope

ignored_scope = IgnoredScope(
    types = ["Divide"], # fix for combination of ToMe method + 8-bit quantization
    names = [
        "/transformer/resblocks.0/attn/MatMul_2",
        "/transformer/resblocks.1/attn/MatMul_2",
        "/transformer/resblocks.2/attn/MatMul_2",
        "/transformer/resblocks.3/attn/MatMul_2",
        "/transformer/resblocks.4/attn/MatMul_2",
        "/transformer/resblocks.5/attn/MatMul_2",
        "/transformer/resblocks.6/attn/MatMul_2",
        "/transformer/resblocks.7/attn/MatMul_2",
        "/transformer/resblocks.8/attn/MatMul_2",
        "/transformer/resblocks.9/attn/MatMul_2",
        "/transformer/resblocks.10/attn/MatMul_2",
        "/transformer/resblocks.11/attn/MatMul_2",
    ]
)


advanced_parameters = AdvancedQuantizationParameters(
    backend_params = {'use_pot': True},  # use legacy backend that supports Bias Correction
    bias_correction_params = AdvancedBiasCorrectionParameters(apply_for_all_nodes=True, threshold=float('inf')), # use Bias Correction for all the quantized nodes
    overflow_fix="disable" # disable overflow fix (can lead to accuracy drop on legacy platforms w/o DL Boost)
)

def quantize_image_encoder(model, data_loader):
    quantization_dataset = nncf.Dataset(data_loader, lambda x: x[0])

    quantized_model = nncf.quantize(
                            model,
                            quantization_dataset,
                            model_type=nncf.ModelType.TRANSFORMER,
                            fast_bias_correction=False,
                            subset_size=opt_init_steps,
                            advanced_parameters=advanced_parameters,
                            ignored_scope=ignored_scope,
                            )
    return quantized_model

In [49]:
import openvino.runtime as ov
from pathlib import Path

ov_model_path = Path(source_dir) / "image_encoder.xml"

core = ov.Core()
image_encoder = core.read_model(ov_model_path)

q_image_encoder = quantize_image_encoder(image_encoder, calibration_data)

INFO:nncf:24 ignored nodes was found by types in the NNCFGraph
INFO:nncf:212 ignored nodes was found by types in the NNCFGraph
INFO:nncf:Not adding activation input quantizer for operation: 6 /Add_1
INFO:nncf:Not adding activation input quantizer for operation: 9 /ln_pre/Div
11 /ln_pre/Mul
13 /ln_pre/Add_1

INFO:nncf:Not adding activation input quantizer for operation: 17 /transformer/resblocks.0/ln_1/Div
20 /transformer/resblocks.0/ln_1/Mul
24 /transformer/resblocks.0/ln_1/Add_1

INFO:nncf:Not adding activation input quantizer for operation: 124 /transformer/resblocks.0/attn/Div_2
INFO:nncf:Not adding activation input quantizer for operation: 16 /transformer/resblocks.0/Add
INFO:nncf:Not adding activation input quantizer for operation: 19 /transformer/resblocks.0/ln_2/Div
23 /transformer/resblocks.0/ln_2/Mul
28 /transformer/resblocks.0/ln_2/Add_1

INFO:nncf:Not adding activation input quantizer for operation: 18 /transformer/resblocks.0/Add_1
INFO:nncf:Not adding activation input quan

 81%|████████  | 404/500 [00:36<00:08, 10.95it/s]


In [50]:
ov.serialize(q_image_encoder, ouptut_dir + "/image_encoder.xml")

### Quantize Text Encoder

In [51]:
advanced_parameters = AdvancedQuantizationParameters(
    backend_params = {'use_pot': True},  # use legacy backend that supports Bias Correction
    bias_correction_params = AdvancedBiasCorrectionParameters(apply_for_all_nodes=True, threshold=float('inf')), # use Bias Correction for all the quantized nodes
    overflow_fix="disable" # disable overflow fix (can lead to accuracy drop on legacy platforms w/o DL Boost)
    )


def quantize_text_encoder(model, data_loader):
    quantization_dataset = nncf.Dataset(data_loader, lambda x: x[1])

    quantized_model = nncf.quantize(
                            model,
                            quantization_dataset,
                            model_type=nncf.ModelType.TRANSFORMER,
                            fast_bias_correction=False,
                            subset_size=opt_init_steps,
                            advanced_parameters=advanced_parameters,
                            ignored_scope=ignored_scope,
                            )
    return quantized_model

In [52]:
ov_model_path = Path(source_dir) / "text_encoder.xml"

core = ov.Core()
text_encoder = core.read_model(ov_model_path)

q_text_encoder = quantize_text_encoder(text_encoder, calibration_data)

INFO:nncf:223 ignored nodes was found by types in the NNCFGraph
INFO:nncf:Not adding activation input quantizer for operation: 6 /ArgMax
INFO:nncf:Not adding activation input quantizer for operation: 9 /Add_1
INFO:nncf:Not adding activation input quantizer for operation: 3 /Add
INFO:nncf:Not adding activation input quantizer for operation: 8 /transformer/resblocks.0/ln_1/Div
12 /transformer/resblocks.0/ln_1/Mul
17 /transformer/resblocks.0/ln_1/Add_1

INFO:nncf:Not adding activation input quantizer for operation: 121 /transformer/resblocks.0/attn/Div_2
INFO:nncf:Not adding activation input quantizer for operation: 7 /transformer/resblocks.0/Add
INFO:nncf:Not adding activation input quantizer for operation: 11 /transformer/resblocks.0/ln_2/Div
16 /transformer/resblocks.0/ln_2/Mul
22 /transformer/resblocks.0/ln_2/Add_1

INFO:nncf:Not adding activation input quantizer for operation: 10 /transformer/resblocks.0/Add_1
INFO:nncf:Not adding activation input quantizer for operation: 15 /transfo

 81%|████████  | 404/500 [00:17<00:04, 23.42it/s]


In [53]:
ov.serialize(q_text_encoder, ouptut_dir + "/text_encoder.xml")