# Tutorial: quantize and run custom network

This brief tutorial shows how to compress a custom network with EfficientBioAI and do the inference. We use a simple 2d unet to do the 2d semantic segmentation task on the [Simulated nuclei of HL60 cells stained with Hoescht](http://celltrackingchallenge.net/2d-datasets/).  PTQ int8 quantization are tried.

In [4]:
import os
import numpy as np
import torch
import monai
from monai.data import DataLoader, Dataset
from monai.transforms import (
    RandSpatialCropSamplesd,
    Compose,
    AddChanneld,
    ToTensord,
    Transform,
    CastToTyped,
    EnsureTyped,
    ScaleIntensityRangePercentilesd,
)
from monai.losses import DiceLoss
from tqdm.contrib import tenumerate
from aicsimageio import AICSImage

## 1. Prepare the dataset

#### 1.1 Download the dataset:

In [None]:
!wget http://data.celltrackingchallenge.net/training-datasets/Fluo-N2DH-SIM+.zip -P ./data
!unzip ./data/Fluo-N2DH-SIM+.zip -d ./data
!rm ./data/Fluo-N2DH-SIM+.zip

#### 1.2 Generate the dataloader

In [39]:
train_data_path = "./data/Fluo-N2DH-SIM+/02"
train_gt_path = "./data/Fluo-N2DH-SIM+/02_GT/SEG"

test_data_path = "./data/Fluo-N2DH-SIM+/01"
test_gt_path = "./data/Fluo-N2DH-SIM+/01_GT/SEG"

In [40]:
def generate_data_dict(data_path, gt_path):
    data_dicts = []

    for i, (data, label) in tenumerate(zip(os.listdir(data_path), os.listdir(gt_path))):
        data_dict = {}
        data_dict["img"] = os.path.join(data_path, data)
        data_dict["seg"] = os.path.join(gt_path, label)
        data_dict["fn"] = data.split(".")[0]
        data_dicts.append(data_dict)
    return data_dicts


class LoadTiffd(Transform):
    def __init__(self, keys=["img", "seg"]):
        super().__init__()
        self.keys = keys

    def __call__(self, data):
        d = dict(data)
        for key in self.keys:
            x = AICSImage(data[key])
            d[key] = x.get_image_data("YX", S=0, T=0, C=0)
        return d


class Ins2Semd(Transform):
    def __init__(self, keys=["seg"]):
        super().__init__()
        self.keys = keys

    def __call__(self, data):
        d = dict(data)
        for key in self.keys:
            d[key][d[key] != 0] = 1
        return d


transform = Compose(
    [
        LoadTiffd(keys=["img", "seg"]),
        AddChanneld(keys=["img", "seg"]),
        CastToTyped(keys=["img"], dtype=np.float32),
        Ins2Semd(keys=["seg"]),
        EnsureTyped(keys=["img", "seg"]),
        ScaleIntensityRangePercentilesd(
            keys=["img"], lower=0.5, upper=99.5, b_min=0, b_max=1
        ),
        RandSpatialCropSamplesd(
            keys=["img", "seg"], roi_size=(256, 256), num_samples=4, random_size=False
        ),
        ToTensord(keys=["img", "seg"]),
    ]
)

In [41]:
dataset = Dataset(
    data=generate_data_dict(train_data_path, train_gt_path), transform=transform
)
dataloader = DataLoader(dataset, batch_size=2, shuffle=True, num_workers=0)

150it [00:00, 94494.68it/s]


## 2. Train the model

We defined a naive Unet model and train it with several epochs.

In [8]:
from model.unet import Unet

In [9]:
net = Unet(in_channels=1, classes=2)
criterion = DiceLoss(to_onehot_y=True, softmax=True)
optimizer = torch.optim.Adam(net.parameters(), 1e-2)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
num_epoch = 20
net.to(device)

torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
for i in range(num_epoch):
    # train step:
    net.train()
    epoch_loss = 0
    for j, batch_data in tenumerate(dataloader):
        data, label = batch_data["img"].to(device), batch_data["seg"].to(device)
        optimizer.zero_grad()
        out = net(data)
        loss = criterion(out, label)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    print(f"epoch {i+1}/{num_epoch}, avg loss: {epoch_loss / len(dataloader)}")
    scheduler.step()

100%|██████████| 75/75 [00:42<00:00,  1.76it/s]

epoch 1/1, avg loss: 0.4054159450531006





In [10]:
torch.save(net.state_dict(), "./unet.pth")

## 3. Compress the model

implement PTQ int8 quantization.

In [11]:
state_dict = torch.load("./unet.pth")
net = Unet(in_channels=1, classes=2)
net.load_state_dict(state_dict)

<All keys matched successfully>

In [18]:
def calibrate(model, data, calib_num, device):
    model.eval()
    with torch.no_grad():
        for i, x in tenumerate(data):
            y_hat = model(x["img"].as_tensor())
            if i >= calib_num:
                break
    return model


def fine_tune(model, data, optimizer, criterion, num_epoch):
    model.train()
    for i in range(num_epoch):
        epoch_loss = 0
        for j, batch_data in tenumerate(data):
            data, label = batch_data["img"].to(device), batch_data["seg"].to(device)
            optimizer.zero_grad()
            out = model(data)
            loss = criterion(out, label)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        print(f"fine tune epoch {i+1}/{num_epoch}, avg loss: {epoch_loss / len(data)}")
    return model

In [13]:
from efficientbioai.compress_ppl import Pipeline
from efficientbioai.utils import Dict2ObjParser
import yaml

In [14]:
cfg_path = "./custom_config.yaml"
with open(cfg_path, "r") as stream:
    config_yml = yaml.safe_load(stream)
    config = Dict2ObjParser(config_yml).parse()

In [None]:
exp_path = "./exp"
pipeline = Pipeline.setup(config_yml)
pipeline(net, dataloader, fine_tune, calibrate, exp_path)
pipeline.network2ir()

# 4. Infer the model

We use the openvino inference engine to do the inference.

In [30]:
from efficientbioai.infer.backend.openvino import create_opv_model
from monai.inferers import sliding_window_inference

In [22]:
model_name = config.model.model_name
cfg_path = os.path.join(exp_path, f"{model_name}.yaml")
infer_path = os.path.join(exp_path, "academic_deploy_model.xml")

In [27]:
test_transform = Compose(
    [
        LoadTiffd(keys=["img", "seg"]),
        AddChanneld(keys=["img", "seg"]),
        CastToTyped(keys=["img"], dtype=np.float32),
        Ins2Semd(keys=["seg"]),
        EnsureTyped(keys=["img", "seg"]),
        ScaleIntensityRangePercentilesd(
            keys=["img"], lower=0.5, upper=99.5, b_min=0, b_max=1
        ),
        ToTensord(keys=["img", "seg"]),
    ]
)
test_dataset = Dataset(
    data=generate_data_dict(test_data_path, test_gt_path), transform=test_transform
)
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=0)

65it [00:00, 115815.53it/s]


inference with the quantized model.

In [33]:
quantized_model = create_opv_model(infer_path)

In [31]:
for i, batch_data in tenumerate(test_dataloader):
    data, label = batch_data["img"], batch_data["seg"]
    sliding_window_inference(
        inputs=data,
        predictor=quantized_model,
        device=torch.device("cpu"),
        roi_size=(128, 128),
        sw_batch_size=1,
        overlap=0.1,
        mode="constant",
    )

100%|██████████| 65/65 [00:29<00:00,  2.24it/s]


inference with the normal model (float32, not on the engine).

In [34]:
normal_model = net
normal_model.eval()
for i, batch_data in tenumerate(test_dataloader):
    data, label = batch_data["img"], batch_data["seg"]
    sliding_window_inference(
        inputs=data,
        predictor=normal_model,
        device=torch.device("cpu"),
        roi_size=(128, 128),
        sw_batch_size=1,
        overlap=0.1,
        mode="constant",
    )

100%|██████████| 65/65 [01:05<00:00,  1.01s/it]
