# Training YOLO-NAS on a custom dataset, applying post-training quantization, exporting to ONNX, converting to OpenVINO IR, and running inference with the result
https://github.com/Deci-AI/super-gradients/blob/master/notebooks/yolo_nas_custom_dataset_fine_tuning_with_qat.ipynb

# Installations

In [None]:
!pip3 install -qq super-gradients==3.7.1 datasets[vision]~=2.1 pandas~=2.0

# DATASET SETUP

In [15]:
"""
    Remember what class names you labeled your images with. 
    If you assigned labels of "truck" and "car" for example, replace 
    the CLASS_NAMES line with CLASS_NAMES = ["truck", "car"]
"""
# Labels used when annotating the images; replace with your own list of class names.
CLASS_NAMES = ["vehicle"]
# Derived from the list so the two can never disagree.
NUM_CLASSES = len(CLASS_NAMES)

# Setup train and validation datasets

In [2]:
# Define training parameters 

"""

The program will concatenate data_dir/images_dir and data_dir/json_annotation_file. 
These paths are local to the .ipynb train file you are using. 

My project structure was as follows: 
ROOT 
    -Train.ipynb 
    -Vehicle_images
        -Image1.png 
        -Img2.png 
        -…. ImgX.png 
        -Train.json 
        -Val.json 

So, therefore, for me: 
data_dir = "vehicle_images"
images_dir = ""
json_annotation_file = "train.json"

If necessary, in dataset_params for train and val, update the 3 locations within the "transforms" parameter where 
it says target_size and input_dim to match your model's input dimensions in (H, W) format. 

"""


from super_gradients.training.datasets.detection_datasets.coco_format_detection import COCOFormatDetectionDataset
from super_gradients.training.transforms.transforms import (
    DetectionRandomAffine,
    DetectionHSV,
    DetectionHorizontalFlip,
    DetectionPaddedRescale,
    DetectionStandardize,
    DetectionTargetsFormatTransform,
)
from super_gradients.training.utils.collate_fn import DetectionCollateFN

# Keyword arguments for the training COCOFormatDetectionDataset.
# All paths are relative to data_dir.
# NOTE(review): the validation split uses input_dim=(640, 640) while this split
# uses (1920, 1088) - confirm the difference is intentional.
train_dataset_params = {
    "data_dir": "vehicle_images",  # directory that contains both the images and the json annotation file
    "images_dir": "",  # path FROM data_dir to the images (data_dir/path/to/images)
    "json_annotation_file": "train.json",  # path FROM data_dir to train.json
    "input_dim": (1920, 1088),
    "ignore_empty_annotations": False,
    "with_crowd": False,
    "all_classes_list": CLASS_NAMES,
    "transforms": [
        DetectionRandomAffine(
            degrees=0.0,
            scales=(0.5, 1.5),
            shear=0.0,
            target_size=(640, 640),  # update target_size if necessary
            filter_box_candidates=False,
            border_value=128,
        ),
        DetectionHSV(prob=1.0, hgain=5, vgain=30, sgain=30),
        DetectionHorizontalFlip(prob=0.5),
        DetectionPaddedRescale(input_dim=(640, 640)),  # update input_dim if necessary
        DetectionStandardize(max_value=255),
        DetectionTargetsFormatTransform(
            input_dim=(640, 640),  # update input_dim if necessary
            output_format="LABEL_CXCYWH",
        ),
    ],
}


# Keyword arguments for the validation COCOFormatDetectionDataset.
# Apply the same path / size edits here as for the training parameters.
# No random augmentation is listed for this split: only rescale, standardize
# and target formatting.
valid_dataset_params = {
    "data_dir": "vehicle_images",
    "images_dir": "",
    "json_annotation_file": "val.json",  # val.json, not train.json
    "input_dim": (640, 640),
    "ignore_empty_annotations": False,
    "with_crowd": False,
    "all_classes_list": CLASS_NAMES,
    "transforms": [
        DetectionPaddedRescale(input_dim=(640, 640), max_targets=300),
        DetectionStandardize(max_value=255),
        DetectionTargetsFormatTransform(
            input_dim=(640, 640),
            output_format="LABEL_CXCYWH",
        ),
    ],
}

# Build the training and validation datasets from the two parameter dicts.
trainset = COCOFormatDetectionDataset(**train_dataset_params)
valset = COCOFormatDetectionDataset(**valid_dataset_params)
     

[2024-07-11 11:38:32] INFO - crash_tips_setup.py - Crash tips is enabled. You can set your environment variable to CRASH_HANDLER=FALSE to disable it


The console stream is logged into /home/lpalombi/sg_logs/console.log


[2024-07-11 11:38:35] INFO - detection_dataset.py - Dataset Initialization in progress. `cache_annotations=True` causes the process to take longer due to full dataset indexing.
Indexing dataset annotations: 100%|██████████| 441/441 [00:00<00:00, 7207.49it/s]
[2024-07-11 11:38:35] INFO - detection_dataset.py - Dataset Initialization in progress. `cache_annotations=True` causes the process to take longer due to full dataset indexing.


Indexing dataset annotations: 100%|██████████| 111/111 [00:00<00:00, 10115.54it/s]


In [16]:
from torch.utils.data import DataLoader

# Loader-wide settings shared by the training and validation DataLoaders.
NUM_WORKERS = 0
BATCH_SIZE = 1  # update batch size

# Training loader: shuffled, and the last incomplete batch is dropped.
# persistent_workers is derived from NUM_WORKERS so it is only requested
# when worker processes are actually used.
train_dataloader_params = dict(
    shuffle=True,
    batch_size=BATCH_SIZE,
    drop_last=True,
    pin_memory=True,
    collate_fn=DetectionCollateFN(),
    num_workers=NUM_WORKERS,
    persistent_workers=NUM_WORKERS > 0,
)

# Validation loader: fixed order, and every sample is kept.
val_dataloader_params = dict(
    shuffle=False,
    batch_size=BATCH_SIZE,
    drop_last=False,
    pin_memory=True,
    collate_fn=DetectionCollateFN(),
    num_workers=NUM_WORKERS,
    persistent_workers=NUM_WORKERS > 0,
)

train_loader = DataLoader(trainset, **train_dataloader_params)
valid_loader = DataLoader(valset, **val_dataloader_params)

# Defining training hyperparameters

In [17]:
from super_gradients.training.losses import PPYoloELoss
from super_gradients.training.metrics import DetectionMetrics_050
from super_gradients.training.models.detection_models.pp_yolo_e import PPYoloEPostPredictionCallback

"""
    From the original tutorial I updated the following parameters: 

        Warmup_initial_lr: 1e-6
        Cosine_final_lr_ratio: 0.1 
        Optimizer: Adam 
        Lr_warmup_epochs: 3 
        Warmup_mode: linear_epoch_step 
        Ema: true 
        Delete average_best_models line 
        Ema_params: decay: 0.9, decay_type: threshold 
        Max_epochs: 25 

"""


# Hyperparameters passed to trainer.train() as training_params.
# Entries tagged "updated" differ from the original tutorial.
train_params = dict(
    warmup_initial_lr=1e-6,  # updated
    initial_lr=5e-4,
    lr_mode="cosine",
    cosine_final_lr_ratio=0.1,  # updated
    optimizer="Adam",  # updated
    zero_weight_decay_on_bias_and_bn=True,
    lr_warmup_epochs=3,  # updated
    warmup_mode="linear_epoch_step",  # updated
    optimizer_params={"weight_decay": 0.0001},
    ema=True,  # updated
    ema_params={"decay": 0.9, "decay_type": "threshold"},  # updated
    max_epochs=25,  # updated
    mixed_precision=True,
    loss=PPYoloELoss(use_static_assigner=False, num_classes=NUM_CLASSES, reg_max=16),
    valid_metrics_list=[
        DetectionMetrics_050(
            score_thres=0.1,
            top_k_predictions=300,
            num_cls=NUM_CLASSES,
            normalize_targets=True,
            include_classwise_ap=True,
            class_names=CLASS_NAMES,
            post_prediction_callback=PPYoloEPostPredictionCallback(
                score_threshold=0.01,
                nms_top_k=1000,
                max_predictions=300,
                nms_threshold=0.7,
            ),
        )
    ],
    # Name of the validation metric the trainer is told to watch.
    metric_to_watch="mAP@0.50",
)
     

# Instantiate YoloNAS model and launch training 

In [None]:
"""
    KEEP IN MIND: The training and post-training quantization must be done in the same Jupyter Notebook session. 
    Do not restart the kernel in between these two steps or PTQ will not run. 


"""


from super_gradients.training import Trainer
from super_gradients.common.object_names import Models
from super_gradients.training import models
import os 

# Directory the notebook was started from; later cells build paths from it.
HOME = os.getcwd()

# Trainer setup:
#   - experiment_name can be anything you want.
#   - ckpt_root_dir can be anything you want; it is typically called "checkpoints".
#   - No need to create any directories; these two parameters only define where
#     the trained model data will be saved.
trainer = Trainer(experiment_name="vehicle_training", ckpt_root_dir="checkpoints")  # update

# Request the architecture through the Models enum, the same way the later
# checkpoint-loading cells do, instead of a hand-typed string.
model = models.get(Models.YOLO_NAS_S, num_classes=NUM_CLASSES, pretrained_weights="coco")
trainer.train(model=model, training_params=train_params, train_loader=train_loader, valid_loader=valid_loader)

In [None]:
"""
    Take a second to locate your trained model. It should be in your checkpoints/experiment_name/RUN_####. 
    You should see various data, including a file called ckpt_best.pth. This is the file we will be focusing on. 
    The average_model.pth and ckpt_latest.pth gives the average weights and latest epoch, not the “best” model. 

    Update CKPT_PATH to match. 

"""
# Checkpoints are written under <ckpt_root_dir>/<experiment_name>/RUN_####, so
# the folder after "checkpoints/" must match the Trainer's experiment_name
# ("vehicle_training"). Replace the RUN_... folder with the one from your own run.
CKPT_PATH = f'{HOME}/checkpoints/vehicle_training/RUN_20240626_101822_978143/ckpt_best.pth'

# Load the best checkpoint and evaluate it on the validation loader.
best_model = models.get(Models.YOLO_NAS_S, num_classes=NUM_CLASSES, checkpoint_path=CKPT_PATH)
regular_metrics = trainer.test(model=best_model, test_loader=valid_loader)

# Last expression of the cell, so the notebook echoes the metrics.
regular_metrics

# Post-training quantization 

In [None]:
# Run the model loaded from the best checkpoint on a single image and display the result.
# Update the image path to the image you'd like to perform inference on.
prediction = best_model.predict(f'{HOME}/vehicle_images/frame_0182.png', fuse_model=False)
prediction.show()

In [None]:

""" 
    This PTQ step may not work if the training was done in a separate session, 
    or if you restarted the kernel in between the training/this step. This step should generate 
    an onnx model. After running, take a second to locate the file. Should be in your checkpoints directory. 

"""

from super_gradients.conversion import ExportParams

# Reload the best checkpoint so quantization starts from a freshly constructed model.
best_model = models.get(Models.YOLO_NAS_S, num_classes=NUM_CLASSES, checkpoint_path=CKPT_PATH)

# Export settings: batch size 1, with preprocessing and postprocessing enabled.
export_params = ExportParams(batch_size=1, preprocessing=True, postprocessing=True)

# The validation loader is used both for calibration and for validating the quantized model.
ptq_result = trainer.ptq(model=best_model, calib_loader=valid_loader, valid_loader=valid_loader, export_params=export_params)
# Validation metrics of the quantized model, inspected in the next cell.
ptq_metrics = ptq_result.valid_metrics_dict

# Inspect metrics of model after PTQ

In [None]:
ptq_metrics

# Predict with PTQ model

In [None]:
# Same single-image prediction as before, this time with the quantized model from the PTQ result.
# Update the image path to match your data.
prediction = ptq_result.quantized_model.predict(f'{HOME}/vehicle_images/frame_0182.png', fuse_model=False)
prediction.show()

# Inference using exported ONNX model 

In [None]:
from super_gradients.training.utils.detection_utils import DetectionVisualization
from super_gradients.training.utils.media.image import load_image
import onnxruntime
import numpy as np
import cv2 

import matplotlib.pyplot as plt
from super_gradients.inference import iterate_over_detection_predictions_in_batched_format


def show_predictions_from_batch_format(image, predictions=None):
    """Draw the first image's detections from ``predictions`` onto ``image`` and show the figure.

    Args:
        image: Image to annotate; it is converted with ``np.array`` before drawing.
        predictions: Value forwarded to
            ``iterate_over_detection_predictions_in_batched_format``. Only the
            first item yielded by that iterator is visualized.
    """
    per_image = iterate_over_detection_predictions_in_batched_format(predictions)
    _, det_boxes, det_scores, det_classes = next(iter(per_image))

    # One row per detection: the box columns, then the score, then the class id.
    score_column = det_scores[:, np.newaxis]
    class_column = det_classes[:, np.newaxis]
    detections = np.concatenate([det_boxes, score_column, class_column], axis=1)

    annotated = DetectionVisualization.visualize_image(
        image_np=np.array(image),
        class_names=CLASS_NAMES,
        pred_boxes=detections,
    )

    plt.figure(figsize=(8, 8))
    plt.imshow(annotated)
    plt.tight_layout()
    plt.show()


# Update path to onnx file.
# Two execution providers are listed: CUDA first, then CPU.
session = onnxruntime.InferenceSession(
    f'{HOME}/checkpoints/vehicle_images/ptq.onnx',
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)

# Names of the graph's input and output nodes, in the order the session reports them.
inputs = [node.name for node in session.get_inputs()]
outputs = [node.name for node in session.get_outputs()]

# Update path to image file
image = load_image(f'{HOME}/padded_images/frame_0182.jpg')
image = cv2.resize(image, (640, 640))  # update to match model input dimensions

# Move the channel axis to the front, then add a leading batch axis.
input_image = np.moveaxis(image, -1, 0)[np.newaxis, ...]

# Feed the image to the first input and draw the returned detections on it.
predictions1 = session.run(outputs, {inputs[0]: input_image})
show_predictions_from_batch_format(image, predictions1)


# Inference with OpenVino IR 
Based off of this tutorial https://docs.openvino.ai/2023.3/notebooks/004-hello-detection-with-output.html

In [None]:
# Note: you may need to restart the kernel to use updated packages.
%pip install -q "openvino>=2023.1.0"

## Imports

In [18]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import openvino as ov
from pathlib import Path

# Fetch `notebook_utils` module
import requests

# These lines have changed from the original tutorial.
# Download the helper module next to the notebook.
r = requests.get(
    url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py",
    timeout=30,  # requests has no default timeout; avoid hanging the cell forever
)
# Fail loudly on an HTTP error instead of saving an error page as a Python module.
r.raise_for_status()

# The context manager closes the file deterministically once the text is written.
with open("notebook_utils.py", "w", encoding="utf-8") as f:
    f.write(r.text)

23215

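## Set the path to the OpenVINO IR model

If you have not already converted the exported ONNX model to OpenVINO IR (`.xml` + `.bin`), do that first, e.g. `ov.save_model(ov.convert_model("ptq.onnx"), "ptq.xml")`.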

In [20]:
import os 
HOME = os.getcwd()

# Path to the OpenVINO IR .xml file that is read by core.read_model() later on.
# Modify it to point to your own xml file.
model_xml_path = f'{HOME}/checkpoints/vehicle_images/ptq.xml'

## Select inference device

In [9]:
import ipywidgets as widgets

core = ov.Core()

# Run this cell and select the device you'd like to use for inference.
# "AUTO" is offered in addition to the devices reported by core.available_devices.
device = widgets.Dropdown(
    options=core.available_devices + ["AUTO"],
    value="AUTO",
    description="Device:",
    disabled=False,
)

# Keep the widget as the LAST expression of the cell: by default the notebook
# only echoes the final expression, so a trailing note string would be shown
# instead of the dropdown.
device

Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO')

## Load the model 

In [21]:
core = ov.Core()

# Read the IR file and compile it for the device picked in the dropdown
# (changed from the original tutorial to use device.value).
model = core.read_model(model=model_xml_path)
compiled_model = core.compile_model(
    model=model,
    device_name=device.value,
)

# The original tutorial uses:
#     output_layer_ir = compiled_model.output("boxes")
# The output name "boxes" is not universal to all models. Try the names used
# below first; if they are not recognized, find your own output names:
#   - Comment out the last three lines of this cell so no errors are displayed.
#   - Run print(model) and look at the results; you should see a list of
#     ConstOutput values.
#   - My model had 4 outputs: graph2_num_predictions, graph2_pred_boxes,
#     graph2_pred_scores, and graph2_pred_classes.
#   - Since we want to draw bounding boxes around objects, the boxes output
#     is "graph2_pred_boxes".
#   - The prediction scores are a separate output ("graph2_pred_scores");
#     they are extracted too so the image can be labelled with them.
input_layer_ir = compiled_model.input(0)
output_layer_ir = compiled_model.output("graph2_pred_boxes")
output_layer_scores = compiled_model.output("graph2_pred_scores")

## Load an image 

In [23]:
# Update image path to match
image_path = f'{HOME}/padded_images/frame_0182.jpg'
image = cv2.imread(image_path)

# cv2.imread signals failure by returning None instead of raising; stop here
# with a clear message rather than failing later with an unhelpful error.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# N,C,H,W = batch size, number of channels, height, width.
N, C, H, W = input_layer_ir.shape

# Resize the image to meet network expected input sizes (cv2 takes (width, height)).
resized_image = cv2.resize(image, (W, H))

# Reshape to the network input shape: HWC -> CHW, then add the batch axis.
# NOTE(review): the image is fed in cv2's BGR channel order, whereas the ONNX
# cell loads its image with load_image - confirm which order the model expects.
input_image = np.expand_dims(resized_image.transpose(2, 0, 1), 0)

plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

## Do inference 

In [13]:
""" 
    Above “boxes”, I added in: 
        Pred_scores = compiled_model([input_image])[output_layer_scores] from above where I extracted the output scores. 
    
    The second line, boxes = boxes[~np.all(boxes == 0, axis=1)], caused an error. 
    This line is intended to remove “zero-only boxes.” However, in addition to this line not working, 
    my “null” predictions were marked with -1, not 0. Therefore, I modified this line to be: 
        Boxes = np.array([box for box in boxes[0] if not np.all(box == -1)])

    I also replicated this for pred_scores: 
    pred_scores= np.array([score for score in pred_scores[0] if not np.all(score == -1)])

    
    If both 0 and -1 are causing you issues, print(boxes) to see if there is another value that may 
    signify null/0/-1 boxes. 

"""

# Run ONE inference request and read both outputs from its result, instead of
# running the model a second time on the same input.
results = compiled_model([input_image])
boxes = results[output_layer_ir][0]  # assuming single batch
pred_scores = results[output_layer_scores][0]  # assuming single batch

# Remove padded ("null") predictions, which this model marks with -1.
# A single shared mask is applied to both arrays so that boxes[i] and
# pred_scores[i] always describe the same detection.
is_padding = np.all(boxes == -1, axis=1) | np.array(
    [np.all(score == -1) for score in pred_scores], dtype=bool
)
boxes = boxes[~is_padding]
pred_scores = pred_scores[~is_padding]

## Visualize results 

In [14]:
""" 
    This object detection tutorial assumed that the inference request returns the prediction scores and the 
    bounding box coordinates in the same call. However, in this model, it actually returned pred scores and 
    bounding boxes as two separate outputs. So, the below function is modified from the original tutorial to 
    match the model output format. 

"""

# added pred_scores as a parameter 
def convert_result_to_image(bgr_image, resized_image, boxes, pred_scores, threshold=0.3, conf_labels=True):
    """Return an RGB version of ``bgr_image`` annotated with the detections above ``threshold``.

    Args:
        bgr_image: Image to draw on, in BGR channel order.
        resized_image: Image whose height/width the box coordinates are scaled
            from; only its shape is read.
        boxes: Sequence of boxes, each unpacked as (x_min, y_min, x_max, y_max).
        pred_scores: Confidence values; ``pred_scores[i]`` belongs to ``boxes[i]``.
        threshold: A box is drawn only when its score is greater than this value.
        conf_labels: When true, the score is written next to each drawn box.

    Returns:
        The annotated image in RGB channel order.
    """
    palette = {"red": (255, 0, 0), "green": (0, 255, 0)}

    # Scale factors from resized_image coordinates to bgr_image coordinates.
    target_h, target_w = bgr_image.shape[:2]
    source_h, source_w = resized_image.shape[:2]
    ratio_x = target_w / source_w
    ratio_y = target_h / source_h

    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

    for box_index, box in enumerate(boxes):
        # Scores arrive as a separate array, aligned with the boxes by index.
        conf = pred_scores[box_index]
        if not conf > threshold:
            continue

        # Even positions are x coordinates, odd positions are y coordinates.
        # y values are kept at 10 or more so a box at the very top of the
        # image (and its label) stays visible.
        corners = []
        for position, value in enumerate(box):
            if position % 2:
                corners.append(int(max(value * ratio_y, 10)))
            else:
                corners.append(int(value * ratio_x))
        x_min, y_min, x_max, y_max = corners

        # Arguments: image, start_point, end_point, color, thickness.
        rgb_image = cv2.rectangle(rgb_image, (x_min, y_min), (x_max, y_max), palette["green"], 3)

        if conf_labels:
            # Arguments: image, text, bottom-left corner of the text, font,
            # font_scale, color, thickness, line_type.
            rgb_image = cv2.putText(
                rgb_image,
                f"{conf:.2f}",
                (x_min, y_min - 10),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.8,
                palette["red"],
                1,
                cv2.LINE_AA,
            )

    return rgb_image

In [26]:
plt.figure(figsize=(10, 6))
plt.axis("off")
# resized_image is passed both as the image to draw on and as the size
# reference, so both scale ratios are 1 and the boxes are drawn at the
# network-input resolution.
img = convert_result_to_image(resized_image, resized_image, boxes, pred_scores, conf_labels=True)
plt.imshow(img)