## Evaluate baseline model

In [None]:
# Set CUDA_VISIBLE_DEVICES=0 for this kernel so that CUDA only sees GPU 0.
# Run this cell first, before any cell that touches the GPU.
%env CUDA_VISIBLE_DEVICES=0

In [None]:
import torch
from val import run as run_val

# Validation settings for the unmodified yolov5s checkpoint.
# They mirror the settings used for the tuned model's evaluation later in this
# notebook (same data, half precision, batch size and image size), so the two
# results are directly comparable.
opt = {
    'data': 'data/coco128.yaml',
    'weights': 'yolov5s.pt',
    'half': True,
    'batch_size': 3, # demo value (original note: 32)
    'imgsz': 640,
}

# Trailing ';' keeps the returned metrics from being echoed as the cell output.
run_val(**opt);
# Release cached GPU memory before the next stage, as the other cells do.
torch.cuda.empty_cache()


## Optimize model with ENOT

In [None]:
import torch
from prune import run as run_prune

# Pruning configuration; every entry is forwarded to prune.run as a keyword argument.
opt = {
    'device': 0,
    'data': 'data/coco128.yaml',
    'weights': 'yolov5s.pt',
    'half': True,
    'batch_size': 3, # demo value (original note: 32)
    'imgsz': 640,
    'hyp': 'data/hyps/hyp.coco_pruning.yaml',
    'name': 'prune_yolov5s_coco',
    'save_before_prune': True,
    'n_search_steps': 3, # This value is just for the demo; in production we recommend using more than 200 steps.
    # The key must use underscores like every other option here: the hyphenated
    # spelling 'target-latency-fraction' is not a valid Python identifier, so it
    # can never bind to a `target_latency_fraction` parameter or attribute.
    # NOTE(review): confirm the exact option name against prune.py.
    'target_latency_fraction': 0.5, # 0.5 means the optimized model should be 2 times faster than the baseline.
}

# Trailing ';' suppresses echoing the return value as the cell output.
run_prune(**opt);
torch.cuda.empty_cache()

## Export the original and optimized models to ONNX

In [None]:
import torch
from export import run as run_export

# Export settings shared by both checkpoints; 'weights' is filled in per export.
# The same `opt` dict (and therefore the same `imgsz` list object) is reused
# for both calls.
opt = dict(
    data='data/coco128.yaml',
    batch_size=1,
    imgsz=[640],
    include=['onnx'],
)

# Export the checkpoint saved before pruning first, then the pruned one.
for checkpoint in (
    'runs/prune/prune_yolov5s_coco/weights/original_model.pt',
    'runs/prune/prune_yolov5s_coco/weights/pruned_model.pt',
):
    opt['weights'] = checkpoint
    run_export(**opt)

torch.cuda.empty_cache()

## Run optimized model tuning

In [None]:
import torch
from train import run as run_tune

# Settings for tuning the pruned checkpoint; forwarded to train.run as keyword arguments.
opt = dict(
    data='data/coco128.yaml',
    weights='runs/prune/prune_yolov5s_coco/weights/pruned_model.pt',
    batch_size=3,  # demo value (original note: 32)
    imgsz=640,
    from_pruned=True,
    epochs=1,
    device=0,
    name='tune_pruned_model',
)

run_tune(**opt)
torch.cuda.empty_cache()

## Evaluate tuned optimized model

In [None]:
import torch
from val import run as run_val

# Validation settings for the best checkpoint written by the tuning run.
opt = dict(
    data='data/coco128.yaml',
    weights='runs/train/tune_pruned_model/weights/best.pt',
    half=True,
    batch_size=3,  # demo value (original note: 32)
    imgsz=640,
)

# Trailing ';' keeps the returned metrics from being echoed as the cell output.
run_val(**opt);
torch.cuda.empty_cache()

In [None]:
import torch
from detect import run as run_detect

# Detection settings shared by both runs; 'name' and 'weights' are set per model.
opt = dict(
    data='data/coco128.yaml',
    source='../datasets/coco128/images/train2017/',
    half=True,
    imgsz=(640, 640),
)

# Run detection with the tuned optimized model first, then with the original
# yolov5s weights, freeing cached GPU memory after each run.
for run_name, checkpoint in (
    ('optimized_model', 'runs/train/tune_pruned_model/weights/best.pt'),
    ('original_model', 'yolov5s.pt'),
):
    opt['name'] = run_name
    opt['weights'] = checkpoint
    run_detect(**opt)
    torch.cuda.empty_cache()

In [None]:
# Uncomment this if you want to show results

%matplotlib inline
import cv2
import matplotlib.pyplot as plt

original_predict = cv2.imread('runs/detect/original_model/000000000009.jpg')
optimized_predict = cv2.imread('runs/detect/optimized_model/000000000009.jpg')

figsize = 10
plt.figure(figsize=(figsize, figsize))
plt.imshow(cv2.hconcat([original_predict, optimized_predict])[:,:,::-1])
plt.grid(visible=False)
plt.show()

# OpenVINO quantization

In [None]:
import torch
from export import run as run_export

# Export the best tuned checkpoint to ONNX.
opt = dict(
    data='data/coco128.yaml',
    weights='runs/train/tune_pruned_model/weights/best.pt',
    batch_size=1,
    imgsz=[640],
    include=['onnx'],
)

run_export(**opt)

In [None]:
%env CUDA_VISIBLE_DEVICES=0
from quant import run as run_quant
# Quantization settings for the tuned model's ONNX file; forwarded to quant.run
# as keyword arguments.
opt = dict(
    data='data/coco128.yaml',
    weights='runs/train/tune_pruned_model/weights/best.onnx',
    batch_size=1,
    imgsz=640,
    device='cuda',
    backend='openvino',
    n_epochs=2,
)

run_quant(**opt)

# Run quantized model

In [None]:
import numpy as np
from enot_lite.backend import BackendFactory
from enot_lite.type import BackendType

# All-ones float32 dummy batch, used both as the backend's input example and as
# the actual inference input.
inputs = np.ones(shape=(1, 3, 640, 640), dtype=np.float32)

# Create an ORT_OPENVINO backend for the quantized ONNX model.
quantized_model_path = 'runs/train/tune_pruned_model/weights/best_quant.onnx'
backend = BackendFactory().create(quantized_model_path, BackendType.ORT_OPENVINO, input_example=inputs)

# Run one inference; the result is inspected in the next cell.
prediction = backend(inputs)

In [None]:
# Display the quantized model's output computed in the previous cell.
prediction

# Check acceleration

### Baseline

In [None]:
import numpy as np
from enot_lite.benchmark import Benchmark
from enot_lite.type import BackendType

# Benchmark the ONNX export of the original (unpruned) model on the ORT_OPENVINO backend.
# NOTE(review): number / warmup / repeat look like timeit-style iteration
# controls — confirm against the enot_lite documentation.
benchmark_settings = dict(
    batch_size=1,
    onnx_model='runs/prune/prune_yolov5s_coco/weights/original_model.onnx',
    onnx_input=(np.ones((1, 3, 640, 640), dtype=np.float32),),  # one-element tuple holding an all-ones array
    backends=[BackendType.ORT_OPENVINO],
    number=10,
    warmup=10,
    repeat=10,
)
benchmark = Benchmark(**benchmark_settings)

benchmark.run()
benchmark.print_results()

### Pruned

In [None]:
import numpy as np
from enot_lite.benchmark import Benchmark
from enot_lite.type import BackendType

# Benchmark the ONNX export of the tuned pruned model on the ORT_OPENVINO backend.
# NOTE(review): number / warmup / repeat look like timeit-style iteration
# controls — confirm against the enot_lite documentation.
benchmark_settings = dict(
    batch_size=1,
    onnx_model='runs/train/tune_pruned_model/weights/best.onnx',
    onnx_input=(np.ones((1, 3, 640, 640), dtype=np.float32),),  # one-element tuple holding an all-ones array
    backends=[BackendType.ORT_OPENVINO],
    number=10,
    warmup=10,
    repeat=10,
)
benchmark = Benchmark(**benchmark_settings)

benchmark.run()
benchmark.print_results()

### Quantized

In [None]:
import numpy as np
from enot_lite.benchmark import Benchmark
from enot_lite.type import BackendType

# Benchmark the quantized ONNX model on the ORT_OPENVINO backend.
# NOTE(review): number / warmup / repeat look like timeit-style iteration
# controls — confirm against the enot_lite documentation.
benchmark_settings = dict(
    batch_size=1,
    onnx_model='runs/train/tune_pruned_model/weights/best_quant.onnx',
    onnx_input=(np.ones((1, 3, 640, 640), dtype=np.float32),),  # one-element tuple holding an all-ones array
    backends=[BackendType.ORT_OPENVINO],
    number=10,
    warmup=10,
    repeat=10,
)
benchmark = Benchmark(**benchmark_settings)

benchmark.run()
benchmark.print_results()