# Datasets API usage example

### 1. Read and compile model

In [1]:
from openvino.runtime import Core
import numpy as np

# Create the OpenVINO runtime object used to read and compile the model.
core = Core()
# Read the SampLeNet model description; the path is relative, so the file is
# looked up from the notebook's working directory.
model = core.read_model('SampLeNet.xml')
# Compile for the 'CPU' device; later cells create infer requests from this object.
compiled_model = core.compile_model(model, 'CPU')

### 2. Create dataset, preprocessing, postprocessing and metric

In [2]:
from openvino.model_zoo.datasets import CIFAR10Dataset
from openvino.model_zoo.transforms.input import pillow_resize, normalization, transpose
from openvino.model_zoo.metrics import create_accuracy
from openvino.model_zoo.transforms.output import ClassificationPostprocessor

# Demonstrates that any custom callable is accepted as a transform alongside the built-in ones.
def expand_dim(image, **kwargs):
    """Insert a new leading axis of size 1 into ``image.data``.

    The ``image`` object is modified in place and returned. Any extra
    keyword arguments are accepted and ignored.
    """
    batched = np.expand_dims(image.data, axis=0)
    image.data = batched
    return image


# Input pipeline handed to the dataset; the last entry is the custom
# `expand_dim` callable defined above (presumably applied in list order).
transforms = [
    pillow_resize(size=32),
    normalization(
        mean=[125.307, 122.961, 113.8575],
        std=[51.5865, 50.847, 51.255],
    ),
    transpose([2, 0, 1]),
    expand_dim,
]

# The dataset location is a path relative to the notebook's working directory.
dataset_iterator = CIFAR10Dataset(
    'cifar-10-batches-py',
    transforms=transforms,
    reader='pillow_imread',
)

# The postprocessor is constructed from the compiled model's first output.
postprocessor = ClassificationPostprocessor(compiled_model.outputs[0])

accuracy = create_accuracy()

Annotation conversion for cifar10 dataset has been started
Parameters to be used for conversion:
converter: cifar
data_batch_file: cifar-10-batches-py\test_batch
convert_images: True
converted_images_dir: cifar-10-batches-py\test
num_classes: 10
Annotation conversion for cifar10 dataset has been finished


### 3. Infer model on dataset in sync mode

In [3]:
# Synchronous evaluation: one blocking `infer` call per item yielded by the dataset.
infer_request = compiled_model.create_infer_request()
# Each item unpacks to (input ids, annotations, input data, meta, identifiers).
for (batch_input_ids, batch_annotation, batch_input, batch_meta, batch_identifiers) in dataset_iterator:
    infer_result = infer_request.infer(batch_input)
    # Give the postprocessor the identifiers (fifth tuple element) rather than the
    # input ids, matching the other evaluation loops in this notebook.
    batch_predictions = postprocessor(infer_result, batch_meta, batch_identifiers)
    accuracy.batch_update(batch_annotation, batch_predictions)

print(f'Model accuracy: {accuracy.evaluate()}')

Model accuracy: 0.7502


### 4. Dataset API and AsyncInferQueue

In [4]:
from openvino.runtime import AsyncInferQueue

# Clear the metric state left by the previous evaluation so this pass is scored on its own.
accuracy.reset()
# Queue of infer requests built on the compiled model; the second argument (2)
# is presumably the number of requests kept in flight - confirm against the API docs.
infer_queue = AsyncInferQueue(compiled_model, 2)
def completion_callback(request, user_data):
    """Postprocess one finished request and feed the result to the metric.

    ``user_data`` is the 4-tuple passed to ``start_async``:
    (input ids, identifiers, annotations, meta). The input ids are not used here.
    Relies on the notebook-level ``postprocessor`` and ``accuracy`` objects.
    """
    _, identifiers, annotations, meta = user_data
    predictions = postprocessor(request.results, meta, identifiers)
    accuracy.batch_update(annotations, predictions)

# Register the function that postprocesses results and updates the metric.
infer_queue.set_callback(completion_callback)
for (batch_input_ids, batch_annotation, batch_input, batch_meta, batch_identifiers) in dataset_iterator:
    # The tuple passed as the second argument is what `completion_callback`
    # unpacks as `user_data`; its element order must match that unpacking.
    infer_queue.start_async(batch_input, (batch_input_ids, batch_identifiers, batch_annotation, batch_meta))
# Wait for the queued requests to finish before reading the metric.
infer_queue.wait_all()

print(f'Model accuracy: {accuracy.evaluate()}')


Model accuracy: 0.7502


### Input transforms from frameworks


In [5]:
from torchvision import transforms
from openvino.model_zoo.transforms.input import from_torch

# torchvision preprocessing pipeline; the mean/std values are the ones used by the
# earlier `normalization` transform, each divided by 255.
input_transforms = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[125.307 / 255, 122.961 / 255, 113.8575 / 255],
        std=[51.5865 / 255, 50.847 / 255, 51.255 / 255]),
])

# Hand the wrapped pipeline to the dataset directly instead of rebinding the name
# `transforms`, which would shadow the torchvision module imported in this cell.
dataset_iterator.reset()
dataset_iterator.set_transforms([from_torch(input_transforms)])

# Clear the metric first: without this, the value printed below would also include
# the batches accumulated by the previous evaluation pass.
accuracy.reset()

for (batch_input_ids, batch_annotation, batch_input, batch_meta, batch_identifiers) in dataset_iterator:
    # The torch pipeline output is converted to a NumPy array and wrapped in a list for `infer`.
    infer_result = infer_request.infer([np.array(batch_input)])
    batch_predictions = postprocessor(infer_result, batch_meta, batch_identifiers)
    accuracy.batch_update(batch_annotation, batch_predictions)

print(f'Model accuracy: {accuracy.evaluate()}')

Model accuracy: 0.7502


### Model quantization with POT API

In [10]:
import addict
from openvino.tools.pot import IEEngine, save_model, compress_model_weights, create_pipeline, load_model
from openvino.tools.pot.engines.utils import process_raw_output
# Point the shared postprocessor at the output named 'fc3'. Presumably needed because
# the POT engine below keys raw outputs by layer name - confirm against the POT docs.
postprocessor.output_tensor = 'fc3'

# One quantization algorithm together with its parameters.
algorithms = [
    {
        'name': 'DefaultQuantization',
        'params': {
            'target_device': 'ANY',
            'preset': 'performance',
            'stat_subset_size': 300,
        },
    },
]

# Model files, given as paths relative to the notebook's working directory.
model_config = addict.Dict({
    'model_name': 'sample_model',
    'model': 'SampLeNet.xml',
    'weights': 'SampLeNet.bin',
})

# Device and request counts passed to the engine.
engine_config = addict.Dict({
    'device': 'CPU',
    'stat_requests_number': 4,
    'eval_requests_number': 4,
})

In [7]:
class Engine(IEEngine):
    """``IEEngine`` subclass adapted to the Datasets API iterator used in this notebook.

    Overrides batch unpacking, output processing and metric updates. Metric
    updates go through the notebook-level ``postprocessor`` object, so that
    object must exist before the engine is run.
    """

    def _process_infer_output(self, stats_layout, predictions,
                              batch_annotations, batch_meta, need_metrics_per_sample):
        """Collect statistics for a batch and, if annotations are present, update the metric.

        :param stats_layout: statistics layout; collection is skipped when it is empty
        :param predictions: raw inference output for the batch
        :param batch_annotations: list of (sample_id, annotation) pairs as built by
            ``_process_batch``; the metric is skipped when the list is empty
        :param batch_meta: batch meta data forwarded to the postprocessing steps
        :param need_metrics_per_sample: forwarded to ``_update_metrics``
        """
        # Collect statistics
        if stats_layout:
            self._collect_statistics(outputs=predictions,
                                     stats_layout=stats_layout,
                                     annotations=batch_annotations)

        # Postprocess network output
        outputs = process_raw_output(predictions)
        output = outputs[self._output_layers[0]] #outputs['fc3']
        outputs[self._output_layers[0]] = self.postprocess_output(output, batch_meta)

        # Update metrics
        if batch_annotations:
            # The notebook-level postprocessor converts the outputs into the
            # predictions consumed by the metric.
            logits = postprocessor(outputs, batch_meta)
            self._update_metrics(output=logits, annotations=batch_annotations,
                                 need_metrics_per_sample=need_metrics_per_sample)

    def _process_batch(self, batch):
        """Unpack one dataset item into the triple returned to the engine.

        :param batch: sequence whose first element holds, by position,
            (ids, annotations, input data, meta, ...)
        :return: list of (id, annotation) pairs, the input data and the meta data
        """
        _batch = batch[0]
        batch_ids, batch_annotation, batch_input, batch_meta = _batch[0], _batch[1], _batch[2], _batch[3]

        return list(zip(batch_ids, batch_annotation)), batch_input, batch_meta

    def _update_metrics(self, output, annotations, need_metrics_per_sample=False):
        """ Updates metrics.
        :param output: network output
        :param annotations: a list of annotations for metrics collection [(img_id, annotation)]
        :param need_metrics_per_sample: whether to collect metrics for each sample
        """
        # Each entry is a (sample_id, annotation) pair, so the annotation itself has to
        # be checked: testing only the pair against None can never fail.
        annotations_are_valid = all(
            item is not None and item[1] is not None for item in annotations
        )

        if self._metric and annotations_are_valid:
            for out, (sample_id, ann) in zip(output, annotations):
                self._metric.update(ann, out)
                if need_metrics_per_sample:
                    # Metric value is read right after this sample's update.
                    metrics = self._metric.value
                    for metric_name, metric_value in metrics.items():
                        self._per_sample_metrics.append({'sample_id': sample_id,
                                                        'metric_name': metric_name,
                                                        'result': metric_value})

In [11]:
# Build the custom engine from the engine config, the dataset iterator and the
# accuracy metric, then create the pipeline for the configured algorithms.
engine = Engine(engine_config, dataset_iterator, accuracy)
pipeline = create_pipeline(algorithms, engine)

In [12]:
# Load the model described by `model_config` and run the pipeline on it.
model_rep = load_model(model_config)
compressed_model = pipeline.run(model_rep)

In [13]:
# The return value of compress_model_weights is not used, so it presumably
# modifies `compressed_model` in place - confirm against the POT docs.
compress_model_weights(compressed_model)
# Save under the relative directory 'optimized'; the call's return value is the cell output.
save_model(compressed_model, 'optimized')

[{'model': 'optimized\\SampLeNet.xml', 'weights': 'optimized\\SampLeNet.bin'}]

In [16]:
# Evaluate the pipeline's output model and print the result.
metric_results = pipeline.evaluate(compressed_model)
print(metric_results)

{None: array(0.7413)}
