# SwissPollenTools | Inference Pipeline Example

1. Loading the tools
2. Creating the pipeline configuration
3. Creating the pipeline
4. Running the inference pipeline

## 1. Loading the tools

In [None]:
from swisspollentools.pipelines import InferencePipelineConfig, InferencePipeline

### Loading the Keras Model
> Note: when implementing a new model, it is preferable to also implement the corresponding post-processing function and to provide it together with the model.

For demonstration purposes, we implement a random model and design a post-processing function to go with it.
In an operational setup, the model would be loaded using the `keras.models.load_model` command and the post-processing function would be implemented (i.e. copied) from the inference script.

In [None]:
import numpy as np

class RandomModel:
    """Stand-in model that returns random scores instead of real predictions.

    It only exposes a ``predict`` method, which is all this example calls on
    a model.

    Args:
        n_classes: Number of scores generated per sample. Defaults to 8, the
            value that was previously hard-coded.
    """

    def __init__(self, n_classes=8):
        self.n_classes = n_classes

    def predict(self, batch, *args, **kwargs):
        """Return random scores for every sample of ``batch``.

        Args:
            batch: Mapping with a ``"rec0"`` entry; only its length is used,
                to determine the number of samples.
            *args: Accepted and ignored.
            **kwargs: Accepted and ignored.

        Returns:
            A ``numpy.ndarray`` of shape ``(len(batch["rec0"]), n_classes)``
            filled with uniform random floats in ``[0, 1)``.
        """
        n_samples = len(batch["rec0"])
        return np.random.random((n_samples, self.n_classes))
    
def post_processing_fn(batch):
    """Reduce per-class scores to the best class and its score.

    Args:
        batch: Array-like of scores; the reduction runs along the last axis.

    Returns:
        A dict with two entries:
        ``"class"``, the index of the highest score along the last axis, and
        ``"certainity"``, the value of that highest score.
    """
    # The key spelling is part of the function's output and is kept verbatim.
    return {
        "class": np.argmax(batch, axis=-1),
        "certainity": np.max(batch, axis=-1),
    }

## 2. Creating the pipeline configuration

### Extraction Tool Parameters
- `exw_batch_size`
- `exw_keep_metadata`
- `exw_keep_fluorescence`
- `exw_keep_rec_properties`
- `exw_keep_metadata_key`
- `exw_keep_fluorescence_keys`
- `exw_keep_rec_properties_keys`
- `exw_filters`
### Inference Tool Parameters
- `inw_from_rec0`
- `inw_from_rec1`
- `inw_from_fluorescence`
- `inw_from_fluorescence_keys`
- `inw_rec_shape`
- `inw_rec_precision`
- `inw_batch_size`
- `inw_post_processing_fn`
### ToCSV Tool Parameters
- `tocsvw_output_directory`

In [None]:
# One flat configuration for the whole pipeline: the prefix of each parameter
# identifies the tool it configures (see the parameter lists above).
config = InferencePipelineConfig(
    # Extraction tool parameters (exw_*)
    exw_batch_size=1024,
    exw_keep_metadata_key=["eventId"],
    exw_keep_fluorescence_keys=["average_std", "average_mean", "relative_spectra"],
    exw_filters={"max_area": 625, "max_solidity": 0.9},
    # Inference tool parameters (inw_*)
    inw_from_fluorescence=False,
    inw_batch_size=256,
    # Post-processing function defined earlier in this notebook.
    inw_post_processing_fn=post_processing_fn,
    # ToCSV tool parameters (tocsvw_*)
    tocsvw_output_directory="./tmp"
)

## 3. Creating the pipeline

In [None]:
# The model is not part of the configuration: it is handed to the pipeline
# through the `inw_model` keyword argument.
pipeline = InferencePipeline(config, inw_model=RandomModel())

## 4. Running the inference pipeline

In [None]:
import tensorflow as tf 

# Run the pipeline inside TensorFlow's CPU device scope.
# NOTE(review): "./path/to/example.zip" looks like a placeholder path --
# point it at a real archive before running.
with tf.device("/cpu:0"):
    out = pipeline(file_path="./path/to/example.zip")

## Appendix A: Implementing the Inference Pipeline with a Merge Step

In [1]:
import tensorflow as tf

from swisspollentools.utils import *
from swisspollentools.workers import ExtractionWorkerConfig, \
    InferenceWorkerConfig, MergeWorkerConfig, ToCSVWorkerConfig, \
    ExtractionRequest, ZipExtraction, InferenceRequest, Inference, \
    MergeRequest, Merge, ToCSVRequest, ToCSV

### Creating the pipeline configurations

In [None]:
# One configuration object per worker, carrying the same values as the flat
# `InferencePipelineConfig` built earlier in this notebook.

# Extraction worker configuration (exw_* parameters).
exw_config = ExtractionWorkerConfig(
    exw_batch_size=1024,
    exw_keep_metadata_key=["eventId"],
    exw_keep_fluorescence_keys=["average_std", "average_mean", "relative_spectra"],
    exw_filters={"max_area": 625, "max_solidity": 0.9},
)
# Inference worker configuration (inw_* parameters); reuses the
# post-processing function defined earlier in this notebook.
inw_config = InferenceWorkerConfig(
    inw_from_fluorescence=False,
    inw_batch_size=256,
    inw_post_processing_fn=post_processing_fn,
)
# Merge worker configuration: built without arguments.
mew_config = MergeWorkerConfig()
# ToCSV worker configuration (tocsvw_* parameters).
tocsvw_config = ToCSVWorkerConfig(
    tocsvw_output_directory="./tmp"
)

### Implementing the pipeline

In [None]:
def MergedInferencePipeline(config, **kwargs):
    """Build an inference pipeline that merges all batches before the CSV export.

    Args:
        config: Iterable unpacked as ``(exw_config, inw_config, mew_config,
            tocsvw_config)``, i.e. the extraction, inference, merge and ToCSV
            worker configurations, in that order.
        **kwargs: Extra worker arguments. They are split per worker with
            ``get_subdictionary`` using each worker's prefix constant and
            ``ATTRIBUTE_SEP`` (presumably ``inw_model`` ends up as an argument
            of the inference worker -- confirm against ``get_subdictionary``).

    Returns:
        A function ``run(file_path)`` that processes one file and returns a
        one-element list holding the ToCSV worker's response.
    """
    exw_config, inw_config, mew_config, tocsvw_config = config
    exw_kwargs = get_subdictionary(kwargs, EXTRACTION_WORKER_PREFIX, ATTRIBUTE_SEP)
    inw_kwargs = get_subdictionary(kwargs, INFERENCE_WORKER_PREFIX, ATTRIBUTE_SEP)
    mew_kwargs = get_subdictionary(kwargs, MERGE_WORKER_PREFIX, ATTRIBUTE_SEP)
    # Fix: these arguments feed the ToCSV worker, so they must be selected
    # with the ToCSV prefix (the HDF5 prefix was used here before).
    tocsvw_kwargs = get_subdictionary(kwargs, TOCSV_WORKER_PREFIX, ATTRIBUTE_SEP)

    def run(file_path):
        """Extract, infer, merge and export the content of ``file_path``."""
        extraction_request = ExtractionRequest(file_path=file_path)
        batches = ZipExtraction(extraction_request, exw_config, **exw_kwargs)

        # The stages below are lazy generators: nothing is computed until the
        # merge requests are collected into a list further down.
        inference_requests = (
            InferenceRequest(file_path, batch_id, response=batch)
            for batch_id, batch in enumerate(batches)
        )
        inferences = (
            next(Inference(request, inw_config, **inw_kwargs))
            for request in inference_requests
        )
        merge_requests = (
            MergeRequest(file_path, None, inference) for inference in inferences
        )

        # Merge receives the complete list of requests in a single call.
        merged = Merge(list(merge_requests), mew_config, **mew_kwargs)

        tocsv_request = ToCSVRequest(file_path, None, response=merged)
        return [next(ToCSV(tocsv_request, tocsvw_config, **tocsvw_kwargs))]

    return run
        

In [None]:
# The worker configurations are passed as a tuple in the order the factory
# unpacks them: extraction, inference, merge, ToCSV.
pipeline = MergedInferencePipeline(
    (exw_config, inw_config, mew_config, tocsvw_config),
    inw_model=RandomModel()
)

In [None]:
# Run the merged pipeline inside TensorFlow's CPU device scope.
# NOTE(review): "./path/to/example.zip" looks like a placeholder path --
# point it at a real archive before running.
with tf.device("/cpu:0"):
    out = pipeline(file_path="./path/to/example.zip")