In [None]:
# ===============================================================================================================#
# Copyright 2023 Infosys Ltd.                                                                                    #
# Use of this source code is governed by Apache License Version 2.0 that can be found in the LICENSE file or at  #
# http://www.apache.org/licenses/                                                                                #
# ===============================================================================================================#

# Use case 3 - How to create and use a custom module in a pipeline

<hr/>

## Sample Task: Document preprocessing after adding a new processor

<hr/>

## 1. Customized Processor Creation

### 1a. Processor Class Signature

In [None]:
import infy_dpp_sdk

class ChunkDataValidatorV1(infy_dpp_sdk.interface.IProcessor):
    """Skeleton of the chunk data validator processor.

    Shows the bare class signature only: it subclasses ``IProcessor`` and
    implements ``do_execute`` without any processing logic.
    """
    __PROCESSOR_CONTEXT_DATA_NAME = "ChunkDataValidator"

    def do_execute(self, document_data: infy_dpp_sdk.data.DocumentData,
                   context_data: dict, config_data: dict) -> infy_dpp_sdk.data.ProcessorResponseData:
        """Return a freshly constructed response object with nothing set on it."""
        return infy_dpp_sdk.data.ProcessorResponseData()

### 1b. Add logic to the processor and update the Response Data 

In [None]:
import infy_dpp_sdk


class ChunkDataValidatorV1(infy_dpp_sdk.interface.IProcessor):
    """Chunk data validator Processor Implementation class"""
    __PROCESSOR_CONTEXT_DATA_NAME = "ChunkDataValidator"

    def do_execute(self, document_data: infy_dpp_sdk.data.DocumentData,
                   context_data: dict, config_data: dict) -> infy_dpp_sdk.data.ProcessorResponseData:
        """Record a validation status in the context data and return it.

        Args:
            document_data: Passed through unchanged to the response.
            context_data: Updated in place with a "ChunkDataValidator" entry
                of the form ``{"validation_status": "success" | "failure"}``.
            config_data: Configuration dict; the "ChunkDataValidator" section
                is read and its "valid_test" flag decides the status.

        Returns:
            A ProcessorResponseData whose ``document_data`` and
            ``context_data`` are set to the given (and updated) objects.
        """
        # Get this processor's section from the config data. A missing or
        # null section is treated as empty so the lookup below cannot fail
        # with AttributeError on None.
        processor_config = config_data.get("ChunkDataValidator") or {}

        processor_response_data = infy_dpp_sdk.data.ProcessorResponseData()

        # Sample logic: a truthy 'valid_test' flag means success; an absent
        # or falsy flag means failure.
        if processor_config.get('valid_test'):
            valid = 'success'
        else:
            valid = 'failure'

        # Populate context data
        context_data[self.__PROCESSOR_CONTEXT_DATA_NAME] = {
            "validation_status": valid}

        # Populate response data
        processor_response_data.document_data = document_data
        processor_response_data.context_data = context_data

        return processor_response_data

## 2. Save to .py file 

##### Here I have created a file named `chunk_data_validator.py` and saved the class and its supporting content to that file

## 3. Copy to custom folder 

## 4. Add to Pipeline Configuration

### 4a. Add to Processor List

In [None]:
{
    "enabled": true,
    "processor_name": "chunk_data_validator",
    "processor_namespace": "_custimization.chunk_data_validator",
    "processor_class_name": "ChunkDataValidatorV1",
    "processor_input_config_name_list": [
        "ChunkDataValidator"
    ]
}

### 4b. Add Processor Configurations

In [None]:
"ChunkDataValidator": {
    "valid_test": true
}

#### Note: Add the configuration to `dpp_pipeline2.1_input_config.json`
Another example of a custom processor, `my_processor.py`, has been created in the custom folder.