In [None]:
# ===============================================================================================================#
# Copyright 2024 Infosys Ltd.                                                                          #
# Use of this source code is governed by Apache License Version 2.0 that can be found in the LICENSE file or at  #
# http://www.apache.org/licenses/                                                                                #
# ===============================================================================================================#

## UC_03 - RAG Evaluation Batch

<div  style="line-height: 1;">
    <span style="color:Green"><b>NOTE:</b><br>
        <b>1. </b>The RAG evaluation pipeline must be run after running the <i>uc_01_rag_indexing_vectordb</i> pipeline.<br>
        <b>2. </b>Make sure to have <b>infy_model_service</b>, <b>infy_db_service</b> and <b>infy_search_service</b> running, before running this pipeline.<br>
        <b>3. </b>This pipeline uses a sample <i>question_data.xlsx</i> as input. To run this pipeline on a custom <i>question_data.xlsx</i>, first run <i>uc_02_qna_generation_batch</i> to generate the <i>question_data.xlsx</i>, and then use that file as input to this pipeline.<br><br>
        <b>IMPORTANT: </b>To run this pipeline, the <i>index_id</i> generated after running the <i>uc_01_rag_indexing_vectordb</i> needs to be updated in the config file. Kindly refer to the <b><i style="color:Red">WARNING</i></b> in <i>Run the pipeline</i> section for more information.<br>
    </span>
</div>

#### Import libraries

In [None]:
import os
import json
import logging
import shutil
import warnings

import infy_fs_utils
import infy_dpp_sdk
from infy_model_evaluation.common.constants import Constants

from _internal_utils.pipeline_helper import PipelineHelper
warnings.simplefilter("ignore")

#### Set environment variables
<div  style="line-height: 1;">
    <span style="color:Red"><b>NOTE:</b> The pipeline uses environment variables which need to be set by the developer.<br>
In production, the developer needs to set them as required.<br>
In this notebook, you can provide them using the code below.<br>
To set or change the values, please refer to <i>installation.ipynb</i>.</span>
</div>

In [None]:
%store -r USE_LOCAL_STORE
if 'USE_LOCAL_STORE' in locals() and USE_LOCAL_STORE:
    %store -r AZURE_OPENAI_SERVER_BASE_URL
    os.environ['AZURE_OPENAI_SERVER_BASE_URL']=AZURE_OPENAI_SERVER_BASE_URL
    %store -r AZURE_OPENAI_SECRET_KEY
    os.environ['AZURE_OPENAI_SECRET_KEY']=AZURE_OPENAI_SECRET_KEY
    %store -r LITELLM_PROXY_SERVER_BASE_URL
    os.environ['LITELLM_PROXY_SERVER_BASE_URL']=LITELLM_PROXY_SERVER_BASE_URL
    %store -r LITELLM_PROXY_SECRET_KEY
    os.environ['LITELLM_PROXY_SECRET_KEY']=LITELLM_PROXY_SECRET_KEY
    %store -r INFY_DB_SERVICE_BASE_URL
    os.environ['INFY_DB_SERVICE_BASE_URL']=INFY_DB_SERVICE_BASE_URL
    %store -r INFY_MODEL_SERVICE_BASE_URL
    os.environ['INFY_MODEL_SERVICE_BASE_URL']=INFY_MODEL_SERVICE_BASE_URL
    %store -r INFY_RESOURCE_SERVICE_BASE_URL
    os.environ['INFY_RESOURCE_SERVICE_BASE_URL']=INFY_RESOURCE_SERVICE_BASE_URL
    %store -r INFY_SEARCH_SERVICE_BASE_URL
    os.environ['INFY_SEARCH_SERVICE_BASE_URL']=INFY_SEARCH_SERVICE_BASE_URL
else:
    print("USE_LOCAL_STORE is not set to true. DPP pipeline will use system environment variables.")

#### Define configuration file path

In [None]:
# Base folder on the local machine that holds this notebook's working folders.
# NOTE: this is an absolute, machine-specific location; adjust it as required.
_NOTEBOOK_FS_ROOT = 'C:/del/fs/notebookuc'

# Root folder used for the "file://" storage URI (data, config and logs).
STORAGE_ROOT_PATH = _NOTEBOOK_FS_ROOT + '/STORAGE'
# Root folder passed to the DPP client configuration as container_root_path.
CONTAINER_ROOT_PATH = _NOTEBOOK_FS_ROOT + '/CONTAINER'
# Pipeline input config file; the sample config is copied to
# <STORAGE_ROOT_PATH>/data/config, so this path is presumably resolved
# against the storage root rather than the OS file system root.
PIPELINE_INPUT_CONFIG_FILE_PATH = (
    '/data/config/dpp_pipeline_rag_evaluation_input_config.json'
)

#### Copying files
<div style="line-height: 1;">
<span style="color:Red"><b>NOTE: </b>In this notebook, the code below is used to copy sample files to folders in <i>STORAGE_ROOT_PATH</i>.<br>
In production, the data and config files should be kept under their respective folders in <i>STORAGE_ROOT_PATH</i>.<br>
</span>
</div>

In [None]:
# Locations of the sample files, resolved relative to the notebook's
# current working directory.
input_data_path = os.path.abspath('../../../../../../')
current_data_path = os.path.abspath('../data')

# os.makedirs creates the intermediate '<STORAGE_ROOT_PATH>/data' folder as well,
# and exist_ok=True makes the cell safe to re-run without a separate
# os.path.exists() check.
os.makedirs(f'{STORAGE_ROOT_PATH}/data/input', exist_ok=True)

# Sample questions used as the pipeline input.
shutil.copy(f'{input_data_path}/_internal/samples/input/question_data.xlsx',
            f'{STORAGE_ROOT_PATH}/data/input/question_data.xlsx')
# Sample config files; dirs_exist_ok=True lets a re-run overwrite an earlier copy.
shutil.copytree(f'{current_data_path}/sample/config', f'{STORAGE_ROOT_PATH}/data/config',
                dirs_exist_ok=True)

#### Initialize Client Config:

In [None]:
# Storage settings: a local "file://" root; server URL and keys are left empty.
storage_config_data = infy_fs_utils.data.StorageConfigData(
    storage_root_uri=f"file://{STORAGE_ROOT_PATH}",
    storage_server_url="",
    storage_access_key="",
    storage_secret_key="",
)
file_sys_handler = infy_fs_utils.provider.FileSystemHandler(storage_config_data)

# Register the handler under the DPP handler name only if one is not already
# registered, so re-running this cell does not register it twice.
fs_manager = infy_fs_utils.manager.FileSystemManager()
dpp_fs_handler_name = infy_dpp_sdk.common.Constants.FSH_DPP
if not fs_manager.has_fs_handler(dpp_fs_handler_name):
    fs_manager.set_root_handler_name(dpp_fs_handler_name)
    fs_manager.add_fs_handler(file_sys_handler)

In [None]:
# Build the client configuration from the container root path and load it
# into the DPP SDK's client config manager.
container_data = {"container_root_path": f"{CONTAINER_ROOT_PATH}"}
client_config_data = infy_dpp_sdk.ClientConfigData(container_data=container_data)
infy_dpp_sdk.ClientConfigManager().load(client_config_data)

#### Initialize Logging

In [None]:
# Overall logging level; modify as required (e.g. logging.INFO or logging.DEBUG).
# A single constant keeps the root logger and the file logger in agreement
# (logging.ERROR is the integer 40).
LOGGING_LEVEL = logging.ERROR
logging.basicConfig(level=LOGGING_LEVEL)

logging_config_data = infy_fs_utils.data.LoggingConfigData(
        **{
            # Optional: "logger_group_name": "my_group_1",
            "logging_level": LOGGING_LEVEL,
            "logging_format": "",
            "logging_timestamp_format": "",
            "log_file_data": {
                "log_file_dir_path": "/logs",
                "log_file_name_prefix": "rag_evaluation_batch",
                # Optional: "log_file_name_suffix": "1",
                "log_file_extension": ".log"

            }})

In [None]:
# Register the file-system logging handler under the DPP logging handler name,
# unless one is already registered (keeps the cell safe to re-run).
fs_logging_manager = infy_fs_utils.manager.FileSystemLoggingManager()
dpp_logging_handler_name = infy_dpp_sdk.common.Constants.FSLH_DPP
if not fs_logging_manager.has_fs_logging_handler(dpp_logging_handler_name):
    file_sys_logging_handler = infy_fs_utils.provider.FileSystemLoggingHandler(
        logging_config_data, file_sys_handler)
    fs_logging_manager.set_root_handler_name(dpp_logging_handler_name)
    fs_logging_manager.add_fs_logging_handler(file_sys_logging_handler)

### Run the pipeline

<div  style="line-height: 1;">
    <span style="color:Red"><b>WARNING:</b><br>
        Before running this pipeline make sure to fill the <i>index_id</i> field in the <i>dpp_pipeline_rag_evaluation_input_config.json</i> with the appropriate value obtained after running <i>uc_01_rag_indexing_vectordb</i><br>Follow the steps below for details: <br>
        <b>1: </b>Open the config file found in the following location <i>/data/config/dpp_pipeline_rag_evaluation_input_config.json</i><br>
        <b>2: </b>Inside the config navigate to <i>processor_input_config</i>-><i>SemanticSearch</i>-><i>services</i>-><i>request_payload</i>-><i>retrieval</i>-><i>index_id</i> and update the <i>index_id</i> value obtained after running <i>uc_01_rag_indexing_vectordb</i><br>
        <b></b>
    </span>
</div>

In [None]:
# Create the native orchestrator for the RAG evaluation pipeline, pointing it
# at the pipeline input config file.
dpp_orchestrator = infy_dpp_sdk.orchestrator.OrchestratorNative(
    input_config_file_path=PIPELINE_INPUT_CONFIG_FILE_PATH,
)

In [None]:
# Run the pipeline in batch mode; the returned list is indexed by the cells
# below, which read each entry's context_data.
processor_response_list = dpp_orchestrator.run_batch()

In [None]:
# Show the 'request_closer' entry of the first response's context data
# as indented JSON.
first_response = processor_response_list[0]
request_closer_data = first_response.context_data.get('request_closer')
print(json.dumps(request_closer_data, indent=4))

<div  style="line-height: 1;">
    <span style="color:Green"><b>NOTE: </b> The evaluation results will be available in <i><b>rag_report.xlsx</b></i> file generated at the path below:</span></div>

In [None]:
# Print the path of the generated report from the first response's context data.
# The 'rag_report_generator' entry may be absent (for example, if that step did
# not run), so fall back to an empty dict instead of calling .get() on None.
rag_report_data = processor_response_list[0].context_data.get('rag_report_generator') or {}
rag_report_file_path = rag_report_data.get('rag_report_file_path')
if rag_report_file_path is None:
    print("rag_report_file_path was not found in the pipeline response; "
          "check the pipeline logs to confirm that the report was generated.")
else:
    print(json.dumps(rag_report_file_path, indent=4))