In [None]:
# ===============================================================================================================#
# Copyright 2023 Infosys Ltd.                                                                          #
# Use of this source code is governed by Apache License Version 2.0 that can be found in the LICENSE file or at  #
# http://www.apache.org/licenses/                                                                                #
# ===============================================================================================================#

## RAG Evaluation

<div  style="line-height: 1;">
    <span style="color:Green"> <b>PRE-REQUISITES : </b><br><br> 
        1. The library requires an <b>input config file</b> and <b>dataset file(s)</b> in order to perform the evaluation.<br>2. The input config file captures the configuration required for the embeddings, the LLM and the metrics used for evaluation, as well as the path to the directory containing one or more dataset files.<br>3. The <b>ground_truth</b> needs to be provided along with a <b>question</b> and a <b>context</b> in the dataset file. When evaluating a model, fetching the answer is a capability provided by the library.<br>
</span>
</div>

#### Import libraries

In [None]:
import os
import shutil
import json
from typing import List
import pytest
from langchain_openai import AzureOpenAI
from langchain_openai.embeddings import AzureOpenAIEmbeddings
from langchain_openai.chat_models import AzureChatOpenAI

import infy_fs_utils
import infy_model_evaluation
from infy_model_evaluation.common.constants import Constants
from infy_model_evaluation.common.logger_factory import LoggerFactory
from infy_model_evaluation.configuration import ClientConfigData
from infy_model_evaluation.evaluator.process.rag_evaluator import RagEvaluator
from infy_model_evaluation.data.config_data import EvaluatorMetrics, Result, TargetLlm, Datasource
from infy_model_evaluation.data.config_data import EvaluatorConfigData
from infy_model_evaluation.data.dataset import EvaluatorDataset
from infy_model_evaluation.data.dataset import DatasetEntry

#### Define configuration file path

In [None]:
# Root folder used as the storage root of the file-system handler created
# below; the sample data and config files are copied underneath it.
# Adjust to a folder that exists on your machine.
STORAGE_ROOT_PATH = 'C:/del/fs/notebookuc/STORAGE'
# Root folder handed to the library as its `container_root_path`.
CONTAINER_ROOT_PATH = 'C:/del/fs/notebookuc/CONTAINER'
# Path of the input config file as read through the file-system handler.
# NOTE(review): presumably resolved relative to STORAGE_ROOT_PATH -- confirm.
INPUT_CONFIG_FILE_PATH = '/data/config/input_config.json'

#### Copying files
<div style="line-height: 1;">
<span style="color:Red"><b>NOTE: </b>In this notebook, the cell below copies the sample files to folders under <i>STORAGE_ROOT_PATH</i>.<br>
In production, the data and config files should be kept under the respective folders in <i>STORAGE_ROOT_PATH</i>.<br>
</span>
</div>

In [None]:
# Stage the sample dataset and the sample input config under STORAGE_ROOT_PATH.
current_data_path = os.path.abspath('./data')

# Create BOTH destination folders before copying. The config folder was never
# created before, so copying input_config.json failed on a fresh storage root.
# `exist_ok=True` keeps the cell safe to re-run and also creates the
# intermediate `data` folder.
for sub_dir in ('data/input', 'data/config'):
    os.makedirs(f'{STORAGE_ROOT_PATH}/{sub_dir}', exist_ok=True)

shutil.copy(f'{current_data_path}/dataset_file.json',
            f'{STORAGE_ROOT_PATH}/data/input/dataset_file.json')
shutil.copy(f'{current_data_path}/input_config.json',
            f'{STORAGE_ROOT_PATH}/data/config/input_config.json')

#### Initialize Client Config

In [None]:
# Describe the storage backend: the storage root is addressed through a
# file:// URI; server URL and access/secret keys are left blank.
storage_config_data = infy_fs_utils.data.StorageConfigData(
    storage_root_uri=f"file://{STORAGE_ROOT_PATH}",
    storage_server_url="",
    storage_access_key="",
    storage_secret_key="",
)
file_sys_handler = infy_fs_utils.provider.FileSystemHandler(storage_config_data)

# Register the handler only when none is registered yet under the
# model-evaluation name, so re-running this cell does not register it twice.
if not infy_fs_utils.manager.FileSystemManager().has_fs_handler(Constants.FSH_MODEL_EVALUATION):
    infy_fs_utils.manager.FileSystemManager().set_root_handler_name(Constants.FSH_MODEL_EVALUATION)
    infy_fs_utils.manager.FileSystemManager().add_fs_handler(file_sys_handler)

In [None]:
# Configure client properties: point the library at its container root folder
# and load that configuration into the client config manager.
client_config_data = ClientConfigData(
    container_data={"container_root_path": CONTAINER_ROOT_PATH}
)
infy_model_evaluation.ClientConfigManager().load(client_config_data)

#### Initialize Logging

In [None]:
import logging

# Modify as required to control the overall logging level
logging.basicConfig(level=logging.ERROR)

# Settings for the library's file-based logger.
logging_config_data = infy_fs_utils.data.LoggingConfigData(
    # logger_group_name="my_group_1",
    logging_level=logging.DEBUG,  # same numeric value (10) as before
    logging_format="",
    logging_timestamp_format="",
    log_file_data={
        "log_dir_path": "/logs",
        "log_file_name_prefix": "infy_model_evaluation",
        "log_file_name_suffix": "",
        "log_file_extension": ".log",
    },
)

In [None]:
# Register the file-system logging handler only when none is registered yet
# under the model-evaluation name, so re-running this cell does not add it twice.
if not infy_fs_utils.manager.FileSystemLoggingManager().has_fs_logging_handler(
    Constants.FSH_MODEL_EVALUATION):
    # The logging handler is built from the logging config defined above and
    # the file-system handler created earlier in the notebook.
    file_sys_logging_handler = infy_fs_utils.provider.FileSystemLoggingHandler(
                logging_config_data, file_sys_handler)
    # The root handler name is set before the handler itself is added.
    infy_fs_utils.manager.FileSystemLoggingManager(
            ).set_root_handler_name(Constants.FSH_MODEL_EVALUATION)
    infy_fs_utils.manager.FileSystemLoggingManager(
            ).add_fs_logging_handler(file_sys_logging_handler)

### Build the Library Config

<div  style="line-height: 1;">
    <span style="color:Red"><b>WARNING:</b><br>
        Before running the code below, make sure to fill in the values for the <i>api_url</i> and <i>api_key</i> fields in <i>input_config.json</i> with the appropriate values.<br>Follow the steps below for details: <br>
        <b>1: </b>Open the config file found at the following location: <i>/data/config/input_config.json</i><br>
        <b>2: </b>Inside the config, update the values of the above two fields for the embedding and llm entries, based on the type you want to use.<br>
        <b>3: </b>Set the <i>AZURE_OPENAI_SERVER_BASE_URL</i> and <i>AZURE_OPENAI_SECRET_KEY</i> environment variables; the cells below read the Azure OpenAI endpoint and key from them.<br>
    </span>
</div>

In [None]:
# Read the raw config text through the file-system handler, then parse the
# JSON into a plain dictionary.
file_path = INPUT_CONFIG_FILE_PATH
config_file_content = file_sys_handler.read_file(file_path)
request_config_data = json.loads(config_file_content)


In [None]:
# Split the parsed config into its four top-level sections; a section that is
# absent from the file becomes an empty dict.
(evaluator_config_data,
 target_config_data,
 datasource_config_data,
 result_config_data) = (
    request_config_data.get(section_name, {})
    for section_name in ('evaluator', 'target', 'datasource', 'result')
)

# Only the `configuration` part of the datasource and result sections is used
# later on (None when the section does not define it).
datasource_config = datasource_config_data.get('configuration')
result_config = result_config_data.get('configuration')

In [None]:
# Prepare Evaluator Config
#
# Extracts the five values the following cells rely on from the `evaluator`
# section of the input config:
#   embedding_config, llm_config  - `configuration` of the enabled `openai` entry
#   metrics                       - names of the enabled entries in `metrics_list`
#   evaluation_only, context_filter - taken over as-is
#
# Direct lookups replace the previous nested loops, whose inner loops rebound
# the outer loop variable `key`. Missing or disabled entries now fail here
# with a clear message instead of leaving a variable undefined (or stale from
# an earlier run) and failing in a later cell.


def _enabled_openai_config(evaluator_section, section_name):
    """Return the `configuration` of the enabled `openai` entry of a section.

    Args:
        evaluator_section: the `evaluator` dictionary from the input config.
        section_name: sub-section to look into (`'embedding'` or `'llm'`).

    Returns:
        The `configuration` value of `<section_name>.openai`.

    Raises:
        ValueError: if the entry is missing, not enabled, or has no
            `configuration`.
    """
    openai_entry = evaluator_section.get(section_name, {}).get('openai', {})
    configuration = openai_entry.get('configuration')
    if not openai_entry.get('enabled') or configuration is None:
        raise ValueError(
            f"input config: 'evaluator.{section_name}.openai' must be enabled "
            "and provide a 'configuration'")
    return configuration


# Check presence with `in` (not truthiness): values such as False are valid.
_missing_keys = [
    required_key
    for required_key in ('metrics_list', 'evaluation_only', 'context_filter')
    if required_key not in evaluator_config_data
]
if _missing_keys:
    raise ValueError(
        f"input config: 'evaluator' section is missing {_missing_keys}")

embedding_config = _enabled_openai_config(evaluator_config_data, 'embedding')
llm_config = _enabled_openai_config(evaluator_config_data, 'llm')
metrics = [
    metric.get('name')
    for metric in evaluator_config_data['metrics_list']
    if metric.get('enabled')
]
evaluation_only = evaluator_config_data['evaluation_only']
context_filter = evaluator_config_data['context_filter']
    

In [None]:
# Pick up the target LLM settings when the config declares an `llm` target;
# otherwise `target_llm_config` is left untouched.
if 'llm' in target_config_data:
    target_llm_config = target_config_data['llm'].get('configuration')

In [None]:
# Prepare evaluator embedding config
# Endpoint and key are read from environment variables; the remaining
# settings come from the embedding configuration in the input config.
evaluator_embedding = AzureOpenAIEmbeddings(
    openai_api_type=embedding_config.get('api_type'),
    azure_endpoint=os.environ['AZURE_OPENAI_SERVER_BASE_URL'],
    api_key=os.environ['AZURE_OPENAI_SECRET_KEY'],
    openai_api_version=embedding_config.get('api_version'),
    model=embedding_config.get('model_name'),
    azure_deployment=embedding_config.get('deployment_name'),
)

In [None]:
# Prepare evaluator llm config
# Exactly one of the two clients is created, depending on `is_chat_model`;
# the other one stays None.
evaluator_llm_chat = evaluator_llm = None

# Both client types take the same arguments, so they are assembled once.
llm_client_kwargs = dict(
    openai_api_type=llm_config.get('api_type'),
    azure_endpoint=os.environ['AZURE_OPENAI_SERVER_BASE_URL'],
    api_key=os.environ['AZURE_OPENAI_SECRET_KEY'],
    openai_api_version=llm_config.get('api_version'),
    model=llm_config.get('model_name'),
    azure_deployment=llm_config.get('deployment_name'),
)

if llm_config.get('is_chat_model'):
    evaluator_llm_chat = AzureChatOpenAI(**llm_client_kwargs)
else:
    evaluator_llm = AzureOpenAI(**llm_client_kwargs)


In [None]:
# Prepare evaluator metrics config
# Wrap the list of enabled metric names in the library's metrics object.
evaluator_metrics = EvaluatorMetrics(metrics=metrics)

In [None]:
# Prepare target llm config
# The target LLM settings come from the input config; endpoint and key are
# then overridden from the same environment variables used for the evaluator
# clients. The two assignments were previously swapped (the base URL was
# stored as the key and the secret key as the URL).
__target_llm = TargetLlm(**target_llm_config)
__target_llm.api_url = os.environ['AZURE_OPENAI_SERVER_BASE_URL']
__target_llm.api_key = os.environ['AZURE_OPENAI_SECRET_KEY']

In [None]:
# Build the result settings object from the `configuration` of the `result`
# section of the input config.
__result = Result(**result_config)

In [None]:
# Build the datasource settings object from the `configuration` of the
# `datasource` section of the input config.
__datasource = Datasource(**datasource_config)

In [None]:
# Prepare evaluator config data
# Bundles everything prepared in the cells above into one config object.
# NOTE: this rebinds `evaluator_config_data`, which until now held the raw
# `evaluator` dictionary from the input config. To re-run the parsing cell
# afterwards, first re-run the cell that splits the config into sections.
evaluator_config_data = EvaluatorConfigData(
    embedding=evaluator_embedding,
    # Only one of `llm` / `llm_chat` is set; the other one is None.
    llm=evaluator_llm,
    llm_chat=evaluator_llm_chat,
    metrics=evaluator_metrics.metrics,
    target_llm=__target_llm,
    evaluation_only=evaluation_only,
    context_filter=context_filter,    
    result=__result,
    datasource=__datasource,
    is_evaluator_llm_chat_model=llm_config.get('is_chat_model'),
    evaluator_embedding_tiktoken_cache_dir=embedding_config.get(
                'tiktoken_cache_dir')
)

<div style="line-height: 1;">
    <span ><b>NOTE: </b><br>Run the cell below <b>only</b> in case you want to evaluate a custom model<br><br>Configure the <i>api_url</i> and other parameters accordingly
</span>
</div>

In [None]:
# evaluator_config_data.target_llm.api_type= ""
# evaluator_config_data.target_llm.api_url = os.environ['CUSTOM_LLM_URL']
# evaluator_config_data.target_llm.max_tokens = 1024
# evaluator_config_data.target_llm.temperature = 0.7
# evaluator_config_data.target_llm.tiktoken_cache_dir = ""
# evaluator_config_data.target_llm.remove_prompt_from_response = False
# evaluator_config_data.target_llm.requires_num_return_sequences = False
# evaluator_config_data.target_llm.num_return_sequences = 1
# evaluator_config_data.target_llm.do_sample = True
# evaluator_config_data.result.file_path = "/evaluation_result_mixtral8x7b-instruct.json"


### Run the evaluation

In [None]:
# Run the evaluation with the config object assembled above.
# NOTE(review): the second argument is an empty list; presumably the datasets
# are then taken from the configured datasource -- confirm against
# RagEvaluator.evaluate.
evaluator = RagEvaluator()
result = evaluator.evaluate(evaluator_config_data,[])

### Verify results

<div  style="line-height: 1;">
    <span style="color:Green"><b>NOTE: </b> The results of the evaluation will be available in <i>evaluation_result.json</i> file at <i>STORAGE_ROOT_PATH</i>.</span></div>

In [None]:
# Show the `aggregation` entry of the evaluation result.
print(result.get('aggregation'))

### Further Processing

<div  style="line-height: 1;">
    <span style="color:Green"><b>NOTE: <br></b>This completes the model evaluation for the dataset file(s).<br>As a next step, to visualise the metrics, refer to <i>rag_metrics</i> under <b>reporter</b>, which requires the <i>evaluation_result.json</i> file created above.