In [None]:
# ===============================================================================================================#
# Copyright 2024 Infosys Ltd.                                                                          #
# Use of this source code is governed by Apache License Version 2.0 that can be found in the LICENSE file or at  #
# http://www.apache.org/licenses/                                                                                #
# ===============================================================================================================#

## Tool 01 - Q&A (Interactive)
This notebook demonstrates how `uc_51_inferencing_online.ipynb` can be used to build an interactive UI.

#### Import libraries

In [None]:
import os
import json
import shutil
import infy_dpp_sdk
import infy_fs_utils
from _internal_utils.pipeline_helper import PipelineHelper
from IPython.display import display, HTML, Markdown
from _internal_utils.q_n_a_visualizer import QnAVisualizer

#### Set environment variables
<div  style="line-height: 1;">
    <span style="color:Red"><b>NOTE:</b> The pipeline uses environment variables which need to be set by the developer.<br>
In production, the developer needs to set them as required.<br>
In this notebook, you can provide them using the code below.<br>
To set or change the values, please refer to <i>installation.ipynb</i>.</span>
</div>

In [None]:
%store -r CUSTOM_LLM_MIXTRAL_INFERENCE_URL
os.environ['CUSTOM_LLM_MIXTRAL_INFERENCE_URL']=CUSTOM_LLM_MIXTRAL_INFERENCE_URL

%store -r OPENAI_KEY
os.environ['OPENAI_KEY'] = OPENAI_KEY
%store -r OPENAI_SERVER_URL
os.environ['OPENAI_SERVER_URL']=OPENAI_SERVER_URL

#### Define configuration file path

In [None]:
# Root folders used by the pipeline. The defaults are the original sample
# locations; set the DPP_STORAGE_ROOT_PATH / DPP_CONTAINER_ROOT_PATH environment
# variables to relocate them without editing the notebook (use forward slashes,
# since STORAGE_ROOT_PATH is embedded in a file:// URI further down).
STORAGE_ROOT_PATH = os.environ.get(
    'DPP_STORAGE_ROOT_PATH', 'C:/DPP/infy_libraries_client/tool/STORAGE')
CONTAINER_ROOT_PATH = os.environ.get(
    'DPP_CONTAINER_ROOT_PATH', 'C:/DPP/infy_libraries_client/tool/CONTAINER')

# Pipeline input config file; it is read and written through the file-system
# handler, so the path is not a plain OS path on its own.
PIPELINE_INPUT_CONFIG_FILE_PATH = '/data/config/dpp_pipeline_inference_online_input_config.json'

#### Copying files
<div  style="line-height: 1;"><span style="color:Red"><b>NOTE: </b>In this notebook, the cell below copies sample files to folders in <i>STORAGE_ROOT_PATH</i>.<br>
In production, the data and config files should be kept under their respective folders in <i>STORAGE_ROOT_PATH</i>.<br>
</span></div>

In [None]:
# Make sure the target folders exist. exist_ok=True keeps the cell re-runnable
# without a separate check-then-create step; missing parents (.../data) are
# created as well.
for sub_dir in ('input', 'vectordb'):
    os.makedirs(f'{STORAGE_ROOT_PATH}/data/{sub_dir}', exist_ok=True)

# The sample config is copied from ../data (one level above this notebook).
shared_sample_root = os.path.abspath('../data')
shutil.copytree(f'{shared_sample_root}/sample/config',
                f'{STORAGE_ROOT_PATH}/data/config',
                dirs_exist_ok=True)

# The sample vector DB is copied from ./data (next to this notebook).
local_sample_root = os.path.abspath('./data')
shutil.copytree(f'{local_sample_root}/sample/vectordb',
                f'{STORAGE_ROOT_PATH}/data/vectordb',
                dirs_exist_ok=True)

#### Initialize Client Config

In [None]:
# Storage settings: a file:// root under STORAGE_ROOT_PATH; the server URL and
# access/secret keys are left empty.
storage_config_data = infy_fs_utils.data.StorageConfigData(
    storage_root_uri=f"file://{STORAGE_ROOT_PATH}",
    storage_server_url="",
    storage_access_key="",
    storage_secret_key="",
)

# Client settings: only the container root path is supplied.
client_config_data = infy_dpp_sdk.ClientConfigData(
    container_data={
        "container_root_path": f"{CONTAINER_ROOT_PATH}",
    },
)

file_sys_handler = infy_fs_utils.provider.FileSystemHandler(
    storage_config_data)

# Register the handler under the FSH_DPP key only when that key is not already
# registered, so re-running this cell does not add it a second time.
if not infy_fs_utils.manager.FileSystemManager().has_fs_handler(
        infy_dpp_sdk.common.Constants.FSH_DPP):
    infy_fs_utils.manager.FileSystemManager().add_fs_handler(
        file_sys_handler, infy_dpp_sdk.common.Constants.FSH_DPP)

infy_dpp_sdk.ClientConfigManager().load(client_config_data)

#### Initialize Logging

In [None]:
# Logging settings. Level 10 corresponds to DEBUG in the standard `logging`
# numbering; the format strings are left empty. Log files use the "inference"
# prefix and ".log" extension under "/logs".
logging_config_data = infy_fs_utils.data.LoggingConfigData(
    # logger_group_name="my_group_1",
    logging_level=10,
    logging_format="",
    logging_timestamp_format="",
    log_file_data={
        "log_file_dir_path": "/logs",
        "log_file_name_prefix": "inference",
        # "log_file_name_suffix": "1",
        "log_file_extension": ".log",
    },
)

In [None]:
# Register the file-system logging handler under the FSLH_DPP key, but only if
# that key has no handler yet (keeps the cell safe to re-run).
logging_handler_registered = (
    infy_fs_utils.manager.FileSystemLoggingManager().has_fs_logging_handler(
        infy_dpp_sdk.common.Constants.FSLH_DPP))
if not logging_handler_registered:
    infy_fs_utils.manager.FileSystemLoggingManager().add_fs_logging_handler(
        infy_fs_utils.provider.FileSystemLoggingHandler(
            logging_config_data, file_sys_handler),
        infy_dpp_sdk.common.Constants.FSLH_DPP)

### Inference Pipeline - Q&A visualization 

In [None]:
# ---- Create response data -----
# Build the nested SDK objects from the inside out: an empty file path value,
# wrapped in standard data, wrapped in metadata, wrapped in a document.
empty_filepath = infy_dpp_sdk.data.ValueData()
standard_data = infy_dpp_sdk.data.StandardData(filepath=empty_filepath)
metadata = infy_dpp_sdk.data.MetaData(standard_data=standard_data)
document_data = infy_dpp_sdk.data.DocumentData(metadata=metadata)

# No context is supplied up front.
context_data = {}

response_data = infy_dpp_sdk.data.ProcessorResponseData(
    document_data=document_data, context_data=context_data)

# Serialize and parse back so later cells work with plain JSON-style data.
document_data_json = json.loads(response_data.json(indent=4))

In [None]:
# Q&A UI helper instance; the submit callback and the UI wiring further down
# in this notebook read from and write to this object.
qna_visualizer = QnAVisualizer()

def form_submit_button_clicked(_):
    """Run the inference pipeline for the submitted question and show the answer.

    Reads the question and the optional document-name filter from the UI,
    writes both into the first query of the pipeline input config file, runs
    the pipeline, saves the raw response to
    ``./data/processor_response_data_list.json`` and renders the first reader
    output in the UI.

    If anything fails, the error is shown in the output area (instead of
    leaving the "Fetching" message in place) and the exception is re-raised.

    Args:
        _: Argument supplied by the submit callback; unused.
    """
    query = qna_visualizer.get_input_text()
    filter_str = qna_visualizer.get_filter_text().strip()
    qna_visualizer.set_output_text('Fetching. Please wait...')

    try:
        # Patch the first configured query with the user's question and filter,
        # then persist the config because the orchestrator reads it from file.
        input_config_data = json.loads(file_sys_handler.read_file(
            PIPELINE_INPUT_CONFIG_FILE_PATH))
        queries_dict = input_config_data['processor_input_config'][
            'QueryRetriever']['queries'][0]
        queries_dict['question'] = query
        # An empty filter means "no restriction on doc_name".
        queries_dict['filter_metadata'] = (
            {'doc_name': filter_str} if filter_str else {})
        file_sys_handler.write_file(
            PIPELINE_INPUT_CONFIG_FILE_PATH,
            json.dumps(input_config_data, indent=4))

        dpp_orchestrator = infy_dpp_sdk.orchestrator.OrchestratorNativeBasic(
            input_config_file_path=PIPELINE_INPUT_CONFIG_FILE_PATH)
        processor_response_list = dpp_orchestrator.run_batch(
            [infy_dpp_sdk.data.DocumentData(
                **document_data_json.get('document_data'))],
            [document_data_json.get('context_data')])

        # Keep the raw response on disk for inspection.
        with open("./data/processor_response_data_list.json", "w") as f:
            json.dump(processor_response_list[0].dict(), f, indent=4)

        # Tolerate a missing "reader" section or an empty "output" list.
        reader_data = processor_response_list[0].context_data.get("reader") or {}
        output_list = reader_data.get("output") or []
        print(output_list)
        if not output_list:
            qna_visualizer.set_output_text(
                'No answer was returned by the pipeline.')
            return

        first_output = output_list[0]
        model_output = first_output.get("model_output")
        if isinstance(model_output, dict):
            source_metadata_list = first_output.get("source_metadata")
            source_metadata = source_metadata_list[0] if source_metadata_list else {}
            # "sources" may be absent or empty; guard it the same way as
            # source_metadata above instead of indexing blindly.
            sources = model_output.get("sources")
            page_no = sources[0].get("page_no") if sources else "Not Found"
            qna_visualizer.set_output_text(
                f' answer  = {model_output.get("answer")}\n'
                f' source  = {source_metadata.get("doc_name", "Not Found")}\n'
                f' page_no = {page_no}\n'
                f' confidence_pct = {model_output.get("confidence_pct")}'
            )
        else:
            # Non-dict model output is shown as-is.
            qna_visualizer.set_output_text(f'{model_output}')
    except Exception as ex:
        # Replace the "Fetching" message so the user can see that the request
        # failed, then let the exception propagate unchanged.
        qna_visualizer.set_output_text(f'Error: {ex}')
        raise
    
def count_tokens(text):
    """Return the token count for ``text``.

    Sample implementation: the count is simply the length of ``text``.
    Replace it with custom token-counting logic as needed.
    """
    # Custom token count logic to be added here. Sample below.
    return len(text)

# Help text displayed before the form is shown. Despite the variable name the
# content is Markdown (with one inline <hr/> tag); the trailing spaces at the
# end of the lines are Markdown hard line breaks and must be kept.
help_html="""
**Sample Question(s):**   
What is the percentage of women employees?  
What's the operating margin?  
What are list of equipment involved in Virtual Reality (VR)?  
Which football player has scored 15 goals from table?  
In which year was Infosys Certified excellent in employee conditions?  
<hr/>

**Sample Filter(s) for question no.2: (use any one)**  
AR_2022-23_page-14-17.pdf  
AR_2021-22_page-20-21.pdf
"""

# Render the help text, register the submit handler and the token counter,
# and only then show the UI.
display(Markdown(help_html))
qna_visualizer.on_form_submit_callback(form_submit_button_clicked)
qna_visualizer.set_token_counter_fn(count_tokens)
qna_visualizer.show_ui()