In [None]:
# ===============================================================================================================#
# Copyright 2023 Infosys Ltd.                                                                          #
# Use of this source code is governed by Apache License Version 2.0 that can be found in the LICENSE file or at  #
# http://www.apache.org/licenses/                                                                                #
# ===============================================================================================================#

## RAG Metrics

#### Visualising the metrics
<div style="line-height: 1;">
<span style="color:Green"><b>NOTE: </b><br><b>1. </b>This notebook visualises the metrics from the result file generated by running the <b>rag_evaluation</b> notebook.<br><b>2. </b>The path to the result file needs to be provided. For more than one result file, specify the path to each result file.<br><b>3. </b>In this use case, <i>two</i> result files are used for visualisation.<br>
</span>
</div>

#### Import libraries

In [None]:
import os
import shutil
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#### Define storage path

In [None]:
# Root folder for this notebook's working files; the copy cell below places the
# sample result files here. Edit this value to match your environment.
STORAGE_ROOT_PATH = 'C:/del/fs/notebookuc/STORAGE'

#### Copying files
<div style="line-height: 1;">
<span style="color:Red"><b>NOTE: </b>The cell below is used to copy the sample files to folders in <i>STORAGE_ROOT_PATH</i>.<br>
In production, the data and config files should be kept under their respective folders in <i>STORAGE_ROOT_PATH</i>.<br>
</span>
</div>

In [None]:
# Folder holding the sample result files shipped next to this notebook.
current_data_path = os.path.abspath('./data')

# makedirs creates any missing parent folders as well, and exist_ok=True makes
# the cell safe to re-run without a separate (race-prone) existence check.
os.makedirs(f'{STORAGE_ROOT_PATH}/data/input', exist_ok=True)

# The result files are copied to the storage root itself (not data/input),
# which is where the RESULT_FILE_PATH cells further down read them from.
for result_file_name in ('evaluation_result_openai.json',
                         'evaluation_result_mixtral8x7b-instruct.json'):
    shutil.copy(f'{current_data_path}/{result_file_name}',
                f'{STORAGE_ROOT_PATH}/{result_file_name}')

In [None]:
# Y-axis label shared by every chart in this notebook.
Y_LABEL = 'Scores'

In [None]:
# Display name of the first model; used in the printed heading, the chart title
# and the comparison legend.
MODEL_1 = 'gpt-4 (OpenAI)'

#### Visualise metrics for 'gpt-4 (OpenAI)'

In [None]:
# Derived from STORAGE_ROOT_PATH so the path follows the configured storage
# root instead of repeating the absolute path.
RESULT_FILE_PATH = f'{STORAGE_ROOT_PATH}/evaluation_result_openai.json'

In [None]:
# Keep model 1's result path under its own name; RESULT_FILE_PATH is reassigned
# later for model 2.
eval_result_model_1 = str(RESULT_FILE_PATH)

In [None]:
# Read model 1's evaluation result file and parse it as JSON.
with open(eval_result_model_1, encoding='utf-8') as f:
    result = json.loads(f.read())

In [None]:
# Tabulate the 'records' entry of the loaded result for model 1.
df_records_1 = pd.DataFrame(data=result.get('records'))

In [None]:
# Heading line, followed by the full record table as the cell output.
print("Metrics for {}".format(MODEL_1))
df_records_1

<div style="line-height: 1;">
    <span style="color:Green"><b>NOTE: </b><br>The cell below is used to define the list of columns to be excluded from the dataframe.<br>Edit the list to exclude all columns other than the columns representing the metrics.
</span>
</div>

In [None]:
# Columns dropped from model 1's records before averaging; edit this list so
# that only the metric columns remain.
exclude_columns_1 = [
    'question',
    'contexts',
    'ground_truth',
    'answer',
    'additional_field_file_name',
    'additional_field_page_no',
]

In [None]:
# New frame without the excluded columns; df_records_1 itself is left untouched.
df_records_1_ex = df_records_1.drop(exclude_columns_1, axis='columns')

In [None]:
# Show the frame left after dropping the excluded columns (model 1).
df_records_1_ex

In [None]:
# Column-wise mean: one averaged value per remaining column of model 1's frame.
df_records_1_ex_mean = df_records_1_ex.mean(axis=0)

In [None]:
# Show the per-column means computed for model 1.
df_records_1_ex_mean

In [None]:
# Bar chart of the mean of each metric column for model 1.

# Number of bars, taken from the series that is actually plotted so that
# positions, heights and tick labels always have the same length.
N = len(df_records_1_ex_mean)

# The x locations for the bars
ind = np.arange(N)

# The width of the bars
width = 0.3

# Create the bar chart
fig, ax = plt.subplots(figsize=(12, 4))
ax.bar(ind, df_records_1_ex_mean, width, color='lime')

# Add labels and title
ax.set_ylabel(f'{Y_LABEL}')
ax.set_title(f'RAG Assessment :: Metrics for {MODEL_1}')

# Bars are centred on `ind` (matplotlib's default align='center'), so the ticks
# are placed at `ind` too; an extra offset would push the labels off-centre.
ax.set_xticks(ind)
ax.set_xticklabels(df_records_1_ex_mean.index, rotation=45)

# Display the plot
plt.show()

<div style="line-height: 1;">
    <span style="color:Green"><b>NOTE: </b><br>This completes the visualisation for model 1.<br>If you need to visualise metrics for more models, proceed to the next step.<br>
</span>
</div>

In [None]:
# Display name of the second model; used in the printed heading, the chart title
# and the comparison legend.
MODEL_2 = 'Mixtral8x7b-instruct'

#### Visualise metrics for 'Mixtral8x7b-instruct'

In [None]:
# Derived from STORAGE_ROOT_PATH so the path follows the configured storage
# root instead of repeating the absolute path.
RESULT_FILE_PATH = f'{STORAGE_ROOT_PATH}/evaluation_result_mixtral8x7b-instruct.json'

In [None]:
# Keep model 2's result path under its own name, separate from the reusable
# RESULT_FILE_PATH variable.
eval_result_model_2 = str(RESULT_FILE_PATH)

In [None]:
# Read model 2's evaluation result file and parse it as JSON.
with open(eval_result_model_2, encoding='utf-8') as f:
    result = json.loads(f.read())

In [None]:
# Tabulate the 'records' entry of the loaded result for model 2.
df_records_2 = pd.DataFrame(data=result.get('records'))

In [None]:
# Heading line, followed by the full record table as the cell output.
print("Metrics for {}".format(MODEL_2))
df_records_2

<div style="line-height: 1;">
    <span ><b>NOTE: </b><br>The cell below is used to define the list of columns to be excluded from the data-frame.<br>Edit the list to exclude all columns other than the columns representing the metrics.
</span>
</div>

In [None]:
# Columns dropped from model 2's records before averaging; edit this list so
# that only the metric columns remain.
exclude_columns_2 = [
    'question',
    'contexts',
    'ground_truth',
    'answer',
    'additional_field_file_name',
    'additional_field_page_no',
]

In [None]:
# New frame without the excluded columns; df_records_2 itself is left untouched.
df_records_2_ex = df_records_2.drop(exclude_columns_2, axis='columns')

In [None]:
# Show the frame left after dropping the excluded columns (model 2).
df_records_2_ex

In [None]:
# Column-wise mean: one averaged value per remaining column of model 2's frame.
df_records_2_ex_mean = df_records_2_ex.mean(axis=0)

In [None]:
# Show the per-column means computed for model 2.
df_records_2_ex_mean

In [None]:
# Bar chart of the mean of each metric column for model 2.

# Number of bars, taken from the series that is actually plotted so that
# positions, heights and tick labels always have the same length.
N = len(df_records_2_ex_mean)

# The x locations for the bars
ind = np.arange(N)

# The width of the bars
width = 0.3

# Create the bar chart
fig, ax = plt.subplots(figsize=(12, 4))
ax.bar(ind, df_records_2_ex_mean, width, color='#3261e3')

# Add labels and title
ax.set_ylabel(f'{Y_LABEL}')
ax.set_title(f'RAG Assessment :: Metrics for {MODEL_2}')

# Bars are centred on `ind` (matplotlib's default align='center'), so the ticks
# are placed at `ind` too; an extra offset would push the labels off-centre.
ax.set_xticks(ind)
ax.set_xticklabels(df_records_2_ex_mean.index, rotation=45)

# Display the plot
plt.show()

<div style="line-height: 1;">
    <span style="color:Green"><b>NOTE: </b><br>This completes the visualisation for model 2.<br>If you need to compare the performance of multiple models evaluated on the same dataset, proceed to the next step.<br>
</span>
</div>

#### Compare performance of multiple models

In [None]:
# Model 1's mean metrics as a one-column frame (metrics as rows), with the
# column labelled by the model name so the transposed row is identifiable.
# Series.to_frame() does this directly, without concatenating onto an empty
# DataFrame.
df = df_records_1_ex.mean().to_frame(name=MODEL_1)

In [None]:
# Flip the one-column frame so metrics become columns and the model is one row.
master_df = df.transpose()

In [None]:
# Model 2's mean metrics as a one-column frame (metrics as rows), with the
# column labelled by the model name so the transposed row is identifiable.
# Series.to_frame() does this directly, without concatenating onto an empty
# DataFrame.
df = df_records_2_ex.mean().to_frame(name=MODEL_2)

In [None]:
# Comparison table: one row per model, one column per metric, built directly
# from the two per-model frames. Because it does not append to the existing
# master_df, re-running this cell cannot add duplicate rows, and each row is
# labelled with its model name.
master_df = pd.DataFrame(
    [df_records_1_ex.mean(), df_records_2_ex.mean()],
    index=[MODEL_1, MODEL_2],
)

In [None]:
# Show the combined table of mean metrics for both models.
master_df

In [None]:
# Grouped bar chart comparing the mean of each metric across the two models.

# Number of Columns
N = len(master_df.columns)

# The x locations for the groups
ind = np.arange(N)

# The width of the bars
width = 0.3

# Create the grouped bar chart. Row 0 of master_df holds model 1's means and
# row 1 holds model 2's; the second series is shifted right by one bar width so
# the two bars of a metric sit side by side.
fig, ax = plt.subplots(figsize=(12, 4))
ax.bar(ind, master_df.iloc[0], width, color='lime')
ax.bar(ind + width, master_df.iloc[1], width, color='#3261e3')

# Add labels, title, and legend
ax.set_ylabel(f'{Y_LABEL}')
ax.set_title('RAG Assessment Comparison')

# Each pair of bars spans [ind - width/2, ind + 3*width/2]; its midpoint is
# ind + width/2, which is where the metric label belongs.
ax.set_xticks(ind + width / 2)
ax.set_xticklabels(master_df.columns, rotation=45)
ax.legend([f'{MODEL_1}', f'{MODEL_2}'])

# Display the plot
plt.show()