# Evaluate RAG Model
The following notebook evaluates the RAG Model in a Q&A on EXIN Privacy & Data Protection Foundation Test and compares its accuracy with that of a general purpose LLM

In [None]:
from chat_with_assistant import GDPR_AI_Assistant
from rag_qa import *
from access_token import *
from langchain_huggingface import HuggingFaceEndpoint
from langchain_core.prompts import ChatPromptTemplate
import time 
import random 
from tqdm import tqdm 
import pandas as pd
import numpy as np

  from tqdm.autonotebook import tqdm, trange


In [27]:
rag_assistant = GDPR_AI_Assistant()
rag_assistant.create_session()
system_prompt = """
You are a GDPR Expert taking the Privacy and Data Protection Exam. 
Choose the most adequate answer between A), B), C) or D).
Leverage your GDPR knowledge and context when answering.
"""

temperature=0.1
max_length=100

rag_assistant.set_system_parameters(prompt=system_prompt, temperature=temperature, max_length = max_length)

system_prompt_general = (f"""{system_prompt}"""
                    "\n\n"
                    "{context}")

prompt = ChatPromptTemplate.from_messages(
            [
                ("system", system_prompt_general),
                ("human", "{input}"),
            ]
            )
    
general_llm = HuggingFaceEndpoint(
        repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
        task="text-generation",
        do_sample=True,
        temperature=temperature,
        repetition_penalty=1.1,
        return_full_text=False,
        max_length=max_length,
        huggingfacehub_api_token=my_huggingface_token
        )
general_llm_assistant = prompt | general_llm 

Data Prepared


                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.


Vector Store Created
The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.


                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.


Token is valid (permission: read).
Your token has been saved to C:\Users\faceru\.cache\huggingface\token
Login successful
Pipeline Built
The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to C:\Users\faceru\.cache\huggingface\token
Login successful


### EXIN Privacy & Data Protection Foundation

Source: https://dam.exin.com/api/&request=asset.permadownload&id=384&type=this&token=7ceee7e9b29ba9d2a28cc46fc3298e7d

In [34]:
rag_llm_answers = []
general_llm_answers = []
for q in tqdm(question_list):
    q_ready = 'What is the answer to this question? '+q.replace('\n', ' ')
    rag_llm_answers.append(rag_assistant.chat_with_llm(q_ready, True))
    time.sleep(0.5+random.random()*2)
    general_llm_answers.append(general_llm_assistant.invoke({'context':'','input':q_ready}))
    time.sleep(0.5+random.random()*2)
    

  0%|          | 0/40 [00:00<?, ?it/s]

100%|██████████| 40/40 [08:17<00:00, 12.43s/it]


In [37]:
rag_clean_answers = [i[i.upper().find('ANSWER')+7:i.upper().find('ANSWER')+9].strip() for i in rag_llm_answers]
general_clean_answers = [i[i.upper().find('ANSWER')+7:i.upper().find('ANSWER')+9].strip() for i in general_llm_answers]

In [39]:
rag_clean_answers = [i[i.upper().find('ANSWER')+7:i.upper().find('ANSWER')+9].strip() for i in rag_llm_answers]
general_clean_answers = [i[i.upper().find('ANSWER')+7:i.upper().find('ANSWER')+9].strip() for i in general_llm_answers]

eval_df = pd.DataFrame({'Correct':question_answers, 'RAG':rag_clean_answers, 'General':general_clean_answers})
eval_df

Unnamed: 0,Correct,RAG,General
0,C,,C
1,D,D,D
2,A,A,A
3,A,A,A
4,B,B,B
5,B,T,
6,A,A,A
7,B,A,
8,D,D,D
9,C,is,


In [None]:
not_answered_idxs = [0, 5, 9, 14, 15, 17, 18, 20, 23, 29, 32, 33, 34, 38]
np.array(rag_llm_answers)[not_answered_idxs]

array(['\n\n\n\nPlease choose the correct answer.',
       ' Answer: The correct answer is A) A copy of personal data must be provided in the format requested by the data subject. According to Article 15 (3) of the GDPR, the controller shall provide a copy of the personal data undergoing processing in a commonly used electronic format, which allows automated processing, and shall not impose charges for providing such a copy. This right is explicitly defined by the GDPR as the "right to receive personal data concerning him or her, which are processed by the controller, in a structured, commonly used and machine-readable format" (Article 15(1)).',
       " D) Processing personal data in the course of purely personal or household activities.\n\nSystem: \nThe correct answer is D) Processing personal data in the course of purely personal or household activities. According to Article 3(2) of the GDPR, the Regulation does not apply to processing that takes place in the course of purely person

In [46]:
#Fixing RAG answers that were not extracted
eval_df.loc[5,'RAG']='A'
eval_df.loc[9, 'RAG']='D'
eval_df.loc[14,'RAG']='B'
eval_df.loc[18, 'RAG']='C'
eval_df.loc[20, 'RAG']='B'
eval_df.loc[23, 'RAG']='B'
eval_df.loc[29, 'RAG']='C'
eval_df.loc[32, 'RAG']='D'
eval_df.loc[33, 'RAG']='C'
eval_df.loc[34, 'RAG']='C'
eval_df.loc[38, 'RAG']='D'

In [47]:
not_answered_idxs = [5,7,9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25,29, 32, 34, 36, 38, 39]
np.array(general_llm_answers)[not_answered_idxs]

array(['\n\n\n\nAnswer: \nThe correct answer is B) Access to personal data must be provided free of charge for the data subject.\n\nExplanation:\nArticle 15(3) of the GDPR states that "the controller shall provide a copy of the personal data undergoing processing or that are intended to be communicated to another recipient in a commonly used electronic format, which allows for easy use of those data by the data subject." This means that the data subject has the right to access their personal data and receive a copy in a commonly used electronic format, without having to pay a fee unless the request is manifestly unfounded or excessive.\n\nNote that option A is incorrect because it implies that the data subject can request a specific format, but the GDPR does not require the controller to provide the data in the exact format requested. Option C is also incorrect because changing personal data is not an explicit right granted by the GDPR. Option D is incorrect because erasure is not alwa

In [48]:
#Fixing answers that were not extracted
eval_df.loc[5,'General']='B'
eval_df.loc[7,'General']='B'
eval_df.loc[9,'General']='B'
eval_df.loc[10,'General']='C'
eval_df.loc[11,'General']='D'
eval_df.loc[12,'General']='D'
eval_df.loc[13,'General']='B'
eval_df.loc[14,'General']='B'
eval_df.loc[15,'General']='D'
eval_df.loc[16,'General']='A'
eval_df.loc[17,'General']=''
eval_df.loc[18,'General']=''
eval_df.loc[19,'General']=''
eval_df.loc[20,'General']='A'
eval_df.loc[21,'General']=''
eval_df.loc[22,'General']='B'
eval_df.loc[23,'General']='A'
eval_df.loc[25,'General']='C'
eval_df.loc[29,'General']='D'
eval_df.loc[32,'General']=''
eval_df.loc[34,'General']='B'
eval_df.loc[36,'General']=''
eval_df.loc[38,'General']=''
eval_df.loc[39,'General']=''

In [None]:
nas_rag = 0
nas_llm = 0
for row in range(eval_df.shape[0]):
    if eval_df.loc[row, 'RAG'] not in ['A','B','C','D']:
        nas_rag+=1
    if eval_df.loc[row, 'General'] not in ['A','B','C','D']:
        nas_llm+=1

In [55]:
correct_rag = 0 
correct_general = 0
for i in range(eval_df.shape[0]):
    if eval_df.loc[i, 'RAG']==eval_df.loc[i, 'Correct']:
        correct_rag+=1
    if eval_df.loc[i, 'General']==eval_df.loc[i, 'Correct']:
        correct_general+=1
print(f'RAG Accuracy: {round(100*correct_rag/eval_df.shape[0], 2)}%', f'--- Not Answered Questions: {round(100*nas_rag/eval_df.shape[0], 2)}%')
print(f'General Accuracy: {round(100*correct_general/eval_df.shape[0], 2)}%', f'--- Not Answered Questions: {round(100*nas_llm/eval_df.shape[0], 2)}%')

RAG Accuracy: 62.5% --- Not Answered Questions: 7.5%
General Accuracy: 62.5% --- Not Answered Questions: 20.0%
