In [1]:
import pandas as pd
 

rag_df = pd.read_csv("rag_data.csv",delimiter=";")

In [2]:
rag_df.shape

(40541, 3)

In [3]:
rag_df["question"]= rag_df["question"].astype("string")
rag_df["answer"] = rag_df["answer"].astype("string")
rag_df["documents"] = rag_df["documents"].astype("string")

In [4]:
rag_df.sample(20).head()

Unnamed: 0,question,answer,documents
31200,How does 'Pinafore' use both historical contex...,"In 'Pinafore', humor is achieved through both ...",['Sullivan\'s generous supply of addictive mel...
36604,What is a recurring critique found in reviews ...,A recurring critique found in reviews of 'The ...,"['humor, the less said the better"". Frederik P..."
25623,List the Autodesk products with which Autodesk...,Autodesk Vault is integrated with Autodesk Inv...,['Autodesk beginning November 2016 and complet...
36613,Which director adapted Hergé's albums into the...,The 2011 film *The Adventures of Tintin* was d...,"['feature film """" (2011). Tintin and his frien..."
30890,Who was the editor involved in 'Watchmen' and ...,Len Wein was the editor of the 'Watchmen' comi...,['The film itself was released on DVD four mon...


In [1]:
import os 
import sys
sys.path.insert(0,os.path.dirname(os.getcwd()))

In [6]:
from scripts.ollama_model import Ollama_llm,Model_name,Template
Model_name = Model_name.QWEN
model = Ollama_llm(Model_name)


Initialized Ollama with model: qwen:4b


In [7]:
rag_df.index

RangeIndex(start=0, stop=40541, step=1)

In [8]:
# run a quick test
import random
sample_df = rag_df.sample(3)
n  = random.randrange(0,len(rag_df))
question = rag_df.loc[n,'question']
context= rag_df.loc[n,'documents']
answer = rag_df.loc[n,'answer']

print(f"question {question}\n\n")
print(f"context {context}\n\n")
print(f"Ground truth {answer}\n\n")
print(f"Bot answer {model.get_completion(query=question,context=context)}\n\n")

question How can the task of reading the first word and skipping the remaining characters until a new line in a text be understood using finite state machines?


context ['of symbols (characters); actions are not used. The example in figure 4 shows a finite state machine that accepts the string "nice". In this FSM, the only accepting state is state 7. A (possibly infinite) set of symbol sequences, aka. formal language, is called a regular language if there is some Finite State Machine that accepts exactly that set. For example, the set of binary strings with an even number of zeroes is a regular language (cf. Fig. 5), while the set of all strings whose length is a prime number is not. A machine could also be described as', 'had some fixed number of states, and there is a fixed number of states in the tape alphabet, the table has fixed size, and can therefore be computed by another finite state machine. This machine, however, will never need to backtrack, and hence is a DFA. Several var

In [9]:
sample_df= rag_df.sample(300)

In [14]:
import logging


for i, row in enumerate(sample_df.itertuples()):
    print(f"Started processing row index: {row.Index+1}")

    response = model.get_completion(query=row.question,context=row.documents)

    sample_df.loc[row.Index, "LLM_Response"] = response   

    print(f"Finished processing row index: {row.Index+1}")
    
# save results
sample_df.to_csv("rag_responses.csv") 

Started processing row index: 13715
Finished processing row index: 13715
Started processing row index: 25237
Finished processing row index: 25237
Started processing row index: 7804
Finished processing row index: 7804
Started processing row index: 3673
Finished processing row index: 3673
Started processing row index: 22681
Finished processing row index: 22681
Started processing row index: 16763
Finished processing row index: 16763
Started processing row index: 33735
Finished processing row index: 33735
Started processing row index: 26736
Finished processing row index: 26736
Started processing row index: 30487
Finished processing row index: 30487
Started processing row index: 21726
Finished processing row index: 21726
Started processing row index: 2276
Finished processing row index: 2276
Started processing row index: 13853
Finished processing row index: 13853
Started processing row index: 17221
Finished processing row index: 17221
Started processing row index: 11919
Finished processing r

In [32]:
sample_df = sample_df.reset_index().drop(columns=["index"])

In [33]:
sample_df.head().index

RangeIndex(start=0, stop=5, step=1)

## Evaluation

In [2]:
from scripts.rag_evaluators import RagEvaluator
import pandas as pd 

df= pd.read_csv("rag_responses.csv")

rag_evaluator= RagEvaluator()


In [None]:
# Test example
rag_evaluator.evaluate_relevance(query="Is Marie Curie is born in Paris?", 
    response="No, Marie Curie is born in Warsaw.")

{'relevance': 4.0,
 'gpt_relevance': 4.0,
 'relevance_reason': 'The response fully and accurately addresses the query, providing complete and correct information.',
 'relevance_result': 'pass',
 'relevance_threshold': 3}

In [6]:
df = df.drop(columns=["Unnamed: 0"])
df.head()

Unnamed: 0,question,answer,documents,LLM_Response
0,"In the context of language specifications, whi...","In the context of language specifications, the...",['társadalmi rendszerünknek a munka az alapja....,"In the context of language specifications, whi..."
1,In which type of region would you expect to fi...,"Based on the reference document, focused ecolo...",['an expansive amount of land which contains a...,In a region with high levels of human activity...
2,Is the percentage of the Hispanic or Latino po...,According to the 2010 United States Census dat...,['and 2.9% from two or more races. Those of Hi...,"Yes, the percentage of the Hispanic or Latino ..."
3,How did the role of Druze soldiers in the IDF ...,"Initially, non-Jewish minorities in the IDF, i...","['Originally, they served in the framework of ...",The role of Druze soldiers in the IDF evolved ...
4,Explain how Selenium IDE 3.x differs from the ...,Selenium IDE 3.x was developed to replace the ...,['by moving the Carbide.c++ plug-ins into a st...,Selenium IDE 3.x is a newer version of the Sel...


In [None]:


for i, row in enumerate(df.itertuples()):
    print(f"Started evaluating row index: {row.Index+1}")

    groundedness = rag_evaluator.evaluate_groundedness(query=row.question,context=row.documents,response=row.LLM_Response)

    relevance =  rag_evaluator.evaluate_relevance(query=row.question,response=row.LLM_Response)

    print(groundedness.groundedness_result)
    print(relevance.relevance_result)

    df.loc[row.Index, "groundedness"] = groundedness.groundedness 
    df.loc[row.Index, "groundedness_reason"] = groundedness.get("groundedness_reason")  
    df.loc[row.Index, "groundedness_result"] = groundedness.get("groundedness_result")



    df.loc[row.Index, "relevance"] = relevance.get("relevance") 
    df.loc[row.Index, "relevance_reason"] = relevance.get("relevance_reason")
    df.loc[row.Index, "relevance_result"] = relevance.get("relevance_result")  


    print(f"Finished evaluating row index: {row.Index+1}")
    
# save results
df.to_csv("rag_evaluation_resutls.csv")

Started evaluating row index: 1


AttributeError: 'dict' object has no attribute 'groundedness_result'