In [1]:
from cag.embeddings import SentenceTransformerEmbeddings
from cag.models import ChatOllama

embeddings_model = SentenceTransformerEmbeddings('sentence-transformers/all-mpnet-base-v2')

  from tqdm.autonotebook import tqdm, trange





In [2]:
model = ChatOllama(model = 'qwen2.5', temprature = 0.01)

In [3]:
import numpy as np

def answer_relevancy(generated_answer, original_query):
    
    prompt = """i will give you a answer , please generate three question which we can derive from that answer.
    use this format for generation :  start the generation with "---" and end it with "---" too ; between the questions you should include "---" as well . like this format bellow : 
    
    ---
    Question number 1
    ---
    Question number 2 
    ---
    Question number 3
    --- 
    
    Here is the answer : 
    Answer : {answer}"""
    
    prompt = prompt.format(answer=generated_answer)
    
    
    generated_questions = model.invoke(prompt).content

    
    generated_questions = [item for item in generated_questions.split('---') if len(item) > 7]

    #embed the question
    generated_questions = [embeddings_model.embed_query(question) for question in generated_questions]
    
    #embed the query
    original_query = embeddings_model.embed_query(original_query)
    
    generated_questions, original_query = np.array(generated_questions), np.array(original_query)
    
    # Normalize vectors
    vec1_norm = original_query / np.linalg.norm(original_query)
    vec_list_norm = generated_questions / np.linalg.norm(generated_questions, axis=1, keepdims=True)

    # Compute cosine similarity
    cosine_sim = np.dot(vec_list_norm, vec1_norm)
    
    return np.mean(cosine_sim)
    

In [4]:
def context_relevancy(retrieved_context, original_query):
    
    prompt = """this is a context relevancy test. for the given context and question , extract each sentence of the context and determine if that sentence can potentially be helpful to answer the question. for every sentence , describe the relevancy of that sentence and answer in YES or NO terms which that sentence can be helpful to answer the question or not. 
    
    use this format : 
    
    Sentence  : a simple description of relevancy to the question : YES or NO
    
    Here is Question 
    Question : {query}
    
    Here is the Context :
    Context : {context}"""
    
    prompt = prompt.format(query = original_query, context = retrieved_context)
    
    output = model.invoke(prompt).content
    
    output = output.lower()
    
    score = output.count('yes') / (output.count('yes') + output.count('no'))
    
    return score

In [5]:
def pseudo_context_generate(query):
    prompt = """for the given question, generate a context that can potentially answer the question. just generate the context without any other explaination . the context should contain at least 4 sentences 
    
    Here is the Question :
    Question : {question}
    """
    
    prompt = prompt.format(question = query)
    
    output = model.invoke(prompt).content

    
    return output

# Loading CRSB and SQUAD

In [6]:
import json 

with open('F:\\OneDrive\\Desktop\\Research\\Dataset\\CRSB-Texts.json', 'r') as f:
    crsb = json.load(f)
    
crsb = crsb['amazon_rainforest']

In [7]:
import datasets

squad = datasets.load_dataset('rajpurkar/squad')
squad = squad['validation'].shuffle()

Using the latest cached version of the dataset since rajpurkar/squad couldn't be found on the Hugging Face Hub
Found the latest cached dataset configuration 'plain_text' at F:\Models\huggingface\datasets\rajpurkar___squad\plain_text\0.0.0\7b6d24c440a36b6815f21b70d25016731768db1f (last modified on Mon Nov 25 19:23:30 2024).


In [8]:
squad = squad[:100]

#this makes squad a dict like object with keys and values , values are lists

In [9]:
print(crsb.keys())
print(squad.keys())

dict_keys(['contents', 'questions'])
dict_keys(['id', 'title', 'context', 'question', 'answers'])


In [10]:
print(len(crsb['contents']))
print(len(squad['question']))

100
100


In [11]:
contexts = crsb['contents']
questions = squad['question']

# RAG retriever

In [12]:
from langchain.vectorstores import FAISS

retriever = FAISS.from_texts(texts=contexts,
                             embedding= embeddings_model)

# RAG Evaluation on CRSB + SQUAD

In [None]:
from time import time

crs = []
ars = []

for i, question in enumerate(questions):
    
    start = time()
    retrieved_context = retriever.similarity_search(query=question, k =1)
    ar = answer_relevancy(retrieved_context, question)
    cr = context_relevancy(retrieved_context, question)
    
    crs.append(cr)
    ars.append(ar)
    
    end = time()
    print(f'Question {i} processed in {end - start} seconds')
    print(f'CR score: {cr}, AR score: {ar}')


In [None]:
from time import time

crs = []
ars = []

for i, question in enumerate(questions):
    
    start = time()
    retrieved_context = retriever.similarity_search(query=question, k =1)
    ar = answer_relevancy(retrieved_context, question)
    cr = context_relevancy(retrieved_context, question)
    
    crs.append(cr)
    ars.append(ar)
    
    end = time()
    print(f'Question {i} processed in {end - start} seconds')
    print(f'CR score: {cr}, AR score: {ar}')


In [85]:
ars, crs = np.array(ars), np.array(crs)

print(f'ARs mean : {np.mean(ars)}')
print(f'CRs mean : {np.mean(crs)}')

ARs mean : 0.20629166428308185
CRs mean : 0.016049019607843138


# CAG Evaluation on CRSB + SQUAD

In [13]:
import json
from cag.vector_candidates.vc import VectorCandidates

with open('F:\\OneDrive\\Desktop\\Research\\Dataset\\CRSB-Embeddings-MPNET.json', 'r') as f:
    crsb = json.load(f)
    
crsb_contexts_embeddings = crsb['amazon_rainforest']['contents']
crsb_pseudo_queries_embeddings = crsb['amazon_rainforest']['questions']

In [14]:
VC = VectorCandidates(contexts= [ crsb_contexts_embeddings ], questions= [ crsb_pseudo_queries_embeddings ])

In [15]:
from cag.gate.vector_candidates import VectorCandidatesGate

gate = VectorCandidatesGate(vc= VC, embedding_model= embeddings_model)

In [None]:
import time
import numpy as np

ars = []
crs = []

for i in range(100):

    a = time.time()
    
    needs_retrieval = gate(squad['question'][i])
    
    if needs_retrieval:
        retrieved_context = retriever.similarity_search(query=squad['question'][i], k =1)
        ar = answer_relevancy(retrieved_context, squad['question'][i])
        cr = context_relevancy(retrieved_context, squad['question'][i])
    
    else:
        pseudo_context = pseudo_context_generate(squad['question'][i])
        
        ar = answer_relevancy(pseudo_context, squad['question'][i])
        cr = context_relevancy(pseudo_context, squad['question'][i])
        
    ars.append(ar)
    crs.append(cr)
    b = time.time()
    print(f'Question {i} processed in {b - a} seconds')
    print(f'CR : {cr} --- AR : {ar}')

Question 0 processed in 91.55354261398315 seconds
CR : 0.16666666666666666 --- AR : 0.7838292693725731
Question 1 processed in 103.201819896698 seconds
CR : 0.6666666666666666 --- AR : 0.7051831108787031
Question 2 processed in 93.35385727882385 seconds
CR : 0.125 --- AR : 0.7843428505093316
Question 3 processed in 77.26798915863037 seconds
CR : 0.5 --- AR : 0.7164826458604162
Question 4 processed in 107.1562852859497 seconds
CR : 0.2727272727272727 --- AR : 0.6853714530236195
Question 5 processed in 86.5181212425232 seconds
CR : 0.6666666666666666 --- AR : 0.7831548127337072
Question 6 processed in 75.17269492149353 seconds
CR : 0.2857142857142857 --- AR : 0.6380235928656545
Question 7 processed in 91.82279992103577 seconds
CR : 0.3333333333333333 --- AR : 0.4922328638088171
Question 8 processed in 70.90986943244934 seconds
CR : 0.3333333333333333 --- AR : 0.7368721891085274
Question 9 processed in 109.96259093284607 seconds
CR : 0.0 --- AR : 0.5358087515057379
Question 10 processed i

In [None]:
ars, crs = np.array(ars), np.array(crs)

print(f'ARs mean : {np.mean(ars)}')
print(f'CRs mean : {np.mean(crs)}')