In [1]:
from cag.embeddings import SentenceTransformerEmbeddings
from cag.models import ChatOllama
import copy

  from tqdm.autonotebook import tqdm, trange





In [25]:
embeddings_model = SentenceTransformerEmbeddings('sentence-transformers/all-mpnet-base-v2')

In [26]:
qwen = ChatOllama(model = 'qwen2.5', temprature = 0.001)

In [2]:
llama = ChatOllama(model = 'llama3.2', temprature = 0.001)

In [27]:
import numpy as np

def answer_relevancy(generated_answer, original_query):
    
    prompt = """i will give you a answer , please generate three question which we can derive from that answer.
    use this format for generation :  start the generation with "---" and end it with "---" too ; between the questions you should include "---" as well . like this format bellow : 
    
    ---
    Question number 1
    ---
    Question number 2 
    ---
    Question number 3
    --- 
    
    Here is the answer : 
    Answer : {answer}"""
    
    prompt = prompt.format(answer=generated_answer)
    
    
    generated_questions = llama.invoke(prompt).content
    reason = copy.deepcopy(generated_questions)

    
    generated_questions = [item for item in generated_questions.split('---') if len(item) > 7]

    #embed the question
    generated_questions = [embeddings_model.embed_query(question) for question in generated_questions]
    
    #embed the query
    original_query = embeddings_model.embed_query(original_query)
    
    generated_questions, original_query = np.array(generated_questions), np.array(original_query)
    
    # Normalize vectors
    vec1_norm = original_query / np.linalg.norm(original_query)
    vec_list_norm = generated_questions / np.linalg.norm(generated_questions, axis=1, keepdims=True)

    # Compute cosine similarity
    cosine_sim = np.dot(vec_list_norm, vec1_norm)
    
    return reason, np.mean(cosine_sim)
    

In [28]:
def context_relevancy(retrieved_context, original_query):
    
    prompt = """this is a context relevancy test. for the given context and question , extract each sentence of the context and determine if that sentence can potentially be helpful to answer the question. for every sentence , describe the relevancy of that sentence and answer in YES or NO terms which that sentence can be helpful to answer the question or not. 
    
    use this format : 
    
    Sentence  : a simple description of relevancy to the question : YES or NO
    
    Here is Question 
    Question : {query}
    
    Here is the Context :
    Context : {context}"""
    
    prompt = prompt.format(query = original_query, context = retrieved_context)
    
    output = qwen.invoke(prompt).content
    reason = copy.deepcopy(output)
    
    output = output.lower()
    
    score = output.count('yes') / (output.count('yes') + output.count('no'))
    
    return reason, score

In [29]:
def pseudo_context_generate(query):
    prompt = """for the given question, generate a simple and small passage that can answer the question.
    Here is the Question :
    
    Question : {question}
    """
    
    prompt = prompt.format(question = query)
    
    output = llama.invoke(prompt).content

    
    return output

In [30]:
def query_rewriting(query):
    
    prompt = f"""Please rewrite the query bellow for better retrieval in web search engines or retrieval augmented generation. just generate the rewrited query without any more explaination. generate only one rewrited query, only one.
    
    Here is the Query :
    Query : {query}
    """
    
    prompt = prompt.format(query = query)
    
    rewrited = llama.invoke(prompt).content

    return rewrited

# Loading CRSB and SQUAD

In [31]:
import json 

with open('F:\\OneDrive\\Desktop\\Research\\Dataset\\CRSB-Texts.json', 'r') as f:
    crsb = json.load(f)
    
crsb = crsb['amazon_rainforest']

In [32]:
import datasets

squad = datasets.load_dataset('rajpurkar/squad')
squad = squad['validation'].shuffle()

In [33]:
squad = squad[:100]

#this makes squad a dict like object with keys and values , values are lists

In [34]:
print(crsb.keys())
print(squad.keys())

dict_keys(['contents', 'questions'])
dict_keys(['id', 'title', 'context', 'question', 'answers'])


In [35]:
print(len(crsb['contents']))
print(len(squad['question']))

100
100


In [36]:
contexts = crsb['contents']
questions = squad['question']

# RAG retriever

In [37]:
from langchain.vectorstores import FAISS

retriever = FAISS.from_texts(texts=contexts,
                             embedding= embeddings_model)

# RAG Evaluation on CRSB + SQUAD

In [None]:
from time import time

crs = []
ars = []

for i, question in enumerate(questions):
    
    start = time()
    retrieved_context = retriever.similarity_search(query=question, k =1)
    _, ar = answer_relevancy(retrieved_context, question)
    _, cr = context_relevancy(retrieved_context, question)
    
    crs.append(cr)
    ars.append(ar)
    
    end = time()
    print(f'Question {i} processed in {end - start} seconds')
    print(f'CR score: {cr}, AR score: {ar}')


In [85]:
ars, crs = np.array(ars), np.array(crs)

print(f'ARs mean : {np.mean(ars)}')
print(f'CRs mean : {np.mean(crs)}')

ARs mean : 0.20629166428308185
CRs mean : 0.016049019607843138


# CAG Evaluation on CRSB + SQUAD

In [13]:
import json
from cag.vector_candidates.vc import VectorCandidates

with open('F:\\OneDrive\\Desktop\\Research\\Dataset\\CRSB-Embeddings-MPNET.json', 'r') as f:
    crsb = json.load(f)
    
crsb_contexts_embeddings = crsb['amazon_rainforest']['contents']
crsb_pseudo_queries_embeddings = crsb['amazon_rainforest']['questions']

In [14]:
VC = VectorCandidates(contexts= [ crsb_contexts_embeddings ], questions= [ crsb_pseudo_queries_embeddings ])

In [15]:
from cag.gate.vector_candidates import VectorCandidatesGate

gate = VectorCandidatesGate(vc= VC, embedding_model= embeddings_model)

In [None]:
import time
import numpy as np

ars = []
crs = []

for i in range(100):

    a = time.time()
    
    needs_retrieval = gate(squad['question'][i])
    
    if needs_retrieval:
        retrieved_context = retriever.similarity_search(query=squad['question'][i], k =1)
        _, ar = answer_relevancy(retrieved_context, squad['question'][i])
        cr = context_relevancy(retrieved_context, squad['question'][i])
    
    else:
        pseudo_context = pseudo_context_generate(squad['question'][i])
        
        _, ar = answer_relevancy(pseudo_context, squad['question'][i])
        _, cr = context_relevancy(pseudo_context, squad['question'][i])
        
    ars.append(ar)
    crs.append(cr)
    b = time.time()
    print(f'Question {i} processed in {b - a} seconds')
    print(f'CR : {cr} --- AR : {ar}')

Question 0 processed in 91.55354261398315 seconds
CR : 0.16666666666666666 --- AR : 0.7838292693725731
Question 1 processed in 103.201819896698 seconds
CR : 0.6666666666666666 --- AR : 0.7051831108787031
Question 2 processed in 93.35385727882385 seconds
CR : 0.125 --- AR : 0.7843428505093316
Question 3 processed in 77.26798915863037 seconds
CR : 0.5 --- AR : 0.7164826458604162
Question 4 processed in 107.1562852859497 seconds
CR : 0.2727272727272727 --- AR : 0.6853714530236195
Question 5 processed in 86.5181212425232 seconds
CR : 0.6666666666666666 --- AR : 0.7831548127337072
Question 6 processed in 75.17269492149353 seconds
CR : 0.2857142857142857 --- AR : 0.6380235928656545
Question 7 processed in 91.82279992103577 seconds
CR : 0.3333333333333333 --- AR : 0.4922328638088171
Question 8 processed in 70.90986943244934 seconds
CR : 0.3333333333333333 --- AR : 0.7368721891085274
Question 9 processed in 109.96259093284607 seconds
CR : 0.0 --- AR : 0.5358087515057379
Question 10 processed i

In [None]:
ars, crs = np.array(ars), np.array(crs)

print(f'ARs mean : {np.mean(ars)}')
print(f'CRs mean : {np.mean(crs)}')

# HYDE Evaluation on CRSB + SQUAD

In [24]:
from time import time

crs = []
ars = []
ar_reasons = []
cr_reasons = []
pseudos = []

for i, question in enumerate(questions):
    
    start = time()
    
    #HyDE Step
    pseudo_c = pseudo_context_generate(question)
    pseudos.append(pseudo_c)
    retrieved_context = retriever.similarity_search(query=pseudo_c, k =1)
    
    ar_reason, ar = answer_relevancy(retrieved_context, question)
    cr_reason, cr = context_relevancy(retrieved_context, question)
    
    crs.append(cr)
    ars.append(ar)
    ar_reasons.append(ar_reason)
    cr_reasons.append(cr_reason)
    
    end = time()
    print(f'Question {i} processed in {end - start} seconds')
    print(f'CR score: {cr}, AR score: {ar}')


Question 0 processed in 100.13581776618958 seconds
CR score: 0.19230769230769232, AR score: 0.4944928252734518
Question 1 processed in 75.42042684555054 seconds
CR score: 0.0, AR score: 0.22284076570328493
Question 2 processed in 114.21823072433472 seconds
CR score: 0.0, AR score: 0.48859031864997055
Question 3 processed in 66.26198053359985 seconds
CR score: 0.0, AR score: 0.11288573014537147
Question 4 processed in 87.20348119735718 seconds
CR score: 0.0, AR score: 0.16991901672506346
Question 5 processed in 118.29184055328369 seconds
CR score: 0.22727272727272727, AR score: 0.2688701565300964
Question 6 processed in 105.59598994255066 seconds
CR score: 0.0, AR score: 0.15224033465068446
Question 7 processed in 88.38196611404419 seconds
CR score: 0.0, AR score: 0.29801169318634596
Question 8 processed in 105.97073483467102 seconds
CR score: 0.20833333333333334, AR score: 0.10842776724278436
Question 9 processed in 72.96008062362671 seconds
CR score: 0.0, AR score: 0.11883237564125033

KeyboardInterrupt: 

In [25]:
ars, crs = np.array(ars), np.array(crs)

print(f'ARs mean : {np.mean(ars)}')
print(f'CRs mean : {np.mean(crs)}')

ARs mean : 0.22826915120368851
CRs mean : 0.0431931270683821


# Query Rewriting Evaluation on CRSB + SQUAD

In [39]:
from time import time

crs = []
ars = []
ar_reasons = []
cr_reasons = []
rewriteds = []

for i, question in enumerate(questions):
    
    start = time()
    
    # Query Rewriting Step
    query_rewrited = query_rewriting(question)
    rewriteds.append(query_rewrited)
    retrieved_context = retriever.similarity_search(query=query_rewrited , k =1)
    
    ar_reason, ar = answer_relevancy(retrieved_context, question)
    cr_reason, cr = context_relevancy(retrieved_context, question)
    
    crs.append(cr)
    ars.append(ar)
    ar_reasons.append(ar_reason)
    cr_reasons.append(cr_reason)
    
    end = time()
    print(f'Question {i} processed in {end - start} seconds')
    print(f'CR score: {cr}, AR score: {ar}')


KeyboardInterrupt: 

In [None]:
ars, crs = np.array(ars), np.array(crs)

print(f'ARs mean : {np.mean(ars)}')
print(f'CRs mean : {np.mean(crs)}')