In [None]:
from cag.embeddings import SentenceTransformerEmbeddings
from cag.models import ChatOllama
import copy

In [11]:
embeddings_model = SentenceTransformerEmbeddings('sentence-transformers/all-mpnet-base-v2', device='cuda')

In [24]:
qwen = ChatOllama(model = 'qwen2.5', temprature = 0.001)

In [3]:
llama = ChatOllama(model = 'llama3.2', temprature = 0.001)

In [25]:
import numpy as np

def answer_relevancy(generated_answer, original_query):
    
    prompt = """i will give you a answer , please generate three question which we can derive from that answer.
    use this format for generation :  start the generation with "---" and end it with "---" too ; between the questions you should include "---" as well . like this format bellow : 
    
    ---
    Question number 1
    ---
    Question number 2 
    ---
    Question number 3
    --- 
    
    Here is the answer : 
    Answer : {answer}"""
    
    prompt = prompt.format(answer=generated_answer)
    
    
    generated_questions = llama.invoke(prompt).content
    reason = copy.deepcopy(generated_questions)

    
    generated_questions = [item for item in generated_questions.split('---') if len(item) > 7]

    #embed the question
    generated_questions = [embeddings_model.embed_query(question) for question in generated_questions]
    
    #embed the query
    original_query = embeddings_model.embed_query(original_query)
    
    generated_questions, original_query = np.array(generated_questions), np.array(original_query)
    
    # Normalize vectors
    vec1_norm = original_query / np.linalg.norm(original_query)
    vec_list_norm = generated_questions / np.linalg.norm(generated_questions, axis=1, keepdims=True)

    # Compute cosine similarity
    cosine_sim = np.dot(vec_list_norm, vec1_norm)
    
    return reason, np.mean(cosine_sim)
    

In [26]:
def context_relevancy(retrieved_context, original_query):
    
    prompt = """this is a context relevancy test. for the given context and question , extract each sentence of the context and determine if that sentence can potentially be helpful to answer the question. for every sentence , describe the relevancy of that sentence and answer in YES or NO terms which that sentence can be helpful to answer the question or not. 
    
    use this format : 
    
    Sentence  : a simple description of relevancy to the question : YES or NO
    
    Here is Question 
    Question : {query}
    
    Here is the Context :
    Context : {context}"""
    
    prompt = prompt.format(query = original_query, context = retrieved_context)
    
    output = qwen.invoke(prompt).content
    reason = copy.deepcopy(output)
    
    output = output.lower()
    
    score = output.count('yes') / (output.count('yes') + output.count('no'))
    
    return reason, score

In [27]:
def pseudo_context_generate(query):
    prompt = """for the given question, generate a simple and small passage that can answer the question.
    Here is the Question :
    
    Question : {question}
    """
    
    prompt = prompt.format(question = query)
    
    output = llama.invoke(prompt).content

    
    return output

In [28]:
def query_rewriting(query):
    
    prompt = f"""Please rewrite the query bellow for better retrieval in web search engines or retrieval augmented generation. just generate the rewrited query without any more explaination. generate only one rewrited query, only one.
    
    Here is the Query :
    Query : {query}
    """
    
    prompt = prompt.format(query = query)
    
    rewrited = llama.invoke(prompt).content

    return rewrited

# Loading CRSB and SQUAD

In [3]:
import json 

with open('F:\\OneDrive\\Desktop\\Research\\Dataset\\CRSB-Texts.json', 'r') as f:
    crsb = json.load(f)
    
crsb = crsb['amazon_rainforest']

In [5]:
import datasets

squad = datasets.load_dataset('rajpurkar/squad')
squad = squad['validation'].shuffle()

In [6]:
squad = squad[:100]

#this makes squad a dict like object with keys and values , values are lists

In [7]:
print(crsb.keys())
print(squad.keys())

dict_keys(['contents', 'questions'])
dict_keys(['id', 'title', 'context', 'question', 'answers'])


In [8]:
print(len(crsb['contents']))
print(len(squad['question']))

100
100


In [9]:
contexts = crsb['contents']
questions = squad['question']

# RAG retriever

In [12]:
from langchain.vectorstores import FAISS

retriever = FAISS.from_texts(texts=contexts,
                             embedding= embeddings_model)

# RAG Evaluation on CRSB + SQUAD

In [None]:
from time import time

crs = []
ars = []

for i, question in enumerate(questions):
    
    start = time()
    retrieved_context = retriever.similarity_search(query=question, k =1)
    _, ar = answer_relevancy(retrieved_context, question)
    _, cr = context_relevancy(retrieved_context, question)
    
    crs.append(cr)
    ars.append(ar)
    
    end = time()
    print(f'Question {i} processed in {end - start} seconds')
    print(f'CR score: {cr}, AR score: {ar}')


In [None]:
ars, crs = np.array(ars), np.array(crs)

print(f'ARs mean : {np.mean(ars)}')
print(f'CRs mean : {np.mean(crs)}')

# CAG Evaluation on CRSB + SQUAD

In [None]:
import json
from cag.vector_candidates.vc import VectorCandidates

with open('F:\\OneDrive\\Desktop\\Research\\Dataset\\CRSB-Embeddings-MPNET.json', 'r') as f:
    crsb = json.load(f)
    
crsb_contexts_embeddings = crsb['amazon_rainforest']['contents']
crsb_pseudo_queries_embeddings = crsb['amazon_rainforest']['questions']

In [None]:
VC = VectorCandidates(contexts= [ crsb_contexts_embeddings ], questions= [ crsb_pseudo_queries_embeddings ])

In [None]:
from cag.gate.vector_candidates import VectorCandidatesGate

gate = VectorCandidatesGate(vc= VC, embedding_model= embeddings_model)

In [None]:
import time
import numpy as np

ars = []
crs = []

for i in range(100):

    a = time.time()
    
    _, needs_retrieval = gate(squad['question'][i])
    
    if needs_retrieval:
        retrieved_context = retriever.similarity_search(query=squad['question'][i], k =1)
        _, ar = answer_relevancy(retrieved_context, squad['question'][i])
        cr = context_relevancy(retrieved_context, squad['question'][i])
    
    else:
        pseudo_context = pseudo_context_generate(squad['question'][i])
        
        _, ar = answer_relevancy(pseudo_context, squad['question'][i])
        _, cr = context_relevancy(pseudo_context, squad['question'][i])
        
    ars.append(ar)
    crs.append(cr)
    b = time.time()
    print(f'Question {i} processed in {b - a} seconds')
    print(f'CR : {cr} --- AR : {ar}')

In [None]:
ars, crs = np.array(ars), np.array(crs)

print(f'ARs mean : {np.mean(ars)}')
print(f'CRs mean : {np.mean(crs)}')

# HYDE Evaluation on CRSB + SQUAD

In [None]:
from time import time

pseudos = []

for i, question in enumerate(questions):
    
    start = time()
    
    #HyDE Step
    pseudo_c = pseudo_context_generate(question)
    pseudos.append(pseudo_c)
    
    end = time()
    
    print(f'Pseudo-Context {i} processed in {end - start} seconds')
    


In [None]:


crs = []
ars = []
ar_reasons = []
cr_reasons = []


for i, question in enumerate(questions):
    
    start = time()
    
    retrieved_context = retriever.similarity_search(query=pseudos[i], k =1)
    
    ar_reason, ar = answer_relevancy(retrieved_context, question)
    cr_reason, cr = context_relevancy(retrieved_context, question)
    
    crs.append(cr)
    ars.append(ar)
    ar_reasons.append(ar_reason)
    cr_reasons.append(cr_reason)
    
    end = time()
    print(f'Question {i} processed in {end - start} seconds')
    print(f'CR score: {cr}, AR score: {ar}')


In [None]:
ars, crs = np.array(ars), np.array(crs)

print(f'ARs mean : {np.mean(ars)}')
print(f'CRs mean : {np.mean(crs)}')

# Query Rewriting Evaluation on CRSB + SQUAD

In [None]:
from time import time

rewriteds = []

for i, question in enumerate(questions):
    
    a = time()
    # Query Rewriting Step
    query_rewrited = query_rewriting(question)
    rewriteds.append(query_rewrited)
    
    b = time()
    
    print(f'Rewriting {i} processed in {b - a} seconds')

In [None]:
crs = []
ars = []
ar_reasons = []
cr_reasons = []


for i, question in enumerate(questions):
    
    start = time()
    retrieved_context = retriever.similarity_search(query=rewriteds[i] , k =1)
    
    ar_reason, ar = answer_relevancy(retrieved_context, question)
    cr_reason, cr = context_relevancy(retrieved_context, question)
    
    crs.append(cr)
    ars.append(ar)
    ar_reasons.append(ar_reason)
    cr_reasons.append(cr_reason)
    
    end = time()
    print(f'Question {i} processed in {end - start} seconds')
    print(f'CR score: {cr}, AR score: {ar}')


In [None]:
ars, crs = np.array(ars), np.array(crs)

print(f'ARs mean : {np.mean(ars)}')
print(f'CRs mean : {np.mean(crs)}')

# Evaluating Adpotive-RAG on CRSB and SQUAD

In [17]:
import numpy as np
from time import time

adapt_reasons = []
classified = []
times = []

for i, question in enumerate(questions):
    
    a = time()
    x, y = query_classification(contexts[i], question)
    adapt_reasons.append(x)
    classified.append(y)
    b = time()
    
    times.append(b-a)
    
    print(f'Question {i} processed in {b - a} seconds')
    
print(np.mean(np.array(times)))

Question 0 processed in 17.21433663368225 seconds
Question 1 processed in 11.587442398071289 seconds
Question 2 processed in 16.3067626953125 seconds
Question 3 processed in 17.906259536743164 seconds
Question 4 processed in 14.323300838470459 seconds
Question 5 processed in 8.344442129135132 seconds
Question 6 processed in 10.027394533157349 seconds
Question 7 processed in 13.33013367652893 seconds
Question 8 processed in 9.292656183242798 seconds
Question 9 processed in 9.36040472984314 seconds
Question 10 processed in 12.670216083526611 seconds
Question 11 processed in 12.168281555175781 seconds
Question 12 processed in 12.082881927490234 seconds
Question 13 processed in 13.635783195495605 seconds
Question 14 processed in 11.90071702003479 seconds
Question 15 processed in 18.116410970687866 seconds
Question 16 processed in 14.134588718414307 seconds
Question 17 processed in 15.46907925605774 seconds
Question 18 processed in 13.189318895339966 seconds
Question 19 processed in 21.3196

In [36]:
import time
import numpy as np

ars = []
crs = []

for i, question in enumerate(questions):

    a = time.time()
    
    needs_retrieval = classified[i]
    
    if needs_retrieval:
        retrieved_context = retriever.similarity_search(question, k =1)
        _, ar = answer_relevancy(retrieved_context, question)
        cr = context_relevancy(retrieved_context, question)
    
    else:
        pseudo_context = pseudo_context_generate(question)
        
        _, ar = answer_relevancy(pseudo_context, question)
        _, cr = context_relevancy(pseudo_context, question)
        
    ars.append(ar)
    crs.append(cr)
    b = time.time()
    print(f'Question {i} processed in {b - a} seconds')
    print(f'CR : {cr} --- AR : {ar}')

Question 0 processed in 28.522886514663696 seconds
CR : 1.0 --- AR : 0.6100373989862063
Question 1 processed in 72.37753963470459 seconds
CR : 0.3333333333333333 --- AR : 0.49895905401186535
Question 2 processed in 41.77283191680908 seconds
CR : 0.5 --- AR : 0.758249211936661
Question 3 processed in 89.19079422950745 seconds
CR : 0.125 --- AR : 0.6986630425468255
Question 4 processed in 66.12485122680664 seconds
CR : 0.2 --- AR : 0.6486127845838747
Question 5 processed in 67.31260299682617 seconds
CR : 0.2 --- AR : 0.7017307862662019
Question 6 processed in 90.14641618728638 seconds
CR : 0.18181818181818182 --- AR : 0.5769554557977793
Question 7 processed in 138.16933584213257 seconds
CR : 0.5 --- AR : 0.7772152416862705
Question 8 processed in 78.70425844192505 seconds
CR : 0.36363636363636365 --- AR : 0.5699165148180174
Question 9 processed in 90.88233733177185 seconds
CR : 0.7777777777777778 --- AR : 0.6465251119158393
Question 10 processed in 61.7341046333313 seconds
CR : 0.25 --- 

KeyboardInterrupt: 

In [37]:
ars, crs = np.array(ars), np.array(crs)

print(f'ARs mean : {np.mean(ars)}')
print(f'CRs mean : {np.mean(crs)}')

ARs mean : 0.6138833199607734
CRs mean : 0.3886134867842185
