In [1]:
"""
Imroving retrieval quality by implementing query-rewriting and contextual compression techniques
"""
print()




In [67]:
from langchain_ollama import OllamaEmbeddings
from langchain_chroma import Chroma
import boto3
from dotenv import load_dotenv, find_dotenv
import pandas as pd
import json

In [68]:
load_dotenv(find_dotenv())

True

In [69]:
emb_model = OllamaEmbeddings(model="bge-m3:latest", num_thread=4)

In [None]:
vectorstore = Chroma(
    embedding_function=emb_model,
    collection_name='guides',
    persist_directory="../data/chroma_knowledge_base"
)

In [71]:
import os

In [72]:
# === AWS Configuration === #
COGNITO_REGION = os.getenv("COGNITO_REGION")
BEDROCK_REGION = os.getenv("BEDROCK_REGION")
MODEL_ID1 = os.getenv("MODEL_ID1")
MODEL_ID2 = os.getenv("MODEL_ID2")
IDENTITY_POOL_ID = os.getenv("IDENTITY_POOL_ID")
USER_POOL_ID = os.getenv("USER_POOL_ID")
APP_CLIENT_ID = os.getenv("APP_CLIENT_ID")
USERNAME = os.getenv("USERNAME")
PASSWORD = os.getenv("PASSWORD")

In [73]:
# === Helper: Get AWS Credentials === #
def get_credentials(username, password):
    idp_client = boto3.client("cognito-idp", region_name=COGNITO_REGION)
    response = idp_client.initiate_auth(
        AuthFlow="USER_PASSWORD_AUTH",
        AuthParameters={"USERNAME": username, "PASSWORD": password},
        ClientId=APP_CLIENT_ID,
    )
    id_token = response["AuthenticationResult"]["IdToken"]

    identity_client = boto3.client("cognito-identity", region_name=COGNITO_REGION)
    identity_response = identity_client.get_id(
        IdentityPoolId=IDENTITY_POOL_ID,
        Logins={f"cognito-idp.{COGNITO_REGION}.amazonaws.com/{USER_POOL_ID}": id_token},
    )

    creds_response = identity_client.get_credentials_for_identity(
        IdentityId=identity_response["IdentityId"],
        Logins={f"cognito-idp.{COGNITO_REGION}.amazonaws.com/{USER_POOL_ID}": id_token},
    )

    return creds_response["Credentials"]

In [74]:
def get_model():
    credentials = get_credentials(USERNAME, PASSWORD)
    bedrock = boto3.client(
        "bedrock-runtime",
        region_name=BEDROCK_REGION,
        aws_access_key_id=credentials["AccessKeyId"],
        aws_secret_access_key=credentials["SecretKey"],
        aws_session_token=credentials["SessionToken"],
    )

    return bedrock

In [80]:
def rewrite_query(query):
    
    BASE_PROMPT = (
"""
# Task

Your task is to re-write the user's query into ONE single search query for better document retrieval.

The re-written query has the following characteristics: 
- should capture the main user problem
- should be direct, straightforward (refer to "Charasteristics of direct questions")
- should be void of any sense of urgency or frustration that may have been present in the original query
- should retain all important information mentioned by the user "AS IS" and be void of unnecessary background information present in the original user query

Characteristics of direct questions:
- Starts with a question word: They often begin with words such as "who," "what," "where," "when," "why," or "how". 
- Uses an auxiliary verb: The auxiliary verb (like "do," "is," or "have") is inverted and comes before the subject. 
- Ends with a question mark: They conclude with a question mark (?) in writing.

---

## Input

Original User Query:
{query}

---

## Output

One single consice search query optimized for efficient document retrieval formatted as below: 
"<GENERATED_OUTPUT>"
"""
).strip()

    body = json.dumps({
        "max_tokens": 1000,
        "temperature":0,
        "anthropic_version": "bedrock-2023-05-31",
        "messages": [{"role": "user", "content": BASE_PROMPT.format(query=query)}]
    })

    response = get_model().invoke_model(
        body=body,
        modelId=MODEL_ID1,
    )

    response_body = json.loads(response.get('body').read())
    assistant_response = response_body.get('content')[0].get('text')

    return json.loads(assistant_response)
    

In [None]:
eval_dataset = pd.read_csv('../data/datasets/canvas_community_forum.csv')

In [82]:
eval_dataset

Unnamed: 0,input,expected_output
0,"Hey, I'm a bit unsure about a message I sent t...",You sent it to yourself only. It will only sen...
1,I am a new student and my course starts tomorr...,"Unfortunately, we won't be able to directly he..."
2,„Ç≥„Éº„ÇπÁôªÈå≤„Åó„ÅüÊéàÊ•≠„ÅÆ„ÉÄ„ÉÉ„Ç∑„É•„Éú„Éº„Éâ„ÇíÊ∂à„Åô„Å´„ÅØ„Å©„ÅÜ„Åô„Çå„Å∞„ÅÑ„ÅÑ„Åß„Åô„ÅãÔºü,I believe this is your concern? How to delete ...
3,Today I was trying to keep up with my work in ...,It can depend on the teacher and how they orga...
4,How do I find my unoffical transcripts in canv...,Since this question appears to be school-speci...
5,"During my recent exam, I accidentally clicked ...","Yes, that action would be recorded in the quiz..."
6,I need the QR code for Canvas,You can locate the QR code for your Canvas log...
7,Unable to play videos due to privacy settings....,Without knowing more about the specific videos...
8,"I am a student, I opened my account to pass my...",Your teacher may have set this assignment up a...
9,"I just took a two-hour in-person exam, on pape...",IT (and some professors) can likely check the ...


In [83]:
import textwrap

In [84]:
pp = textwrap.TextWrapper(width=100)

In [85]:
eg_response = rewrite_query(eval_dataset.loc[0,'input'])

In [86]:
print(pp.fill(eval_dataset.loc[0,'input']))

Hey, I'm a bit unsure about a message I sent through Canvas. I'm wondering if it went just to me, or
to the whole class. In the "To:" field, it only shows my name, and the message is only in my sent
folder. I can see an option to add all 60 classmates, but I'm worried I might have accidentally sent
a personal reminder to everyone. I thought it was fine initially, but now I'm second-guessing
myself. Could someone clarify: if only my name is listed in the "To:" field, did only I get the
message? Also, if it did go to the entire class, how can I avoid this in the future? Thanks for any
help!


In [87]:
eg_response

"How can I verify if a Canvas message was sent only to myself when my name is the only one in the 'To:' field?"

In [96]:
def contextual_compression(query, docs):

    BASE_PROMPT = (
"""
Given the following question and context, extract any part of the context *AS IS* that is relevant to answer the question. If none of the context is relevant return empty string ("").

Remember, *DO NOT* edit the extracted parts of the context.

> Question: {query}
> Context:
>>>
{context}
>>>

Output extracted relevant parts as a SINGLE string: "<CONCATENATED_EXTRACTED_RELEVANT_PARTS>".
"""
).strip()

    compressed_docs = []

    for doc in docs:
        body = json.dumps({
            "max_tokens": 1024,
            "temperature":0,
            "anthropic_version": "bedrock-2023-05-31",
            "messages": [{"role": "user", "content": BASE_PROMPT.format(query=query,context=doc.page_content)}]
        })

        response = get_model().invoke_model(
            body=body,
            modelId=MODEL_ID1,
        )

        response_body = json.loads(response.get('body').read())
        assistant_response = response_body.get('content')[0].get('text')

        compressed_docs.append(assistant_response)

    return compressed_docs

In [97]:
def retriever(query):

    optimized_query = rewrite_query(query)
    retriever = vectorstore.as_retriever()
    compressed_docs = contextual_compression(query, retriever.invoke(optimized_query))

    return compressed_docs

In [99]:
from deepeval.test_case import LLMTestCase

In [100]:
test_cases = []

for input, expected_output in eval_dataset.values.tolist(): 

    actual_output = ""
    retrieval_context = retriever(input)


    test_case = LLMTestCase(
        input=input,
        actual_output=actual_output,
        retrieval_context=retrieval_context,
        expected_output=expected_output
    )

    test_cases.append(test_case)

In [106]:
from deepeval.models import GPTModel
from deepeval import evaluate
from deepeval.metrics import ContextualRelevancyMetric

In [107]:
model = GPTModel(
    model="o4-mini",
    temperature=1
)

In [None]:
os.environ["DEEPEVAL_RESULTS_FOLDER"]="../data/eval_results"

In [109]:
metric = ContextualRelevancyMetric(
    threshold=0.7,
    model=model,
    include_reason=True
)

In [110]:
evaluate(test_cases=test_cases, metrics=[metric])

Output()



Metrics Summary

  - ‚úÖ Contextual Relevancy (score: 0.8181818181818182, threshold: 0.7, strict: False, evaluation model: o4-mini, reason: The score is 0.82 because the notes claim 'No statements found in provided context.' but the context actually states 'You can view a Canvas login QR code in your Canvas web browser.', error: None)

For test case:

  - input: I need the QR code for Canvas
  - actual output: 
  - expected output: You can locate the QR code for your Canvas login by clicking the 'Account' button via your Global Navigation menu and clicking QR for mobile login. For more information, feel free to check out this guide: How do I view a QR code to log in to the Canvas mobile apps?
  - context: None
  - retrieval context: ['"You can view a Canvas login QR code in your Canvas web browser. You can scan this QR code with your mobile device to log in to the Canvas apps without entering your institution URL and user credentials. You can use the QR code to access the Canvas Stud



In [None]:
# 5.26% ---> 36.84% improvement in Context Relevancy metric after implementing query-rewriting and contextual compression techniques
# x7 performance improvement!

In [None]:
# query-rewriting may not be used in final product as llm is already filtering out noise from user queries and finding the root problem. This was noticed from miltiple experiments and review of chat logs. This is happening due to llm using it own NLP capabilities/ reasoning. In addition to that, I have given it a tool which takes only a single question as input. So effectively llm is already doing query-rewriting internally by converting user message to a single question.
# contextual compression will be used in MVP to improve retrieval quality. This will also reduce the chunk size of each retrieved document leading to lesser LLM input tokens and reduced cost.
# NOTE: both query-rewriting and contextual compression add latency to the retrieval process.