# Create a baseline FAQ-answering RAG

In [1]:
# Path to the data file
faq_path = r"D:\Work\Development\RAG-agent\data\Anadea homework -Tee Customizer FAQ.txt"

In [2]:
# The question is the key, the answer is the value

with open(faq_path, 'r') as file:
    data = file.read()

import re

pattern = re.compile(r'Q:(.*)\nA:(.*)')

parsed_data = re.findall(pattern , data)

doc = [{"question": q.strip(), "answer": a.strip()} for q, a in parsed_data]

In [3]:
import chromadb
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma

# Initialize ChromaDB client (persistent storage in "./chroma_db")
chroma_client = chromadb.PersistentClient(path="../chroma_db")

# Load embedding model
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [62]:
# Using question embeddings as a key to answers
# User enters query, this query is compared to all the embedded questions and we get the answer to the most similar question

In [5]:
# Extract only questions for embedding, while storing answers in metadata
questions = [item["question"] for item in doc]
metadatas = [{"question": item["question"], "answer": item["answer"]} for item in doc]

# Store question embeddings in ChromaDB with answers in metadata
faq_collection = Chroma.from_texts(questions, embedding_model, metadatas=metadatas, client=chroma_client, collection_name="faq_collection")

print("Stored question embeddings with answer metadata in ChromaDB!")

Stored question embeddings with answer metadata in ChromaDB!


In [63]:
# Load ChromaDB
retriever = Chroma(client=chroma_client, collection_name="faq_collection", embedding_function=embedding_model).as_retriever()

def query_chroma_db(query):
    results = retriever.invoke(query, k=1)  # Retrieve top 1 most relevant
    if results:
        return results[0].metadata  # Return the stored answer
    return "No relevant answer found."

# Example Queries
print(query_chroma_db("How do I return an item?"))  # Should return: "We accept returns within 30 days of purchase."
print(query_chroma_db("Where is my order?"))  # Should match the tracking query
print(query_chroma_db("Can i make my personal clothes?"))

print(query_chroma_db("What is the name of my dog?")) # Fails the test

{'answer': 'Custom t-shirts can be returned within 30 days of delivery, provided they are in their original condition.', 'question': 'What is your return policy for custom t-shirts?'}
{'answer': 'You can track your order by logging into your account and viewing your order history.', 'question': 'How can I track my order?'}
{'answer': 'Yes, you can upload your design or choose from our library of designs.', 'question': 'Can I add my own design to the t-shirt?'}
{'answer': 'Our headquarters is located in Riga, Latvia, and we operate multiple production facilities to ensure efficient service and delivery.', 'question': 'Where is TeeCustomizer located?'}


Problem:
- What if we don't have the actual answer to the question? Vector database will give irrelevant answer.

In [66]:
retriever.invoke(
    "LangChain provides abstractions to make working with LLMs easy",
    k=2,
)

[Document(id='05b62368-9718-4795-a824-bd29dbd627da', metadata={'answer': 'Yes, we offer a variety of design templates to help you get started.', 'question': 'Do you offer design templates?'}, page_content='Do you offer design templates?'),
 Document(id='72611e7c-cdd6-4502-b8c3-7e0f8e518f75', metadata={'answer': 'Yes, we offer a variety of design templates to help you get started.', 'question': 'Do you offer design templates?'}, page_content='Do you offer design templates?')]

# Create a baseline customization getter tool

In [8]:
# Path to the data file
customizations_path = r"D:\Work\Development\RAG-agent\data\Anadea homework - Tee Customizer Shirts.txt"

In [57]:
with open(customizations_path, 'r', encoding="utf-8") as file:
    data = file.read()

import re

pattern = r"(\w+):\n(-.+)+"
matches = re.findall(pattern, data)

In [58]:
matches

[('Styles', '- Crew Neck'),
 ('Genders', '-Male'),
 ('Colors', '- White'),
 ('Sizes', '- XS, S, M, L, XL, XXL'),
 ('Options', '- Screen Printing')]

In [61]:
pattern = r"^([\w\s]+):\s*\n((?:-\s*.+\n?)+)"

matches = re.findall(pattern, data, re.MULTILINE)

# Parsing the results into a dictionary
parsed_data = {}

for category, options in matches:
    options_list = re.findall(r"-\s*(.+)", options)  # Extract individual options
    parsed_data[category] = options_list

# Output the structured data
for category, options in parsed_data.items():
    print(f"{category}: {options}")


Genders: ['Male', 'Female', 'Unisex']

Colors: ['White', 'Black', 'Blue', 'Red', 'Green', 'Custom Colors']

Sizes: ['XS, S, M, L, XL, XXL']

Printing Options: ['Screen Printing', 'Embroidery', 'Heat Transfer', 'Direct-to-Garment']


# Langchain guide

In [1]:
# Path to the data file
faq_path = r"D:\Work\Development\RAG-agent\data\Anadea homework -Tee Customizer FAQ.txt"
# The question is the key, the answer is the value

with open(faq_path, 'r') as file:
    data = file.read()

import re

pattern = re.compile(r'Q:(.*)\nA:(.*)')

parsed_data = re.findall(pattern , data)

doc = [{"question": q.strip(), "answer": a.strip()} for q, a in parsed_data]
import chromadb
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma

# # Initialize ChromaDB client (persistent storage in "./chroma_db")
# chroma_client = chromadb.PersistentClient(path="../chroma_db")

# Load embedding model
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from langchain_chroma import Chroma

vector_store = Chroma(embedding_function=embedding_model)

In [3]:
vector_store.add_texts([i["question"] for i in doc], metadatas=doc)

['9bbec5fc-a0e5-4b3b-9861-ac27540dc0d3',
 '9f730236-c138-4b7f-9f6a-db379a9a595d',
 'e3385c4c-91c6-4d31-900c-7644058c70b5',
 'b6e1006c-e150-4191-8bf3-bd6c4cb4e346',
 '09270cbd-c42e-413b-86b2-5a13586cde0e',
 '29bc255a-9598-48ba-a288-4d76bcb03458',
 '54196628-dee9-4a73-a8a1-c3b100584aee',
 '27bf8be1-eaf3-4218-9969-5aafe2b07988',
 'f8354ba1-fba3-476a-9392-b64666b1fc16',
 'daea841d-7fbb-4860-b6cb-9e1efcbed9c1',
 '9b0bed3a-312b-44eb-8473-d7597c6a46bc',
 '14788006-9a14-40ef-b8cf-bf38f4dfb147',
 'c7819179-68af-4a66-a38d-edbe7365ccaa',
 '0dc72056-8ca3-454a-97c4-e4e51ce6455a',
 'e408a9da-6539-46d6-9d48-7b17dab1feb2',
 '32c008e0-8d4e-4b5d-8cbb-740298b4e69c',
 '8062f9fc-34b4-443e-b8b4-946fd7f03eff',
 '5e0a9366-0a13-484e-b307-0bfc415dd456',
 'afa20ea9-d0ae-4053-a34d-b7deea633327',
 'e77795f5-9e53-4b71-976a-837742ae6f54',
 '914be579-34cb-4903-9a3d-0ce5b64531dc',
 'ec24fe71-7824-4fe0-953c-9a3759ef00a3',
 'f39c1f15-f632-4d6d-acbe-865fc5a56b75',
 'd4cdc0e2-a912-4955-a571-6f794c0039a9',
 '08908dc8-5850-

In [4]:
vector_store.similarity_search(query="How do I return an item?", k=1)[0].metadata["answer"]

'Custom t-shirts can be returned within 30 days of delivery, provided they are in their original condition.'

In [6]:
from langchain import hub

prompt = hub.pull("rlm/rag-prompt")

example_messages = prompt.invoke(
    {"context": "(context goes here)", "question": "(question goes here)"}
).to_messages()

assert len(example_messages) == 1
print(example_messages[0].content)



You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: (question goes here) 
Context: (context goes here) 
Answer:


In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_name_or_path = "TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ"
# To use a different branch, change revision
# For example: revision="gptq-4bit-32g-actorder_True"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
                                             device_map="auto",
                                             trust_remote_code=False,
                                             revision="main")

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
system_message = """You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know.
Use three sentences maximum and keep the answer concise.
Question: (question goes here) 
Context: (context goes here) 
Answer:"""

prompt = "How do i return the item?"



print("\n\n*** Generate:")

answer = vector_store.similarity_search(query=prompt, k=1)[0].metadata["answer"]
context = f"Context:::{answer}"
prompt += context

prompt_template=f'''<|im_start|>system
{system_message}<|im_end|>
<|im_start|>user
{prompt}<|im_end|>
<|im_start|>assistant
'''

input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()

output = model.generate(inputs=input_ids, temperature=0.7, do_sample=True, top_p=0.95, top_k=40, max_new_tokens=512)
print(tokenizer.decode(output[0]))

# Inference can also be done using transformers' pipeline

print("*** Pipeline:")
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    top_k=40,
    repetition_penalty=1.1
)

print(pipe(prompt_template)[0]['generated_text'])

  from .autonotebook import tqdm as notebook_tqdm


ImportError: Found an incompatible version of auto-gptq. Found version 0.3.0, but only version >= 0.4.99 are supported

# Building agents with Smolagents

In [2]:
# Path to the data file
faq_path = r"D:\Work\Development\RAG-agent\data\Anadea homework -Tee Customizer FAQ.txt"
# The question is the key, the answer is the value

with open(faq_path, 'r') as file:
    data = file.read()

import re

pattern = re.compile(r'Q:(.*)\nA:(.*)')

parsed_data = re.findall(pattern , data)

doc = [{"question": q.strip(), "answer": a.strip()} for q, a in parsed_data]


In [4]:
import chromadb
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma

# # Initialize ChromaDB client (persistent storage in "./chroma_db")
# chroma_client = chromadb.PersistentClient(path="../chroma_db")

# Load embedding model
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [10]:
faq_store = chromadb.PersistentClient(path="../chroma_db")

In [9]:
from smolagents import ToolCallingAgent, tool
from fuzzywuzzy import process

# Sample FAQ database
faq_data = {
    "What is your return policy?": "You can return items within 30 days of purchase with a receipt.",
    "How can I track my order?": "Use the tracking link provided in your confirmation email.",
    "Do you offer international shipping?": "Yes, we ship to most countries worldwide.",
}

# Function to find the best matching FAQ answer
def retrieve_faq_answer(query, faq_data, threshold=70):
    best_match, score = process.extractOne(query, faq_data.keys())
    if score >= threshold:
        return faq_data[best_match]
    return "I don't know."

# Define the agent
class FAQAgent(ToolCallingAgent):
    def run(self, query):
        return retrieve_faq_answer(query, faq_data)

# Instantiate and use the agent
faq_agent = FAQAgent()
while True:
    user_input = input("Ask me a question: ")
    if user_input.lower() in ["exit", "quit"]:
        break
    response = faq_agent.run(user_input)
    print("Bot:", response)


TypeError: ToolCallingAgent.__init__() missing 2 required positional arguments: 'tools' and 'model'

In [None]:
from rank_bm25 import BM25Okapi
from sentence_transformers import util

# Tokenize questions for BM25
tokenized_corpus = [q.lower().split() for q in questions]
bm25 = BM25Okapi(tokenized_corpus)

def hybrid_retrieval(user_question):
    # Get dense retrieval (FAISS)
    query_embedding = model.encode([user_question], convert_to_numpy=True)
    D, I = index.search(query_embedding, k=3)  # Retrieve top-3 candidates

    # Get sparse retrieval (BM25)
    tokenized_query = user_question.lower().split()
    bm25_scores = bm25.get_scores(tokenized_query)
    bm25_best_idx = int(np.argmax(bm25_scores))

    # Merge results (Choose the best match)
    all_candidates = list(set([I[0][0], bm25_best_idx]))
    best_idx = max(all_candidates, key=lambda idx: bm25_scores[idx] + 1/(D[0][0]+1e-5))
    
    return qa_pairs[best_idx]["answer"]

# Example usage
print(hybrid_retrieval("Who is the author of 1984?"))