In [None]:
!pip install transformers accelerate bitsandbytes sentence-transformers faiss-cpu bitsandbytes



## Logging in to Hugging Face

In [None]:
from getpass import getpass

from huggingface_hub import login

# Read the token without echoing it, so it never ends up in the saved cell output.
login(getpass("Enter your hugging face token: "))

Enter your hugging face token: 


## Llama initialization

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

# Hugging Face Hub id of the checkpoint loaded below.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

# Quantization settings passed to `from_pretrained`: 4-bit loading with the
# "nf4" quant type, double quantization enabled, "float16" compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

tokenizer = AutoTokenizer.from_pretrained(model_id)

# device_map="auto" leaves device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config
)
# Text-generation pipeline over the same model/tokenizer; `rag_query` calls it as `llm`.
llm = pipeline("text-generation", model=model, tokenizer=tokenizer)
# Reuse the EOS token id as the padding token id.
# NOTE(review): this assignment happens after the pipeline is built, and the recorded
# outputs still show the "Setting `pad_token_id` to `eos_token_id`" message on every
# generation — confirm whether it should be set before `pipeline(...)`.
tokenizer.pad_token_id = tokenizer.eos_token_id

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Device set to use cuda:0


In [None]:
# Smoke test: ask the bare model (no retrieval) a question to confirm generation works.
# Inputs are moved to the model's own device rather than a hard-coded "cuda".
inputs = tokenizer("What is RAG system ?", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


What is RAG system? RAG system is a method used to categorize the risk associated with a project, process or activity. The term RAG stands for Red, Amber, Green, which are the three colors used to indicate the level of risk.
What is RAG


## Assistant prompts

In [None]:
import yaml

# Offer an upload widget when running on Google Colab. Only a missing
# `google.colab` module means "not on Colab"; any other failure should surface
# instead of being reported with a misleading message.
try:
    from google.colab import files
except ImportError:
    print("not running on colab")
else:
    print("please upload docs")
    uploaded = files.upload()

# change this to the correct path, this path is meant to work in colab
# NOTE(review): Colab saves a re-uploaded file under a new name
# (e.g. "assistant-prompts (1).yml"), so this path may still point at an older copy.
with open("assistant-prompts.yml", "r", encoding="utf-8") as f:
    # Mapping of prompt-type name -> template fields, read by `build_prompt`.
    PROMPTS = yaml.safe_load(f)

def clean_doc_text(doc):
    """
    Strip Markdown header markers from a document and trim its ends.

    A run of one to six ``#`` characters at the start of any line is removed,
    together with the whitespace that follows it. Leading and trailing
    whitespace of the whole document is then stripped; other whitespace is
    left untouched.

    Args:
        doc: The document text.

    Returns:
        The document text without header markers.
    """
    # Imported locally so the function does not depend on a later cell
    # having already run `import re`.
    import re

    doc = re.sub(r'^#{1,6}\s*', '', doc, flags=re.MULTILINE)

    return doc.strip()

def build_prompt(prompt_type, context, question, retrieved_docs=None):
    """
    Assemble the prompt text for one query from a template in ``PROMPTS``.

    Args:
        prompt_type: Key into ``PROMPTS``. The selected entry must provide the
            ``role``, ``goal`` and ``response_format`` fields.
        context: Text inserted verbatim under the ``Context:`` heading.
        question: Text inserted after ``Question:``.
        retrieved_docs: Optional sequence of retrieved documents. Only its
            length is used, to word the closing explanation line; the
            documents themselves reach the prompt solely through ``context``.

    Returns:
        The complete prompt string.

    Raises:
        KeyError: If ``prompt_type`` or one of the required fields is missing
            (assuming ``PROMPTS`` is a plain dict as loaded from YAML).
    """
    cfg = PROMPTS[prompt_type]

    # None and an empty sequence both mean "nothing was retrieved".
    if retrieved_docs:
        explanation_text = f"This answer is based on {len(retrieved_docs)} document(s) from the knowledge base."
    else:
        explanation_text = "No relevant documents found."

    return f"""{cfg['role']}

Goal:
{cfg['goal']}

Context:
{context}

Question: {question}

{cfg['response_format']}
{explanation_text}
"""



please upload docs


Saving assistant-prompts.yml to assistant-prompts (1).yml


## FAISS

In [None]:
from sentence_transformers import SentenceTransformer, util
import faiss
import numpy as np

# Offer an upload widget when running on Google Colab. Only a missing
# `google.colab` module means "not on Colab"; other failures should surface.
try:
    from google.colab import files
except ImportError:
    print("not running on colab")
else:
    print("please upload docs")
    uploaded = files.upload()

with open("knowledge-base.md", "r", encoding="utf-8") as f:
    kb_text = f.read()

# Documents in the knowledge base are separated by "---"; drop empty chunks.
raw_docs = kb_text.split("---")
documents = [doc.strip() for doc in raw_docs if doc.strip()]
if not documents:
    raise ValueError("knowledge-base.md contains no documents to index")

# Load the embedding model once; it is reused for queries and for evaluation.
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
embeddings = embedder.encode(documents, convert_to_numpy=True)

# Flat L2 index over the document embeddings; row i corresponds to documents[i].
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

please upload docs


Saving knowledge-base.md to knowledge-base (1).md


## RAG query

In [None]:
import re

def rag_query(query, threshold=2, k=3):
    """
    Answer a question with retrieval-augmented generation.

    The query is embedded, the ``k`` nearest knowledge-base documents are
    looked up in the FAISS index, a prompt is built from them and the
    text-generation pipeline produces the answer.

    Args:
        query: The user's question.
        threshold: Maximum distance of the closest hit for the retrieved
            documents to be used. If the closest hit is farther than this,
            the question is answered without context.
        k: Number of neighbours requested from the index.

    Returns:
        The text following the first ``"Answer:"`` marker in the pipeline
        output, or the whole stripped output if the marker is absent.
    """
    query_vec = embedder.encode([query], convert_to_numpy=True)
    distances, indices = index.search(query_vec, k)

    # FAISS pads missing neighbours with label -1; without the lower bound a
    # negative label would silently wrap around to the last document.
    retrieved_docs = [documents[i] for i in indices[0] if 0 <= i < len(documents)]
    min_distance = float(distances[0][0]) if len(distances[0]) > 0 else 1.0

    if len(retrieved_docs) == 0 or min_distance > threshold:
        prompt_type = "no_context_prompt"
        context = ""
        # Nothing is actually used, so the prompt must not claim it was.
        retrieved_docs = []
    elif len(retrieved_docs) == 1:
        prompt_type = "base_retrieval_prompt"
        context = retrieved_docs[0]
    else:
        prompt_type = "multi_doc_prompt"
        context = "\n\n".join(retrieved_docs)

    prompt = build_prompt(prompt_type, context, query, retrieved_docs=retrieved_docs)
    # Greedy decoding; `temperature` is omitted because it has no effect
    # when sampling is disabled.
    response = llm(prompt, max_new_tokens=512, do_sample=False)
    full_text = response[0]['generated_text'].strip()

    if "Answer:" in full_text:
        return full_text.split("Answer:", 1)[-1].strip()
    return full_text

# Example query; as the cell's last expression, the returned answer is displayed.
rag_query("what is the refund time ?")

The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


'This answer is based on 3 document(s) from the knowledge base.\nAccording to Document 7: Returns and Refunds, refunds are processed within 7–10 business days after items are inspected. This is the standard timeframe for refunds. However, please note that this timeframe may vary depending on the specific circumstances of the return. For more information, please refer to Document 7.'

## Evaluating Q&A
### Using semantic evaluation



In [None]:
import re


qa_file = "ground-truth-qa.md"
# Explicit encoding: the file contains non-ASCII characters.
with open(qa_file, "r", encoding="utf-8") as f:
    qa_text = f.read()

# Entries are separated by "---"; drop empty chunks.
qa_pairs = [pair.strip() for pair in qa_text.split("---") if pair.strip()]
# The first chunk is not counted as a QA pair (evaluate_rag starts at qa_pairs[1]).
print("QA: ", len(qa_pairs)-1)

def clean_text(text):
    """
    Normalise an answer string before comparison.

    Removes everything from a "📖" marker to the end of its line, all
    backticks, and every "_____" run, then trims surrounding whitespace.
    """
    without_refs = re.sub(r"📖.*", "", text)
    without_ticks = re.sub(r"`+", "", without_refs)
    return without_ticks.replace("_____", "").strip()

def evaluate_rag(k=3, threshold=0.7):
    """
    Run every ground-truth question through ``rag_query`` and score the answers.

    An answer is "correct" when the cosine similarity between its embedding
    and the expected answer's embedding is at least ``threshold``. It is
    "faithful" when it does not contain the phrase
    "not in the provided documents".

    Args:
        k: Number of documents to retrieve per question.
        threshold: Minimum cosine similarity for an answer to count as correct.

    Returns:
        A tuple ``(results, accuracy, faithfulness_score)``. ``results`` is a
        list with one dict per evaluated pair; the two scores are percentages
        of the evaluated pairs (0.0 when nothing was evaluated).
    """
    correct = 0
    faithful = 0
    results = []

    # qa_pairs[0] is skipped: it is treated as the file's preamble.
    for i, pair in enumerate(qa_pairs[1:], start=1):
        parts = pair.split('**Answer:**')
        if len(parts) != 2:
            print(f"Skipping malformed QA pair: {pair}")
            continue
        question, expected = parts

        expected_answer = clean_text(expected)
        raw_answer = rag_query(question, k=k)
        generated_answer = clean_text(raw_answer)

        # semantic similarity
        sim = util.cos_sim(
            embedder.encode(generated_answer, convert_to_tensor=True),
            embedder.encode(expected_answer, convert_to_tensor=True)
        ).item()

        is_correct = sim >= threshold
        is_faithful = "not in the provided documents" not in generated_answer.lower()

        if is_correct:
            correct += 1
        if is_faithful:
            faithful += 1

        results.append({
            "Question": question.strip(),
            "Expected": expected_answer,
            "Answer": generated_answer,
            "Similarity": sim,
            "Correct": is_correct,
            "Faithful": is_faithful
        })

        print(f"[Q{i}] {question.strip()}")
        print(f"Expected: {expected_answer}")
        print(f"Got: {generated_answer.strip()}")
        print(f"Similarity: {sim:.2f} | Correct: {is_correct}\n")

    # Score only the pairs that were actually evaluated, so skipped malformed
    # entries do not count as wrong answers, and guard against an empty set.
    total = len(results)
    accuracy = (correct / total) * 100 if total else 0.0
    faithfulness_score = (faithful / total) * 100 if total else 0.0
    print("=== Final Evaluation ===")
    print(f"Accuracy: {accuracy:.1f}% ({correct}/{total})")
    print(f"Faithfulness: {faithfulness_score:.1f}% ({faithful}/{total})")

    return results, accuracy, faithfulness_score

# Run the evaluation with 3 retrieved documents and a similarity cut-off of 0.6
# (overriding the function's default of 0.7).
results, accuracy, faithfulness = evaluate_rag(k=3, threshold=0.6)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


QA:  20


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[Q1] ### Q1: How do I create an account on Shoplite?
Expected: You can create an account using your email and password, or sign up with Google/Facebook login. A verified email is required for confirmations and password resets.
Got: This answer is based on 3 document(s) from the knowledge base.
To create an account on Shoplite, you need to sign up with a valid email address, a password, and acceptance of Shoplite's terms of service. You can also register using Google or Facebook login. After creating an account, you can manage profile details, shipping addresses, and payment methods from the Account Settings page. A verified email is required for order confirmations and password resets. Accounts that remain inactive for long periods may be flagged for review, but they are not automatically deleted. Shoplite ensures user data is stored securely, following encryption standards and privacy rules.
Similarity: 0.60 | Correct: True



Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[Q2] ### Q2: What payment methods does Shoplite support?
Expected: Shoplite accepts Visa, Mastercard, American Express, PayPal, and cash on delivery (COD in eligible regions). COD has a handling fee and is limited to orders under $200.
Got: This answer is based on 3 document(s) from the knowledge base.
Shoplite supports Visa, Mastercard, American Express, PayPal, and cash on delivery (COD) for eligible regions. COD has a handling fee and is only available for orders under $200. Prepaid methods (credit/debit cards, PayPal) are encouraged as they speed up delivery. Shoplite never stores card details directly; instead, tokenized information is saved with trusted payment providers. Failed payments may result in order cancellation unless corrected within 24 hours. Customers are advised not to share payment details with anyone claiming to represent Shoplite outside the official app or website.
Similarity: 0.94 | Correct: True



Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[Q3] ### Q3: How long does standard shipping take?
Expected: Standard shipping takes 5–7 business days. Express shipping takes 2–3 business days. Orders above $100 get free standard shipping.
Got: This answer is based on 3 document(s) from the knowledge base.
Standard shipping typically takes 5–7 business days. (Document 6: Shipping Policy)
Similarity: 0.82 | Correct: True



Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[Q4] ### Q4: Can I cancel an order after it ships?
Expected: Orders can only be freely canceled during the Processing stage. Once shipped, cancellations are not guaranteed and require contacting customer support.
Got: This answer is based on 3 document(s) from the knowledge base.
According to Document 18: Cancellation Policy, once an order is shipped, cancellation is not guaranteed. Document 4: Ordering Process also states that cancellations are possible while the order is still in the Processing stage, but after that, cancellation requires contacting customer support. Document 17: Order Tracking shows that tracking status updates to "Shipped" when the order is shipped, indicating that the order has left the Processing stage. Therefore, the answer is: **No, you cannot cancel an order after it ships.**


Please let me know if you need any further assistance.
Similarity: 0.79 | Correct: True



Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[Q5] ### Q5: What is Shoplite Drops?
Expected: Drops are limited-time product launches (like sneakers or electronics) with limited stock. Orders are first-come-first-serve, and they cannot be canceled once confirmed.
Got: This answer is based on 3 document(s) from the knowledge base.
Shoplite Drops refer to the exclusive or limited-time products that go live on the platform. These drops usually include electronics, sneakers, or special collaborations. Each drop is announced in advance, and orders are first-come-first-serve. Quantities are often limited, and orders in drops cannot be canceled once confirmed. Shoplite uses queueing systems to handle high demand during drops, and users may face waiting rooms before checkout.
Similarity: 0.73 | Correct: True



Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[Q6] ### Q6: What is Shoplite’s return policy?
Expected: Returns are allowed within 30 days of delivery if items are unused, in original packaging, and with proof of purchase. Some items like perishable goods or digital downloads are not eligible. Refunds are processed within 7–10 business days.
Got: This answer is based on 3 document(s) from the knowledge base.
Shoplite allows returns within 30 days of delivery, provided items are unused, in original packaging, and accompanied by proof of purchase. Certain products like perishable goods, intimate wear, or digital downloads are not eligible for return. Refunds are processed within 7–10 business days after items are inspected. If the original payment was by credit card or PayPal, refunds are issued back to the same method. For COD orders, refunds are provided as store credit. Return shipping is free if the product was damaged, defective, or incorrect. Otherwise, return shipping costs may apply. Third-party products are covered by Shopli

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[Q7] ### Q7: How does Shoplite handle refunds for COD orders?
Expected: For COD purchases, refunds are provided as store credit instead of cash.
Got: This answer is based on 3 document(s) from the knowledge base.
Shoplite provides refunds for COD orders as store credit. Refunds are processed within 7-10 business days after items are inspected. If the original payment was by credit card or PayPal, refunds are issued back to the same method. For COD orders, refunds are provided as store credit. Return shipping is free if the product was damaged, defective, or incorrect. Otherwise, return shipping costs may apply. (Document 7: Returns and Refunds) COD has a handling fee and is only available for orders under $200. (Document 5: Payment Methods) Shoplite never stores card details directly; instead, tokenized information is saved with trusted payment providers. (Document 5: Payment Methods) Failed payments may result in order cancellation unless corrected within 24 hours. Customers are advis

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[Q8] ### Q8: Do all products come with a warranty?
Expected: Not all, but many items do. Most warranties last 6–12 months and cover defects in materials or workmanship. They don’t cover misuse or accidents.
Got: This answer is based on 3 document(s) from the knowledge base.
Yes, many Shoplite products come with a manufacturer's warranty, usually 6–12 months depending on the item. However, warranty does not cover damage from misuse, accidents, or normal wear and tear. (Document 8: Warranty Policy)
Similarity: 0.66 | Correct: True



You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[Q9] ### Q9: How do I track my order after purchase?
Expected: You can track order status from Processing → Shipped → Out for Delivery → Delivered in your account. Once shipped, you’ll also receive a courier tracking number.
Got: This answer is based on 3 document(s) from the knowledge base.
To track your order after purchase, you can log in to your Shoplite account and view the order status. The tracking information will show the steps from “Processing” to “Delivered.” You will receive notifications at each stage. If you have any issues or concerns, please report them to Shoplite within 48 hours of delivery. We will investigate with the courier and provide updates promptly when information is available.
Similarity: 0.71 | Correct: True



Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[Q10] ### Q10: What should I do if my package says “Delivered” but I didn’t receive it?
Expected: You should report it to Shoplite support within 48 hours. Shoplite will investigate with the courier.
Got: This answer is based on 3 document(s) from the knowledge base.
According to Document 17: Order Tracking, if tracking shows “Delivered” but the item is missing, customers should report within 48 hours. Shoplite will investigate with the courier. If you didn't receive your package and it says "Delivered" in your tracking, report the issue to Shoplite within 48 hours. We will investigate with the courier to resolve the issue.
Similarity: 0.68 | Correct: True



Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[Q11] ### Q11: Does Shoplite store my credit card details?
Expected: No. Shoplite does not store card details directly. Instead, it saves tokenized payment info through secure payment providers.
Got: This answer is based on 3 document(s) from the knowledge base.
Shoplite does not store your credit card details directly. Instead, tokenized information is saved with trusted payment providers. This ensures secure and compliant payment processing. (Document 5: Payment Methods)
Similarity: 0.90 | Correct: True



Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[Q12] ### Q12: How does the loyalty program work?
Expected: For every $1 spent, you earn 1 point. Points can be redeemed for discounts (100 points = $1 off). Points expire after 12 months of inactivity.
Got: This answer is based on 3 document(s) from the knowledge base.
The Shoplite Rewards loyalty program allows customers to earn points for every dollar spent. Points can be redeemed for discounts, and members receive early access to drops and special promotions. Points expire after 12 months of inactivity, and they cannot be transferred between accounts. Customers can track their points in the Account Settings page.
Similarity: 0.75 | Correct: True



Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[Q13] ### Q13: Can I combine multiple coupons in one order?
Expected: No. Coupons cannot be stacked — only one coupon can be applied per order.
Got: This answer is based on 3 document(s) from the knowledge base.
According to Document 13: Promotions and Discounts, "Discounts may not stack with other offers." This implies that you cannot combine multiple coupons in one order. However, it's always best to check the validity dates on coupons and verify with Shoplite support if you have any doubts.
Similarity: 0.76 | Correct: True



Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[Q14] ### Q14: Are reviews moderated on Shoplite?
Expected: Yes. Reviews are monitored for abusive or fake content and may be removed. Only verified customers can post reviews.
Got: This answer is based on 3 document(s) from the knowledge base.
Reviews are moderated on Shoplite. According to Document 14: Review System, Shoplite monitors reviews for inappropriate content and may remove fake or abusive reviews. This ensures that reviews are trustworthy and helpful for other shoppers.
Similarity: 0.62 | Correct: True



Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[Q15] ### Q15: How are third-party sellers managed?
Expected: Sellers must be verified before listing products. Shoplite holds their payments in escrow until delivery is confirmed. Poor-performing sellers may be removed.
Got: This answer is based on 3 document(s) from the knowledge base.
Third-party sellers on Shoplite are managed through a verification process and agreement to Shoplite's policies. Shoplite holds their payments in escrow until orders are confirmed delivered. If disputes arise, Shoplite mediates between the customer and seller. Seller ratings are publicly displayed, and low-performing sellers may be removed. Shoplite's return and refund policies apply to third-party products unless otherwise specified. This ensures a smooth and trustworthy shopping experience for customers.
Similarity: 0.80 | Correct: True



Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[Q16] ### Q16: Does Shoplite have a mobile app?
Expected: Yes, on iOS and Android. The app supports push notifications, biometric login, barcode scanning, and voice search. Offline browsing is also available.
Got: This answer is based on 3 document(s) from the knowledge base.
Yes, Shoplite has a mobile app available on iOS and Android. The app offers various features, including push notifications, biometric login, barcode scanning, voice search, and in-app chat support. Offline browsing is also available, allowing customers to save wishlists and carts without internet, syncing when online. The app is updated monthly with bug fixes and new features. Customers are encouraged to keep the app updated for the best experience.
Similarity: 0.68 | Correct: True



Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[Q17] ### Q17: How secure is my personal data on Shoplite?
Expected: Shoplite complies with GDPR, encrypts all data transfers, and lets users delete their accounts anytime. Payment details are never stored directly.
Got: This answer is based on 3 document(s) from the knowledge base.
Shoplite takes privacy seriously and complies with GDPR and other privacy laws. Personal data such as name, email, and address is collected for order fulfillment and customer service. Payment details are never stored directly. Shoplite uses encryption for all data transfers and performs regular audits of its systems to prevent breaches. Additionally, user data is stored securely, following encryption standards and privacy rules. Customers can request account deletion at any time, which removes personal data except for records required by law. Cookies are used to improve browsing, and customers can adjust cookie settings. Overall, Shoplite ensures the security and privacy of your personal data.
Similarity: 0

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[Q18] ### Q18: Does Shoplite offer student discounts?
Expected: Yes. Student discounts are sometimes offered through partner verification services.
Got: This answer is based on 3 document(s) from the knowledge base.
Yes, Shoplite sometimes offers student discounts, which require verification through partner services. (Document 13: Promotions and Discounts) These discounts are not mentioned in the loyalty program (Document 12), but they are mentioned in the Shoplite Overview (Document 1) as a type of promotion. The loyalty program does not provide information on student discounts. The Shoplite Overview mentions promotions, which include student discounts, but does not provide details on the loyalty program. The answer is accurate, concise, and brief.
Similarity: 0.76 | Correct: True



Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[Q19] ### Q19: How does Shoplite contribute to the environment?
Expected: It uses recyclable packaging, eco-friendly couriers where possible, and offers “green shipping” as a slower but sustainable delivery option.
Got: This answer is based on 3 document(s) from the knowledge base.
Shoplite contributes to the environment by making sustainable choices. Packaging is recyclable, and customers are encouraged to recycle. The company partners with eco-friendly couriers where possible. Additionally, customers can opt for “green shipping,” which uses slower but lower-carbon delivery methods. Shoplite also supports charity campaigns, donating a portion of sales during special events to environmental causes. By making these choices, Shoplite aims to balance convenience with responsibility.
Similarity: 0.68 | Correct: True

[Q20] ### Q20: What should I do if the Shoplite app crashes?
Expected: First try clearing cache, updating the app, or restarting your device. If it persists, contact Shoplite 

# Ngrok and Flask

In [None]:
# Dependencies for serving the RAG system over HTTP through an ngrok tunnel.
!pip install flask pyngrok



In [None]:
import json
from flask import Flask, request, jsonify
from pyngrok import ngrok

# Flask application object; the route handlers defined below are registered on it.
app = Flask(__name__)

# /health route
@app.route("/health", methods=["GET"])
def health():
    """Liveness probe: reply with a fixed JSON status payload."""
    status_payload = {"status": "ok", "message": "RAG system is running"}
    return jsonify(status_payload)

# /ping route
@app.route("/ping", methods=["POST"])
def ping():
    """
    Send the query straight to the language model, without retrieval.

    Expects a JSON object with a non-empty string ``query`` field.

    Returns:
        JSON ``{"query", "response"}`` on success, or
        ``{"error": ...}`` with HTTP 400 when no usable query was supplied.
    """
    # silent=True yields None instead of an error response for a missing or
    # malformed JSON body, so every bad request gets the same 400 below.
    data = request.get_json(silent=True)
    # The body may be valid JSON without being an object (e.g. null or a list).
    query = data.get("query", "") if isinstance(data, dict) else ""
    if not isinstance(query, str) or not query.strip():
        return jsonify({"error": "No query provided"}), 400

    inputs = tokenizer(query, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=128)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return jsonify({"query": query, "response": answer})

# /chat route
@app.route("/chat", methods=["POST"])
def chat():
    """
    Answer the query through the RAG pipeline (``rag_query``).

    Expects a JSON object with a non-empty string ``query`` field.

    Returns:
        JSON ``{"query", "answer"}`` on success, or
        ``{"error": ...}`` with HTTP 400 when no usable query was supplied.
    """
    # silent=True yields None instead of an error response for a missing or
    # malformed JSON body, so every bad request gets the same 400 below.
    data = request.get_json(silent=True)
    # The body may be valid JSON without being an object (e.g. null or a list).
    query = data.get("query", "") if isinstance(data, dict) else ""
    if not isinstance(query, str) or not query.strip():
        return jsonify({"error": "No query provided"}), 400

    answer = rag_query(query)

    return jsonify({
        "query": query,
        "answer": answer
    })

from getpass import getpass

# Single source of truth for the port shared by the tunnel and the Flask server.
PORT = 5000

# Read the token without echoing it, so it never ends up in the saved cell output.
ngrok_token = getpass("Enter your ngrok token: ")
ngrok.set_auth_token(ngrok_token)
public_url = ngrok.connect(PORT).public_url
print(" * Ngrok tunnel open:", public_url)

# Blocks the cell until the server is interrupted.
app.run(port=PORT)


Enter your ngrok token: 
 * Ngrok tunnel open: https://nondiaphanous-unonerously-mimi.ngrok-free.dev
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
