## Install Required Libraries

In [None]:
!pip install pymupdf sentence-transformers faiss-cpu langchain-text-splitters groq


Collecting pymupdf
  Downloading pymupdf-1.26.6-cp310-abi3-manylinux_2_28_x86_64.whl.metadata (3.4 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.13.0-cp39-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.7 kB)
Collecting langchain-text-splitters
  Downloading langchain_text_splitters-1.0.0-py3-none-any.whl.metadata (2.6 kB)
Collecting groq
  Downloading groq-0.37.0-py3-none-any.whl.metadata (16 kB)
Downloading pymupdf-1.26.6-cp310-abi3-manylinux_2_28_x86_64.whl (24.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.1/24.1 MB[0m [31m88.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading faiss_cpu-1.13.0-cp39-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (23.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.6/23.6 MB[0m [31m85.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading langchain_text_splitters-1.0.0-py3-none-any.whl (33 kB)
Downloading groq-0.37.0-py3-none-any.whl (137 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━

## UPLOAD PDF FILE AND EXTRACT TEXT FROM PDF PAGES

In [None]:
from google.colab import files
import fitz  # PyMuPDF

# 1. Upload the PDF
uploaded = files.upload()
if not uploaded:
    # The upload dialog was cancelled; fail with a clear message instead of
    # an IndexError on the empty mapping.
    raise ValueError("No file was uploaded. Re-run this cell and choose a PDF.")
pdf_path = next(iter(uploaded))  # first uploaded file name
print("Using PDF:", pdf_path)

# 2. Extract text from all pages.
# The context manager closes the document once extraction is done, so the
# file handle is not left open for the rest of the session.
with fitz.open(pdf_path) as doc:
    pages_text = [page.get_text() for page in doc]  # keep per-page text if needed

# Full document text: every page followed by a newline.
all_text = "".join(page_text + "\n" for page_text in pages_text)

print("Total pages:", len(pages_text))
print("\nPreview of extracted text:\n")
print(all_text[:1500])


Saving a-beginners-guide-to-online-shopping.pdf to a-beginners-guide-to-online-shopping.pdf
Using PDF: a-beginners-guide-to-online-shopping.pdf
Total pages: 33

Preview of extracted text:

A beginner’s guide to 
doing your shopping online
Helping you to shop online
Publication date: February 2022

Contents
Introducing you to online shopping	
3
Using this guide	
6
Key terminology	
7
The benefits of online shopping	
11
Is online shopping safe?	
13
Setting up online shopping accounts	
16
●Shopping on Amazon	
18
Shopping at Morrisons online	
21
Shopping at Sainsbury’s online	
23
Shopping at Tesco online	
29
Next steps	
31
2  |  A beginner’s guide to doing your shopping online

1. Introducing you 
to online shopping 
3  |  A beginner’s guide to doing your shopping online

1. Introducing you to online shopping
Hello, 
Welcome to Age UK’s beginner’s guide to online shopping. We’ve 
created this guide to show you how to shop for groceries and other 
household items on the internet. We’ll talk 

## SPLIT EXTRACTED TEXT INTO CHUNKS

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Create splitter: ~1000-character chunks with 200 characters of overlap,
# trying paragraph, line, sentence and word boundaries in that order.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    separators=["\n\n", "\n", ".", " ", ""]
)

# Split into chunks (strings)
chunks = text_splitter.split_text(all_text)

# A PDF with no extractable text (for example a scanned document) yields no
# chunks; stop here with a clear message rather than an IndexError below.
if not chunks:
    raise ValueError(
        "No text chunks were produced. The PDF may contain no extractable text."
    )

print(f"Total chunks created: {len(chunks)}\n")
print("First chunk preview:")
print("-------------------------------------")
print(chunks[0][:500])


Total chunks created: 44

First chunk preview:
-------------------------------------
A beginner’s guide to 
doing your shopping online
Helping you to shop online
Publication date: February 2022

Contents
Introducing you to online shopping	
3
Using this guide	
6
Key terminology	
7
The benefits of online shopping	
11
Is online shopping safe?	
13
Setting up online shopping accounts	
16
●Shopping on Amazon	
18
Shopping at Morrisons online	
21
Shopping at Sainsbury’s online	
23
Shopping at Tesco online	
29
Next steps	
31
2  |  A beginner’s guide to doing your shopping online

1. Intr


## LOAD EMBEDDING MODEL & CREATE EMBEDDINGS

In [None]:
from sentence_transformers import SentenceTransformer
import numpy as np

# Load free embedding model
model = SentenceTransformer("all-MiniLM-L6-v2")


def embed_text_local(text: str):
    """Embed a single string with the loaded sentence-transformers model.

    Args:
        text: The text to embed.

    Returns:
        A 1D vector (the first row of the one-item batch passed to `model.encode`).
    """
    return model.encode([text])[0]


# Create embeddings for all chunks in one batched call instead of one
# `encode` call per chunk. `list(...)` keeps `embeddings` a list of 1D vectors,
# the same shape of data the rest of the notebook consumes.
# NOTE(review): batched encoding pads sequences, so values may differ from
# per-chunk encoding by floating-point rounding only - confirm if exact
# reproducibility of earlier vectors matters.
embeddings = list(model.encode(chunks))

print("Embeddings created successfully!")
print("Embedding vector size:", len(embeddings[0]))
print("Total embeddings:", len(embeddings))


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Embeddings created successfully!
Embedding vector size: 384
Total embeddings: 44


## BUILD FAISS VECTOR STORE

In [None]:
import faiss
import numpy as np

# Stack the per-chunk vectors into a single float32 matrix (one row per chunk).
embedding_matrix = np.array(embeddings, dtype="float32")

# Flat L2 index whose dimensionality matches the embedding width (should be 384).
dim = embedding_matrix.shape[1]
index = faiss.IndexFlatL2(dim)
index.add(embedding_matrix)

print("FAISS index created!")
print("Index total vectors:", index.ntotal)


FAISS index created!
Index total vectors: 44


## DEFINE FAISS SEARCH FUNCTION

In [None]:
def search_faiss(query: str, top_k: int = 5):
    """Return the text chunks closest to `query` in the FAISS index.

    Args:
        query: Natural-language question to search for.
        top_k: Maximum number of chunks to return.

    Returns:
        A list of chunk strings in the order FAISS returned them. It may hold
        fewer than `top_k` items when the index contains fewer vectors.
    """
    # 1. Embed the query as a (1, dim) float32 row, the layout index.search expects
    query_embedding = embed_text_local(query)
    query_vector = np.array(query_embedding).astype("float32").reshape(1, -1)

    # 2. Search in FAISS (distances are not needed here)
    _, indices = index.search(query_vector, top_k)

    # 3. Return the matching chunks (strings).
    # FAISS fills missing results with -1 when fewer than top_k vectors exist;
    # indexing chunks[-1] would silently return the last chunk, so skip them.
    return [chunks[i] for i in indices[0] if i >= 0]

# Smoke test: retrieve the three nearest chunks for a sample question
# and show the first 500 characters of each.
test_query = "What is this guide about?"
results = search_faiss(test_query, top_k=3)

for i, hit in enumerate(results):
    preview = hit[:500]
    print(f"\n--- Result {i+1} ---\n")
    print(preview)



--- Result 1 ---

retailer. They do this to secure the sale from 
you and stop you from being tempted to 
shop with a competitor.
Search bar: If you’re looking for a specific 
piece of information or a product on a 
website you can use the search bar to look 
for it. It’s a text box, usually found at the 
top of the homepage. Type in what you’re 
looking for and then click the magnifying 
glass or press enter to start your search.
3. Understanding key terminology
9  |  A beginner’s guide to doing your shopping on

--- Result 2 ---

4. The benefits of 
online shopping
11  |  A beginner’s guide to doing your shopping online

--- Result 3 ---

3. 	 You’ll then be asked to enter your name, email address, create a password and 
then click ‘Verify password’. For tips on creating a secure password see Age UK’s ‘A 
beginner’s guide to staying safe online’.
4. 	 You might then be asked to solve a puzzle. This is to prove to the website that you’re 
a legitimate person and not an internet bot, 

## CONNECT TO GROQ LLM API

In [None]:
import os
from getpass import getpass

# Never hardcode the API key in the notebook: it ends up in saved files,
# shared copies and version control. Any key that was ever pasted into this
# notebook should be treated as exposed and revoked in the Groq console.
# If GROQ_API_KEY is not already set in the environment, prompt for it
# without echoing the value or storing it in the notebook.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass("Enter your Groq API key: ")


In [None]:
from groq import Groq

# Shared client, authenticated with the key stored in the environment.
client = Groq(api_key=os.environ["GROQ_API_KEY"])

# Single-message round trip to confirm the key and model are usable.
hello_messages = [{"role": "user", "content": "Hello from my Colab RAG project!"}]
response = client.chat.completions.create(
    model="llama-3.1-8b-instant",
    messages=hello_messages,
)

print(response.choices[0].message.content)


Hello from the other side. I'd be happy to help with your RAG project in Colab. What specific questions or challenges do you have at the moment?


## DEFINE RAG ANSWERING FUNCTION

In [None]:
def answer_query(query: str, top_k: int = 5) -> str:
    """Answer `query` using only text retrieved from the indexed document.

    The `top_k` nearest chunks are fetched with `search_faiss`, joined into a
    single context block, wrapped in a strict "context only" prompt and sent
    to the Groq chat model.

    Args:
        query: The user's question.
        top_k: Number of chunks to retrieve as context.

    Returns:
        The content of the first choice in the model's response.
    """
    # Retrieval: nearest chunks, joined with a visible separator between them.
    context = "\n\n---\n\n".join(search_faiss(query, top_k=top_k))

    # Grounding prompt: the rules tell the model to refuse rather than guess.
    prompt = f"""
You are a STRICT retrieval-augmented assistant.

RULES:
- Use ONLY the information from the CONTEXT below.
- If the answer is not clearly present in the context, reply:
  "The document does not provide this information."
- Do NOT guess or add extra examples.
- Do NOT use outside knowledge.

CONTEXT:
{context}

QUESTION:
{query}

ANSWER (based only on the context):
"""

    # Generation: a single user message carrying the whole prompt.
    chat_messages = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(
        model="llama-3.1-8b-instant",
        messages=chat_messages,
    )
    return completion.choices[0].message.content


## TEST RAG WITH SAMPLE QUESTIONS

In [None]:
# Ask a handful of sample questions and print each question with its answer.
sample_questions = (
    "What is the purpose of this guide?",
    "How can someone shop online safely?",
    "What are the benefits of online shopping mentioned?",
    "What are the next steps after reading this guide?",
)

for sample_question in sample_questions:
    print("Q:", sample_question)
    print("A:", answer_query(sample_question), "\n")


Q: What is the purpose of this guide?
A: The document does not provide this information. 

Q: How can someone shop online safely?
A: To shop online safely, as mentioned in the guide, follow these steps:

1. Shop with retailers you trust.
2. Beware of imitation websites. Don't click on links in texts or emails from unknown retailers, and be cautious of links that don't look legitimate.
3. Avoid public WiFi and instead do your online shopping at home on a secure line.
4. Use a credit card, if you can, as it makes it easier to receive a refund if you're a victim of fraud and also protects your main bank account if your payment details are stolen.
5. Read customer reviews.

These tips can be found on pages 1 and 2 of the context. 

Q: What are the benefits of online shopping mentioned?
A: The benefits of online shopping mentioned are:

1. Convenience
2. Variety
3. Speed
4. Budgeting (because you can see exactly what you'll be spending and easily take things out of your basket if you go ove

## QUESTIONS LIST FOR AUTO-EVALUATION

In [None]:
# Question list used for the automatic evaluation run
questions = [
    "What is this guide about?",
    "Who is this guide for?",
    "What does online shopping allow you to do?",
    "What is one benefit of online shopping?",
    "How can someone stay safe when shopping online?",
    "What should you avoid when shopping online?",
    "What payment method can help protect you from fraud?",
    "What can customer reviews help you with?",
    "Why is budgeting easier when shopping online?",
    "Can you send online purchases to someone else?"
]

# Let the model answer each question.
# Iterate the list defined above: the previous loop used `easy_questions`,
# a name that is not defined anywhere in this notebook, so the cell raised
# NameError on a fresh kernel.
for q in questions:
    print("Q:", q)
    print("A:", answer_query(q))
    print("------------------------------------\n")


Q: What is this guide about?
A: A beginner’s guide to doing your shopping online.
------------------------------------

Q: Who is this guide for?
A: The document does not provide this information.
------------------------------------

Q: What does online shopping allow you to do?
A: According to the context, online shopping allows you to do the following:

- Have your goods delivered direct to your door.
- Save your previous orders so you can repeat them in the future.
- Send gifts directly to friends and family, often with a gift note, because on most shopping websites you can enter a separate delivery address.
- Budget from week to week because you can see exactly what you’ll be spending before you purchase anything.
- Save your favourite items in an online shopping basket, which makes it easy to place repeat orders when you need to.
- Pick up your goods in-store with a 'click and collect' option.
------------------------------------

Q: What is one benefit of online shopping?
A: Con

## BUILD SHOPPING GUIDE AGENT CLASS (OOP RAG)

In [None]:
class ShoppingGuideAgent:
    """Retrieval-augmented question answering over a chunked document.

    Combines a FAISS index (retrieval) with a Groq chat client (generation).

    Args:
        client: Chat client exposing `chat.completions.create(...)`.
        index: FAISS index whose vectors correspond, by position, to `chunks`.
        chunks: List of text chunks that were embedded into `index`.
        embed_fn: Optional callable mapping a string to a 1D embedding vector.
            When omitted, the notebook-level `embed_text_local` is used.
        model: Name of the chat model to query.
    """

    def __init__(self, client, index, chunks, embed_fn=None,
                 model="llama-3.1-8b-instant"):
        self.client = client
        self.index = index
        self.chunks = chunks
        self.embed_fn = embed_fn
        self.model = model

    def search(self, query, top_k=5):
        """FAISS semantic search.

        Returns up to `top_k` chunk strings in the order FAISS returned them.
        """
        # Resolve the embedder lazily so the default keeps using the
        # notebook-level function, exactly as before.
        embed = self.embed_fn if self.embed_fn is not None else embed_text_local
        query_embedding = embed(query)
        query_vector = np.array(query_embedding).astype("float32").reshape(1, -1)
        _, indices = self.index.search(query_vector, top_k)
        # FAISS pads with -1 when the index holds fewer than top_k vectors;
        # chunks[-1] would silently return the last chunk, so drop those slots.
        return [self.chunks[i] for i in indices[0] if i >= 0]

    def build_prompt(self, query, context):
        """Build safe RAG prompt"""
        return f"""
You are a helpful AI agent that answers questions ONLY using the provided context.
If the answer is not clearly present, reply exactly with:
"The document does not provide this information."

CONTEXT:
{context}

QUESTION:
{query}

ANSWER:
"""

    def ask(self, query, top_k=5):
        """Answer user questions using Groq + FAISS.

        Retrieves `top_k` chunks, joins them into one context block, builds the
        prompt and returns the content of the model's first choice.
        """
        retrieved_chunks = self.search(query, top_k)
        context = "\n\n---\n\n".join(retrieved_chunks)
        prompt = self.build_prompt(query, context)

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}]
        )

        return response.choices[0].message.content


## USE THE AGENT TO ASK QUESTIONS

In [None]:
# Wire the agent to the Groq client, the FAISS index and the chunk list built above.
agent = ShoppingGuideAgent(client=client, index=index, chunks=chunks)


In [None]:
questions = [
    "What is this guide about?",
    "How can I shop online safely?",
    "What is click and collect?",
    "How does online shopping help with budgeting?",
]

# Print each question, then the agent's answer followed by a blank line.
for q in questions:
    print(f"Q: {q}")
    answer = agent.ask(q)
    print(f"A: {answer}\n")


Q: What is this guide about?
A: A beginner’s guide to doing your shopping online.

Q: How can I shop online safely?
A: Shop with retailers you trust, and be cautious of imitation websites by not clicking on links in texts or emails from unknown retailers and ensuring the URL of the website is authentic.

Q: What is click and collect?
A: With supermarkets, you are usually given the option to 'click and collect', which means you can order online and pick up your goods in-store, or you can arrange for them to be delivered to your home.

Q: How does online shopping help with budgeting?
A: When shopping online, the value of the items in your basket or cart will be added up as you go. This means you can see exactly what you’ll be spending before you purchase anything and can easily take things out of your basket if you go over your budget.

