In [None]:
!pip install -q sentence-transformers faiss-cpu groq


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.8/23.8 MB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m138.3/138.3 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import os
import re
from getpass import getpass

import faiss
import numpy as np
from groq import Groq
from sentence_transformers import SentenceTransformer


In [None]:
# Initialize the Groq client without embedding the secret in the notebook:
# prefer the GROQ_API_KEY environment variable and fall back to an interactive
# prompt (getpass does not echo the key, so it never lands in cell output).
client = Groq(api_key=os.environ.get("GROQ_API_KEY") or getpass("Groq API key: "))

In [None]:
# Demo corpus: interleaves sentences about the apple fruit with sentences about
# Apple Inc., so that the chunking strategies below can be compared on text
# whose topic switches back and forth.
big_document = """
Apple is a sweet, edible fruit produced by an apple tree.
Apples are rich in fiber, vitamin C, and antioxidants.
There are many varieties such as Fuji, Gala, and Granny Smith.
Apples are used in desserts like apple pie and apple juice.
Eating apples daily supports heart health.

Apple Inc. is an American technology company headquartered in Cupertino, California.
It was founded by Steve Jobs, Steve Wozniak, and Ronald Wayne in 1976.
Apple designs products like iPhone, iPad, Mac, and Apple Watch.
The company earns revenue from hardware, software, and services.
Apple focuses heavily on privacy and ecosystem integration.

The apple fruit grows in temperate climates.
Farmers harvest apples in autumn.
Apple Inc. also provides services like iCloud and Apple Music.
"""


In [None]:
# Shared sentence-embedding model; used by semantic_chunking (passed in),
# build_faiss_index and retrieve (both read this module-level name).
model = SentenceTransformer("all-MiniLM-L6-v2")

In [None]:
def fixed_chunking(text, chunk_size=200):
    """Split ``text`` into consecutive, non-overlapping character windows.

    The split is purely positional, so words and sentences may be cut in half
    at chunk boundaries. The final chunk may be shorter than ``chunk_size``.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.

    Returns:
        A list of substrings that concatenate back to ``text`` (an empty list
        when ``text`` is empty).

    Raises:
        ValueError: If ``chunk_size`` is not positive. Without this check a
            negative size would silently produce no chunks at all.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
    return [text[start:start + chunk_size] for start in range(0, len(text), chunk_size)]

# Character-window chunks of the demo corpus (default window size).
fixed_chunks = fixed_chunking(big_document)

print("===== FIXED CHUNKS =====\n")
for chunk_no, piece in enumerate(fixed_chunks):
    # Blank line after each chunk keeps the listing readable.
    print(f"Chunk {chunk_no}:\n{piece}\n")


===== FIXED CHUNKS =====

Chunk 0:

Apple is a sweet, edible fruit produced by an apple tree.
Apples are rich in fiber, vitamin C, and antioxidants.
There are many varieties such as Fuji, Gala, and Granny Smith.
Apples are used in dess

Chunk 1:
erts like apple pie and apple juice.
Eating apples daily supports heart health.

Apple Inc. is an American technology company headquartered in Cupertino, California.
It was founded by Steve Jobs, Stev

Chunk 2:
e Wozniak, and Ronald Wayne in 1976.
Apple designs products like iPhone, iPad, Mac, and Apple Watch.
The company earns revenue from hardware, software, and services.
Apple focuses heavily on privacy a

Chunk 3:
nd ecosystem integration.

The apple fruit grows in temperate climates.
Farmers harvest apples in autumn.
Apple Inc. also provides services like iCloud and Apple Music.




In [None]:
# Sentence boundary: terminal punctuation followed by whitespace and then an
# upper-case letter or digit, or a blank line (paragraph break). Requiring the
# upper-case/digit start keeps abbreviations such as "Apple Inc. is ..." intact.
_SENTENCE_BOUNDARY = re.compile(r"(?<=[.!?])\s+(?=[A-Z0-9])|\n\s*\n")


def semantic_chunking(text, model, threshold=0.75):
    """Group consecutive sentences into chunks by embedding similarity.

    The text is split into sentences; each sentence is compared (cosine
    similarity) with the embedding of the chunk built so far. If the
    similarity exceeds ``threshold`` the sentence is appended to the current
    chunk, otherwise the current chunk is closed and a new one is started.

    Args:
        text: The document to split.
        model: Object exposing ``encode(str)`` that returns a 1-D embedding
            vector (e.g. a SentenceTransformer).
        threshold: Cosine similarity above which a sentence joins the current
            chunk.

    Returns:
        A list of chunk strings in document order; sentences inside a chunk
        are joined with a single space and keep their punctuation. Returns an
        empty list when ``text`` contains no sentences.

    Note:
        Sentence splitting is heuristic: an abbreviation directly followed by
        a capitalised word (e.g. "Inc. Apple") is still treated as a boundary.
    """
    pieces = _SENTENCE_BOUNDARY.split(text.strip())
    sentences = [piece.strip() for piece in pieces if piece.strip()]
    if not sentences:
        return []

    chunks = []
    current_chunk = sentences[0]
    current_emb = model.encode(current_chunk)

    for sentence in sentences[1:]:
        sentence_emb = model.encode(sentence)

        # Guard against zero vectors so the cosine never becomes NaN.
        denom = np.linalg.norm(current_emb) * np.linalg.norm(sentence_emb)
        similarity = float(np.dot(current_emb, sentence_emb) / denom) if denom else 0.0

        if similarity > threshold:
            current_chunk = f"{current_chunk} {sentence}"
            # The chunk text changed, so its embedding must be refreshed.
            current_emb = model.encode(current_chunk)
        else:
            chunks.append(current_chunk)
            current_chunk = sentence
            # Reuse the embedding already computed for this sentence.
            current_emb = sentence_emb

    chunks.append(current_chunk)
    return chunks

# Similarity-based chunks of the same corpus, using the shared embedding model.
semantic_chunks = semantic_chunking(big_document, model)

print("===== SEMANTIC CHUNKS =====\n")
for chunk_no, piece in enumerate(semantic_chunks):
    # Blank line after each chunk keeps the listing readable.
    print(f"Chunk {chunk_no}:\n{piece}\n")


===== SEMANTIC CHUNKS =====

Chunk 0:

Apple is a sweet, edible fruit produced by an apple tree.
Apples are rich in fiber, vitamin C, and antioxidants.
There are many varieties such as Fuji, Gala, and Granny Smith.
Apples are used in desserts like apple pie and apple juice.
Eating apples daily supports heart health.

Apple Inc

Chunk 1:
is an American technology company headquartered in Cupertino, California.
It was founded by Steve Jobs, Steve Wozniak, and Ronald Wayne in 1976.
Apple designs products like iPhone, iPad, Mac, and Apple Watch.
The company earns revenue from hardware, software, and services.
Apple focuses heavily on privacy and ecosystem integration.

The apple fruit grows in temperate climates.
Farmers harvest apples in autumn.
Apple Inc

Chunk 2:
also provides services like iCloud and Apple Music.




In [None]:
def build_faiss_index(chunks):
    """Embed ``chunks`` and store the vectors in an exact L2 FAISS index.

    Uses the module-level ``model`` to compute the embeddings.

    Args:
        chunks: Non-empty sequence of strings to index. Position ``i`` in the
            index corresponds to ``chunks[i]``.

    Returns:
        A tuple ``(index, embeddings)`` where ``index`` is a
        ``faiss.IndexFlatL2`` holding one vector per chunk and ``embeddings``
        is the 2-D float32 array that was added to it.

    Raises:
        ValueError: If ``chunks`` is empty (there would be no embedding
            dimension to size the index with).
    """
    if len(chunks) == 0:
        raise ValueError("cannot build a FAISS index from an empty list of chunks")

    # FAISS expects a C-contiguous float32 matrix; this is a no-op when the
    # encoder already returns one, and a safe conversion otherwise.
    embeddings = np.ascontiguousarray(model.encode(chunks), dtype="float32")

    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)

    return index, embeddings

# Only the indexes are needed downstream; the embedding matrices are discarded.
fixed_index = build_faiss_index(fixed_chunks)[0]
semantic_index = build_faiss_index(semantic_chunks)[0]


In [None]:
def retrieve(query, index, chunks, top_k=2):
    """Return the chunks whose embeddings are nearest to ``query``.

    Uses the module-level ``model`` to embed the query.

    Args:
        query: The search string.
        index: Index object exposing ``search(vectors, k)`` that returns
            ``(distances, indices)``; expected to have been built from
            ``chunks`` in the same order.
        chunks: The texts the index positions refer to.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` chunk strings, in the order the index returned them.
        Fewer are returned when the index holds fewer vectors; an empty list
        is returned when ``top_k`` is not positive or ``chunks`` is empty.
    """
    if top_k <= 0 or len(chunks) == 0:
        return []

    query_embedding = np.asarray(model.encode([query]), dtype="float32")
    # Never ask for more neighbours than there are chunks.
    k = min(top_k, len(chunks))
    _, indices = index.search(query_embedding, k)

    # FAISS pads missing neighbours with -1; indexing chunks[-1] would silently
    # return the last chunk as a bogus hit, so drop out-of-range ids.
    return [chunks[idx] for idx in indices[0] if 0 <= idx < len(chunks)]


In [None]:
def generate_answer(query, context):
    """Ask the Groq chat model to answer ``query`` from ``context`` only.

    Builds a single user message that embeds the context and the question,
    sends it through the module-level ``client`` with temperature 0, and
    returns the text content of the first completion choice.

    Args:
        query: The question to answer.
        context: Retrieved text the prompt instructs the model to rely on.

    Returns:
        The ``message.content`` of the first returned choice.
    """
    prompt = f"""
You are a helpful assistant.

Use ONLY the context below to answer the question.

Context:
{context}

Question:
{query}

Answer clearly:
"""

    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model="llama-3.1-8b-instant",
        messages=[user_message],
        temperature=0,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content


In [None]:
# Question used by the retrieval and answer cells below. The commented-out
# lines are alternative questions; uncomment one (and comment the active one)
# to compare the chunking strategies on a different topic.
# query = "Who founded Apple?"

query = "What vitamins are in apples?"
# query = "What products does Apple sell?"


In [None]:
# Run the same query against both indexes and show the retrieved chunks,
# each followed by a "-----" separator line.
print("===== FIXED RETRIEVAL =====\n")
fixed_results = retrieve(query, fixed_index, fixed_chunks)
for hit in fixed_results:
    print(hit, "-----", sep="\n")

print("\n===== SEMANTIC RETRIEVAL =====\n")
semantic_results = retrieve(query, semantic_index, semantic_chunks)
for hit in semantic_results:
    print(hit, "-----", sep="\n")


===== FIXED RETRIEVAL =====


Apple is a sweet, edible fruit produced by an apple tree.
Apples are rich in fiber, vitamin C, and antioxidants.
There are many varieties such as Fuji, Gala, and Granny Smith.
Apples are used in dess
-----
erts like apple pie and apple juice.
Eating apples daily supports heart health.

Apple Inc. is an American technology company headquartered in Cupertino, California.
It was founded by Steve Jobs, Stev
-----

===== SEMANTIC RETRIEVAL =====


Apple is a sweet, edible fruit produced by an apple tree.
Apples are rich in fiber, vitamin C, and antioxidants.
There are many varieties such as Fuji, Gala, and Granny Smith.
Apples are used in desserts like apple pie and apple juice.
Eating apples daily supports heart health.

Apple Inc
-----
is an American technology company headquartered in Cupertino, California.
It was founded by Steve Jobs, Steve Wozniak, and Ronald Wayne in 1976.
Apple designs products like iPhone, iPad, Mac, and Apple Watch.
The company earns 

In [None]:
# Flatten each result list into one space-separated context string.
context_fixed, context_semantic = (
    " ".join(hits) for hits in (fixed_results, semantic_results)
)

# Answer the same question once per chunking strategy for comparison.
print("===== GROQ ANSWER (FIXED) =====\n")
ans_fixed = generate_answer(query, context_fixed)
print(ans_fixed)

print("\n===== GROQ ANSWER (SEMANTIC) =====\n")
ans_semantic = generate_answer(query, context_semantic)
print(ans_semantic)


===== GROQ ANSWER (FIXED) =====

Apples are rich in vitamin C.

===== GROQ ANSWER (SEMANTIC) =====

Apples are rich in vitamin C.
