In [1]:
!pip install sentence-transformers faiss-cpu



In [2]:
import faiss
import numpy as np
import re
from sentence_transformers import SentenceTransformer

In [3]:
# Toy corpus: four blank-line-separated paragraphs that alternate between
# "orange" the color and "orange" the fruit. Both chunking strategies below
# are run over this same text.
document_text = """
Orange is a bright color between red and yellow on the visible spectrum.
It is often associated with warmth, energy, and autumn.

Orange is also a citrus fruit rich in vitamin C.
It grows on orange trees and is widely consumed as juice.

The color orange is used in traffic signs because it is highly visible.
Many sports teams use orange in their logos and uniforms.

An orange fruit has a thick peel and juicy pulp inside.
It is commonly eaten fresh or used in desserts and salads.
"""

In [4]:
# Embedding model shared by every index and search helper in this notebook.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Ask the model for its embedding width instead of hard-coding it, so the
# FAISS indexes stay in sync if the model name above is ever changed.
dimension = model.get_sentence_embedding_dimension()
index = faiss.IndexFlatL2(dimension)

documents = []

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

BertModel LOAD REPORT from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


In [5]:
def paragraph_chunk(text):
    """Split ``text`` into paragraphs separated by blank lines.

    A separator is a newline, any amount of whitespace, then another newline.
    That covers plain empty lines, "blank" lines that contain stray spaces or
    tabs, and CRLF line endings.

    Args:
        text: The document to split.

    Returns:
        A list of paragraphs with surrounding whitespace stripped. Empty
        paragraphs are dropped, so an empty or all-whitespace input yields
        an empty list.
    """
    pieces = re.split(r"\n\s*\n", text)
    stripped = (piece.strip() for piece in pieces)
    return [piece for piece in stripped if piece]

In [6]:
def keyword_chunk(text, keywords=None):
    """Group the lines of ``text`` into chunks that start at keyword lines.

    A line that contains any keyword (case-insensitive substring match)
    closes the chunk built so far and starts a new one. Every other
    non-blank line is appended to the current chunk, joined by a single
    space. Blank lines are skipped and each line is stripped, so chunks never
    contain doubled spaces or stray carriage returns.

    Args:
        text: The document to split.
        keywords: Optional iterable of keyword strings. When omitted, the
            original fixed list is used:
            ``color``, ``fruit``, ``trees``, ``spectrum``, ``peel``.

    Returns:
        A list of chunk strings in document order. Empty input yields an
        empty list.
    """
    if keywords is None:
        keywords = ("color", "fruit", "trees", "spectrum", "peel")
    # Lines are lower-cased before matching, so the keywords must be too.
    needles = tuple(keyword.lower() for keyword in keywords)

    chunks = []
    current_lines = []

    for raw_line in text.split("\n"):
        line = raw_line.strip()
        if not line:
            continue

        lowered = line.lower()
        starts_new_chunk = any(needle in lowered for needle in needles)
        if starts_new_chunk and current_lines:
            chunks.append(" ".join(current_lines))
            current_lines = []
        current_lines.append(line)

    if current_lines:
        chunks.append(" ".join(current_lines))

    return chunks

In [7]:
# Paragraph strategy: chunk the corpus, embed every chunk, and store the
# vectors in a dedicated flat L2 index. para_documents keeps the chunk texts
# in the same order as the vectors so a FAISS row id maps back to its text.
para_chunks = paragraph_chunk(document_text)
para_embeddings = model.encode(para_chunks)

para_index = faiss.IndexFlatL2(dimension)
para_index.add(np.array(para_embeddings))
para_documents = list(para_chunks)

print(f"Paragraph Chunks: {len(para_chunks)} \n")
for i, c in enumerate(para_chunks, start=1):
    print(f"{i} {c} \n")

Paragraph Chunks: 4 

1 Orange is a bright color between red and yellow on the visible spectrum.
It is often associated with warmth, energy, and autumn. 

2 Orange is also a citrus fruit rich in vitamin C.
It grows on orange trees and is widely consumed as juice. 

3 The color orange is used in traffic signs because it is highly visible.
Many sports teams use orange in their logos and uniforms. 

4 An orange fruit has a thick peel and juicy pulp inside.
It is commonly eaten fresh or used in desserts and salads. 



In [8]:
# Keyword strategy: chunk the corpus, embed every chunk, and store the
# vectors in a dedicated flat L2 index. keyword_documents keeps the chunk
# texts in the same order as the vectors so a FAISS row id maps back to its text.
keyword_chunks = keyword_chunk(document_text)
keyword_embeddings = model.encode(keyword_chunks)

keyword_index = faiss.IndexFlatL2(dimension)
keyword_index.add(np.array(keyword_embeddings))
keyword_documents = list(keyword_chunks)

print(f"Keyword Chunks: {len(keyword_chunks)} \n")
for i, c in enumerate(keyword_chunks, start=1):
    print(f"{i} {c} \n")

Keyword Chunks: 5 

1 Orange is a bright color between red and yellow on the visible spectrum. It is often associated with warmth, energy, and autumn. 

2 Orange is also a citrus fruit rich in vitamin C. 

3 It grows on orange trees and is widely consumed as juice. 

4 The color orange is used in traffic signs because it is highly visible. Many sports teams use orange in their logos and uniforms. 

5 An orange fruit has a thick peel and juicy pulp inside. It is commonly eaten fresh or used in desserts and salads. 



In [9]:
def search_paragraph(query, k=2):
    """Return up to ``k`` paragraph chunks retrieved for ``query``.

    The query is embedded with the notebook-level ``model`` and looked up in
    ``para_index``; hits are mapped back to text through ``para_documents``.

    Args:
        query: The question or search string.
        k: Number of neighbours to request from the index.

    Returns:
        A list of chunk strings in the order FAISS returned them. It holds
        fewer than ``k`` items when the index contains fewer than ``k``
        vectors.
    """
    query_embedding = model.encode([query])
    _, neighbour_ids = para_index.search(np.array(query_embedding), k)

    # FAISS pads the id row with -1 when it cannot find k neighbours. Used as
    # a list index, -1 would silently return the *last* document as a hit.
    return [para_documents[idx] for idx in neighbour_ids[0] if idx >= 0]

In [10]:
def search_keyword(query, k=2):
    """Return up to ``k`` keyword-based chunks retrieved for ``query``.

    The query is embedded with the notebook-level ``model`` and looked up in
    ``keyword_index``; hits are mapped back to text through
    ``keyword_documents``.

    Args:
        query: The question or search string.
        k: Number of neighbours to request from the index.

    Returns:
        A list of chunk strings in the order FAISS returned them. It holds
        fewer than ``k`` items when the index contains fewer than ``k``
        vectors.
    """
    query_embedding = model.encode([query])
    _, neighbour_ids = keyword_index.search(np.array(query_embedding), k)

    # FAISS pads the id row with -1 when it cannot find k neighbours. Used as
    # a list index, -1 would silently return the *last* document as a hit.
    return [keyword_documents[idx] for idx in neighbour_ids[0] if idx >= 0]

In [11]:
# Off-topic probe: document_text says nothing about planets, so each index
# can only return whichever chunks happen to lie nearest to the query.
query = "Which planet has the Great Red Spot?"

# Hits from the paragraph-based index.
print("Paragraph Chunking Result:\n")
para_results = search_paragraph(query)
for i, r in enumerate(para_results, start=1):
    print(f"{i} {r} \n")

# Hits from the keyword-based index.
print("Keyword Chunking Result:\n")
keyword_results = search_keyword(query)
for i, r in enumerate(keyword_results, start=1):
    print(f"{i} {r} \n")

Paragraph Chunking Result:

1 Orange is a bright color between red and yellow on the visible spectrum.
It is often associated with warmth, energy, and autumn. 

2 The color orange is used in traffic signs because it is highly visible.
Many sports teams use orange in their logos and uniforms. 

Keyword Chunking Result:

1 Orange is a bright color between red and yellow on the visible spectrum. It is often associated with warmth, energy, and autumn. 

2 It grows on orange trees and is widely consumed as juice. 



In [12]:
import os

from groq import Groq

# Take the key from the GROQ_API_KEY environment variable instead of a string
# literal, so no credential is stored in the notebook and the cell needs no
# manual editing before a re-run.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

In [13]:
def rag_with_groq_paragraph(query, k=2):
    """Answer ``query`` with Groq, using paragraph chunks as context.

    Embeds the query with the notebook-level ``model``, retrieves neighbours
    from ``para_index``, joins the matching ``para_documents`` entries with
    blank lines, and sends context plus question to the chat model through
    ``client``. The query, the retrieved context and the answer are printed;
    nothing is returned.

    Args:
        query: The question to answer.
        k: Number of neighbours to request from the index.
    """
    query_embedding = model.encode([query])
    _, neighbour_ids = para_index.search(np.array(query_embedding), k)

    # FAISS pads the id row with -1 when it cannot find k neighbours. Used as
    # a list index, -1 would silently add the *last* document to the context.
    retrieved_chunks = [para_documents[i] for i in neighbour_ids[0] if i >= 0]

    context = "\n\n".join(retrieved_chunks)

    prompt = f"Context: {context}\nQuestion:{query}"

    response = client.chat.completions.create(
        model="llama-3.1-8b-instant",
        messages=[
            {"role": "user", "content": prompt}
        ],
        temperature=0
    )

    answer = response.choices[0].message.content

    print("Query:", query)
    print("Retrieved Context:",context)
    print("Groq Answer:")
    print(answer)

In [14]:
def rag_with_groq_keyword(query, k=2):
    """Answer ``query`` with Groq, using keyword-based chunks as context.

    Embeds the query with the notebook-level ``model``, retrieves neighbours
    from ``keyword_index``, joins the matching ``keyword_documents`` entries
    with blank lines, and sends context plus question to the chat model
    through ``client``. The query, the retrieved context and the answer are
    printed; nothing is returned.

    Args:
        query: The question to answer.
        k: Number of neighbours to request from the index.
    """
    query_embedding = model.encode([query])
    _, neighbour_ids = keyword_index.search(np.array(query_embedding), k)

    # FAISS pads the id row with -1 when it cannot find k neighbours. Used as
    # a list index, -1 would silently add the *last* document to the context.
    retrieved_chunks = [keyword_documents[idx] for idx in neighbour_ids[0] if idx >= 0]

    context = "\n\n".join(retrieved_chunks)

    prompt = f"Context: {context}\nQuestion:{query}"

    response = client.chat.completions.create(
        model="llama-3.1-8b-instant",
        messages=[
            {"role": "user", "content": prompt}
        ],
        temperature=0
    )

    answer = response.choices[0].message.content

    print("Query:", query)
    print("Retrieved Context:")
    print(context)
    print("Groq Answer:",answer)

In [15]:
# Run the RAG pipeline with context retrieved from the paragraph-based index.
rag_with_groq_paragraph("What does the color orange represent?")

Query: What does the color orange represent?
Retrieved Context: Orange is a bright color between red and yellow on the visible spectrum.
It is often associated with warmth, energy, and autumn.

The color orange is used in traffic signs because it is highly visible.
Many sports teams use orange in their logos and uniforms.
Groq Answer:
The color orange is often associated with several meanings, including:

1. Warmth: Orange is often linked to feelings of warmth, coziness, and comfort.
2. Energy: Orange is a vibrant and energetic color that can evoke feelings of excitement and enthusiasm.
3. Autumn: Orange is a color commonly associated with the autumn season, symbolizing the changing leaves and harvest time.
4. Caution: In a more practical sense, orange is used in traffic signs to grab attention and signal caution.

Overall, the color orange represents a dynamic and attention-grabbing quality that can evoke a range of emotions and associations.


In [16]:
# Same question, this time with context retrieved from the keyword-based index.
rag_with_groq_keyword("What does the color orange represent?")

Query: What does the color orange represent?
Retrieved Context:
Orange is a bright color between red and yellow on the visible spectrum. It is often associated with warmth, energy, and autumn.

The color orange is used in traffic signs because it is highly visible. Many sports teams use orange in their logos and uniforms.
Groq Answer: The color orange is often associated with warmth, energy, and autumn.
