In [3]:
import os
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import subprocess


Load Text Documents

In [4]:
DOCS_FOLDER = "docs"

# Read every .txt file in DOCS_FOLDER into memory, one string per file.
# os.listdir() returns names in arbitrary order, so the names are sorted to
# keep the document order (and therefore the chunk indices built from it)
# identical from one run to the next.
documents = []

for file in sorted(os.listdir(DOCS_FOLDER)):
    if file.endswith(".txt"):
        with open(os.path.join(DOCS_FOLDER, file), "r", encoding="utf-8") as f:
            documents.append(f.read())

print(f"Loaded {len(documents)} documents")


Loaded 1 documents


Chunk Documents (200–300 Words)

In [6]:
def chunk_text(text, chunk_size=200, overlap=0):
    """Split ``text`` into whitespace-delimited chunks of at most ``chunk_size`` words.

    Words are obtained with ``str.split()``, so runs of whitespace collapse and
    each chunk is re-joined with single spaces.

    Args:
        text: The document text to split.
        chunk_size: Maximum number of words per chunk. Must be >= 1.
        overlap: Number of trailing words from one chunk that are repeated at
            the start of the next. Must satisfy ``0 <= overlap < chunk_size``.
            The default of 0 produces back-to-back, non-overlapping chunks.

    Returns:
        A list of chunk strings in document order; empty when ``text``
        contains no words.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    words = text.split()
    step = chunk_size - overlap
    chunks = []

    for start in range(0, len(words), step):
        chunks.append(" ".join(words[start:start + chunk_size]))
        # Once a chunk reaches the end of the text, any further window would
        # only repeat words already covered (relevant when overlap > 0).
        if start + chunk_size >= len(words):
            break

    return chunks

# Flatten the per-document chunk lists into one list, preserving document order.
chunks = [piece for doc in documents for piece in chunk_text(doc)]

print(f"Total Chunks Created: {len(chunks)}")


Total Chunks Created: 2


In [7]:
# Load the sentence-embedding model once; it is reused later to embed questions.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Embed every chunk in one call; the result is a NumPy array with one row per chunk.
chunk_embeddings = embedder.encode(chunks, show_progress_bar=True, convert_to_numpy=True)

print("Embeddings shape:", chunk_embeddings.shape)


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Embeddings shape: (2, 384)


In [17]:
def retrieve_top_chunks(question, top_k=3):
    """Return the chunks most similar to ``question``, best match first.

    The question is embedded with the module-level ``embedder`` and compared
    against ``chunk_embeddings`` using cosine similarity.

    Args:
        question: Query text to embed.
        top_k: Maximum number of chunks to return. If it exceeds the number of
            chunks, all chunks are returned; 0 yields an empty list.

    Returns:
        A list of at most ``top_k`` entries from ``chunks``, ordered from
        highest to lowest similarity.

    Raises:
        ValueError: If ``top_k`` is negative.
    """
    if top_k < 0:
        raise ValueError("top_k must be non-negative")

    question_embedding = embedder.encode([question], convert_to_numpy=True)

    similarities = cosine_similarity(question_embedding, chunk_embeddings)[0]

    # Sort descending first, then take the head. Slicing with ``[-top_k:]``
    # would treat top_k=0 as ``[-0:]`` and return every chunk.
    ranked_indices = np.argsort(similarities)[::-1]
    top_indices = ranked_indices[:top_k]

    return [chunks[i] for i in top_indices]


In [23]:
def ask_llama(prompt, model="llama3.2"):
    """Send ``prompt`` to a local Ollama model and return its reply as text.

    Runs ``ollama run <model>`` as a subprocess, writes the UTF-8 encoded
    prompt to its stdin and reads the reply from its stdout.

    Args:
        prompt: The full prompt text to send.
        model: Name of the Ollama model to run. Defaults to ``"llama3.2"``.

    Returns:
        The model output decoded as UTF-8 with surrounding whitespace stripped.

    Raises:
        RuntimeError: If the ``ollama`` process exits with a non-zero status;
            the message includes whatever the process wrote to stderr.
    """
    result = subprocess.run(
        ["ollama", "run", model],
        input=prompt.encode("utf-8"),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    # Without this check a failed run would hand back an empty or partial
    # string that is indistinguishable from a real answer.
    if result.returncode != 0:
        error_text = result.stderr.decode("utf-8", errors="replace").strip()
        raise RuntimeError(
            f"ollama exited with status {result.returncode}: {error_text}"
        )

    # errors="replace" keeps a long generation usable even if the process
    # emits a stray byte that is not valid UTF-8.
    return result.stdout.decode("utf-8", errors="replace").strip()


In [24]:
def llama_no_rag(question):
    """Ask the model ``question`` directly, with no retrieved context.

    The question is wrapped in a one-line instruction and passed to
    ``ask_llama``; the model's reply is returned unchanged.
    """
    return ask_llama(f"Answer the question clearly and short:\n{question}")


In [25]:
def llama_with_rag(question, top_k=3):
    """Answer ``question`` using retrieved chunks as the only allowed context.

    Args:
        question: The question to answer.
        top_k: Number of chunks to retrieve and place in the prompt. Defaults
            to 3.

    Returns:
        The reply returned by ``ask_llama`` for the assembled prompt.
    """
    top_chunks = retrieve_top_chunks(question, top_k=top_k)

    # Blank lines between chunks keep them visually separate inside the prompt.
    context = "\n\n".join(top_chunks)

    prompt = f"""
Use ONLY the following context to answer the question.

Context:
{context}

Question: {question}
Answer:
"""
    return ask_llama(prompt)


In [26]:
# Evaluation questions; each one is later sent through both llama_no_rag and
# llama_with_rag so the two answers can be compared side by side.
questions = [
    "What is Artificial Intelligence?",
    "What is Natural Language Processing?",
    "What are Large Language Models?",
    "What is hallucination in LLMs?",
    "What is Retrieval-Augmented Generation?",
    "Why is chunking used in RAG systems?",
    "What are embeddings?",
    "How does similarity search work?",
    "Why does RAG reduce hallucinations?",
    "What are applications of RAG?"
]


In [27]:
# Smoke test: send one raw question straight to ask_llama (no prompt template,
# no retrieval) and print the reply.
print(ask_llama("What is Artificial Intelligence?"))


Artificial Intelligence (AI) refers to the development of computer systems that can perform tasks that would typically require human intelligence, such as:

1. Learning: AI systems can learn from data and improve their performance over time.
2. Problem-solving: AI systems can solve complex problems by analyzing data, identifying patterns, and making decisions.
3. Reasoning: AI systems can draw conclusions based on the information they have been trained on.
4. Perception: AI systems can interpret and understand data from sensors, such as images, speech, and text.

AI has many applications in various fields, including:

1. Virtual assistants (e.g., Siri, Alexa)
2. Image recognition and object detection
3. Natural Language Processing (NLP) for language translation and sentiment analysis
4. Predictive analytics and decision-making
5. Robotics and autonomous vehicles

There are several types of AI, including:

1. Narrow or Weak AI: Designed to perform a specific task, such as facial recogni

In [28]:
# Run every question through both pipelines, echo the answers, and collect
# one record per question for the comparison table.
results = []
separator = "=" * 80

for question in questions:
    print("\n" + separator)
    print("QUESTION:", question)

    # Dict values are evaluated in order: plain model first, then RAG.
    record = {
        "Question": question,
        "LLaMA Alone": llama_no_rag(question),
        "LLaMA + RAG": llama_with_rag(question),
    }

    print("\n--- LLaMA Alone ---")
    print(record["LLaMA Alone"])

    print("\n--- LLaMA + RAG ---")
    print(record["LLaMA + RAG"])

    results.append(record)



QUESTION: What is Artificial Intelligence?

--- LLaMA Alone ---
Artificial Intelligence (AI) refers to the development of computer systems that can perform tasks that typically require human intelligence, such as:

1. Learning: The ability to learn from data and improve performance over time.
2. Problem-solving: The ability to analyze problems and find solutions.
3. Reasoning: The ability to draw conclusions based on available information.
4. Perception: The ability to interpret and understand data from sensors.

AI systems use algorithms, machine learning models, and other techniques to process and analyze data, making decisions, and taking actions autonomously or with human oversight.

There are several types of AI, including:

1. Narrow or Weak AI: Designed to perform a specific task, such as facial recognition or language translation.
2. General or Strong AI: A hypothetical AI system that possesses the ability to understand, learn, and apply knowledge across a wide range of tasks,

In [29]:
# Tabulate the collected answers: one row per question, one column per pipeline.
df_results = pd.DataFrame(data=results)

# Persist the comparison next to the notebook; the row index is not written.
df_results.to_csv(path_or_buf="rag_comparison_results.csv", index=False)

df_results


Unnamed: 0,Question,LLaMA Alone,LLaMA + RAG
0,What is Artificial Intelligence?,Artificial Intelligence (AI) refers to the dev...,Artificial Intelligence (AI) refers to the sim...
1,What is Natural Language Processing?,Natural Language Processing (NLP) is a subfiel...,Natural Language Processing (NLP) refers to en...
2,What are Large Language Models?,A Large Language Model (LLM) is a type of arti...,"According to the provided context, Large Langu..."
3,What is hallucination in LLMs?,"In Large Language Models (LLMs), a hallucinati...","In Large Language Models (LLMs), hallucination..."
4,What is Retrieval-Augmented Generation?,Retrieval-Augmented Generation (RAG) is a tech...,Retrieval-Augmented Generation (RAG) is a meth...
5,Why is chunking used in RAG systems?,Chunking is used in RAG (Resource Allocation G...,Chunking is used in RAG systems to divide docu...
6,What are embeddings?,Embeddings are a fundamental concept in natura...,In a Retrieval-Augmented Generation (RAG) syst...
7,How does similarity search work?,Similarity search is a type of search algorith...,Similarity search in a Retrieval-Augmented Gen...
8,Why does RAG reduce hallucinations?,"I couldn't find any information on ""RAG"" reduc...","According to the context, RAG reduces hallucin..."
9,What are applications of RAG?,"RAG stands for Radiation Attenuation Glass, al...",RAG systems are used in:\n\n1. Chatbots\n2. Kn...
