In [1]:
# LlamaIndex RAG Pipeline Exploration

# This notebook explores the LlamaIndex framework for building a Retrieval-Augmented Generation (RAG) pipeline.

## Table of Contents
# 1. Setup and Imports
# 2. Document Loading
# 3. Document Metadata Configuration
# 4. Document Transformation & Extraction
# 5. Embedding Generation
# 6. Vector Store and Indexing
# 7. Query Engine
# 8. Persistent Storage with ChromaDB

## 1. Setup and Imports

In [23]:
# Core imports for document processing and LLM integration
from llama_index.core import SimpleDirectoryReader, Document, VectorStoreIndex, StorageContext
from llama_index.core.schema import MetadataMode
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.extractors import TitleExtractor, QuestionsAnsweredExtractor
from llama_index.core.ingestion import IngestionPipeline

# LLM and embedding imports
from llama_index.llms.groq import Groq
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Vector store imports
from llama_index.vector_stores.chroma import ChromaVectorStore
import chromadb

# Utility imports
import os
import getpass
import pprint

# Fix for nested asyncio in Jupyter notebooks - MUST be applied before any LlamaIndex operations
import nest_asyncio
nest_asyncio.apply()

## 2. Document Loading

Load documents from the data directory.

In [3]:
# Load all documents from the data directory
# filename_as_id=False means auto-generate document IDs
docs = SimpleDirectoryReader(input_dir='../data', filename_as_id=False).load_data()

In [4]:
# Check the number of documents loaded
print(f"Number of documents loaded: {len(docs)}")

Number of documents loaded: 7


In [5]:
# Inspect the loaded documents structure
pprint.pprint(docs)

[Document(id_='911b995b-07a3-4c54-8cbc-7ea1cebe5e03', embedding=None, metadata={'page_label': '1', 'file_name': 'Transformations in pyspark .pdf', 'file_path': 'c:\\Users\\Ibrahim\\Documents\\WORK\\Faculty-Projects\\nlp\\notebook\\..\\data\\Transformations in pyspark .pdf', 'file_type': 'application/pdf', 'file_size': 101697, 'creation_date': '2025-12-27', 'last_modified_date': '2025-12-13'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='[ \nE T L \np r o c e s s e s \nu s i n g \nP y S p a r k \n] \n# \nQ u i c k \nS u m m a r y \n 1.EnvironmentSetupandSparkSessionCreation\n●\nInstall\nPySpark\n: pipinstallpyspark●\nStart\na\nSparkSessi

## 3. Document Metadata Configuration

Configure which metadata should be included when sending documents to LLMs vs embeddings.

In [6]:
# Configure document text templates and metadata exclusions
# page_label is not useful for embeddings, so we exclude it
for doc in docs:
    doc.text_template = "Metadata:\n{metadata_str}\n---\nContent:\n{content}"
    if "page_label" not in doc.excluded_embed_metadata_keys:
        doc.excluded_embed_metadata_keys.append("page_label")

# file_path is also not crucial for embeddings
for doc in docs:
    doc.text_template = "Metadata:\n{metadata_str}\n---\nContent:\n{content}"
    if "file_path" not in doc.excluded_embed_metadata_keys:
        doc.excluded_embed_metadata_keys.append("file_path")

In [7]:
# Verify the documents after metadata configuration
pprint.pprint(docs)

[Document(id_='911b995b-07a3-4c54-8cbc-7ea1cebe5e03', embedding=None, metadata={'page_label': '1', 'file_name': 'Transformations in pyspark .pdf', 'file_path': 'c:\\Users\\Ibrahim\\Documents\\WORK\\Faculty-Projects\\nlp\\notebook\\..\\data\\Transformations in pyspark .pdf', 'file_type': 'application/pdf', 'file_size': 101697, 'creation_date': '2025-12-27', 'last_modified_date': '2025-12-13'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date', 'page_label', 'file_path'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='[ \nE T L \np r o c e s s e s \nu s i n g \nP y S p a r k \n] \n# \nQ u i c k \nS u m m a r y \n 1.EnvironmentSetupandSparkSessionCreation\n●\nInstall\nPySpark\n: pipinstallpys

In [8]:
# View what the embedding model will see (without page_label and file_path)
print("=== Content for Embedding ===")
print(docs[0].get_content(metadata_mode=MetadataMode.EMBED))

=== Content for Embedding ===
[ 
E T L 
p r o c e s s e s 
u s i n g 
P y S p a r k 
] 
# 
Q u i c k 
S u m m a r y 
 1.EnvironmentSetupandSparkSessionCreation
●
Install
PySpark
: pipinstallpyspark●
Start
a
SparkSession
: frompyspark.sqlimportSparkSession;spark=SparkSession.builder.appName('ETLProcess').getOrCreate()
2.DataExtraction
●
Read
Data
from
CSV
: df= spark.read.csv('path/to/csv',inferSchema=True,header=True)●
Read
Data
from
JSON
: df= spark.read.json('path/to/json')●
Read
Data
from
Parquet
: df= spark.read.parquet('path/to/parquet')●
Read
Data
from
a
Database
: df=spark.read.format("jdbc").option("url",jdbc_url).option("dbtable","table_name").option("user","username").option("password","password").load()
3.DataTransformation
●
Selecting
Columns
: df.select('column1','column2')●
Filtering
Data
: df.filter(df['column']> value)●
Adding
New
Columns
: df.withColumn('new_column',df['column']+10)●
Renaming
Columns
: df.withColumnRenamed('old_name','new_name')●
Grouping
and
Aggregati

In [9]:
# View what the LLM will see (includes all metadata)
print("=== Content for LLM ===")
print(docs[0].get_content(metadata_mode=MetadataMode.LLM))

=== Content for LLM ===
Metadata:
page_label: 1
file_path: c:\Users\Ibrahim\Documents\WORK\Faculty-Projects\nlp\notebook\..\data\Transformations in pyspark .pdf
---
Content:
[ 
E T L 
p r o c e s s e s 
u s i n g 
P y S p a r k 
] 
# 
Q u i c k 
S u m m a r y 
 1.EnvironmentSetupandSparkSessionCreation
●
Install
PySpark
: pipinstallpyspark●
Start
a
SparkSession
: frompyspark.sqlimportSparkSession;spark=SparkSession.builder.appName('ETLProcess').getOrCreate()
2.DataExtraction
●
Read
Data
from
CSV
: df= spark.read.csv('path/to/csv',inferSchema=True,header=True)●
Read
Data
from
JSON
: df= spark.read.json('path/to/json')●
Read
Data
from
Parquet
: df= spark.read.parquet('path/to/parquet')●
Read
Data
from
a
Database
: df=spark.read.format("jdbc").option("url",jdbc_url).option("dbtable","table_name").option("user","username").option("password","password").load()
3.DataTransformation
●
Selecting
Columns
: df.select('column1','column2')●
Filtering
Data
: df.filter(df['column']> value)●
Adding
N

### Example: Creating a Custom Document with Metadata

Demonstration of how to create a document from scratch with custom metadata configuration.

In [10]:
# Create a custom document with specific metadata handling
document = Document(
    text="This framework is amazing!",
    metadata={
        "filename": "spark the definitive guide",
        "category": "technology",
        "author": "ibrahim"
    },
    # Exclude 'filename' from both LLM and embedding views
    excluded_llm_metadata_keys=["filename"],
    excluded_embed_metadata_keys=["filename"],
    # Customize how metadata is formatted
    metadata_separator="\n",
    metadata_template="{key}=>{value}",
    text_template="Metadata: \n{metadata_str}\n-------\n Content=>{content}"
)

In [11]:
# View what the LLM sees for the custom document
print("=== The LLM sees this ===")
print(document.get_content(metadata_mode=MetadataMode.LLM))

=== The LLM sees this ===
Metadata: 
category=>technology
author=>ibrahim
-------
 Content=>This framework is amazing!


In [12]:
# View what the embedding model sees for the custom document
print("=== The embedding model sees this ===")
print(document.get_content(metadata_mode=MetadataMode.EMBED))

=== The embedding model sees this ===
Metadata: 
category=>technology
author=>ibrahim
-------
 Content=>This framework is amazing!


## 4. LLM Setup and Document Transformation

Set up the Groq LLM and create an ingestion pipeline that:
- Splits documents into chunks
- Extracts titles using LLM
- Generates questions that each chunk can answer

In [13]:
# # Set up Groq API key securely
# os.environ["GROQ_API_KEY"] = getpass.getpass("Enter your Groq API key: ")

In [14]:
# # Initialize Groq LLM for document transformations
# # Using groq/compound-mini model for fast inference
# llm_transformations = Groq(
#     model="groq/compound-mini", 
#     api_key=os.environ["GROQ_API_KEY"]
# )

In [15]:
# # Create an ingestion pipeline with multiple transformations
# # 1. Text Splitter: Chunks documents into manageable pieces
# text_splitter = SentenceSplitter(
#     separator=" ",           # Split on spaces
#     chunk_size=20248,        # Max tokens per chunk
#     chunk_overlap=128        # Overlap between chunks for context continuity
# )

# # 2. Title Extractor: Generates descriptive titles for chunks using LLM
# title_extractor = TitleExtractor(
#     llm=llm_transformations,
#     nodes=3                  # Use 3 nodes for context when generating titles
# )

# # 3. Q&A Extractor: Generates questions that each chunk can answer
# qa_extractor = QuestionsAnsweredExtractor(
#     llm=llm_transformations,
#     questions=3              # Generate 3 questions per chunk
# )

# # Combine all transformations into a pipeline
# pipeline = IngestionPipeline(
#     transformations=[
#         text_splitter,
#         title_extractor,
#         qa_extractor
#     ]
# )

# # Run the pipeline on the documents
# # in_place=True modifies the original documents
# nodes = pipeline.run(
#     documents=docs,
#     in_place=True,
#     show_progress=True
# )

## 4.1. Alternative: Using Ollama (Deployed LLM Service)

Instead of using Groq, you can use your deployed Ollama service for transformations and querying.
Ollama is running at http://localhost:11434 in your Docker container.

In [16]:
# Import Ollama integration
from llama_index.llms.ollama import Ollama

# Initialize Ollama LLM pointing to your Docker service
# Make sure your Ollama container is running and has a model pulled
# Use the FULL model name with tag as shown in 'ollama list'
# Note: Use smaller models if you have limited memory (see configuration tips below)
ollama_llm = Ollama(
    model="llama3.2:latest",
    base_url="http://localhost:11434",
    keep_alive=0,          # unload immediately after request
    context_window=1024,   # smaller context
    request_timeout=300.0,     # 5 minute timeout for slow responses
    temperature=0.1 
    )


# Test the Ollama connection
test_response = ollama_llm.complete("Hello! Can you hear me?")

print("=== Ollama Test Response ===")
print(test_response)

2025-12-28 01:09:35,775 - INFO - HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"


=== Ollama Test Response ===
I'm glad you're here. Unfortunately, I'm a large language model, I don't have the ability to hear or see you in the classical sense. I communicate with you through text-based interactions only. However, I can still chat with you and respond to your questions and statements! How's your day going so far?


### Check System Memory & Choose Model

Before connecting to Ollama, check your available memory and choose an appropriate model.

### Using Ollama for Document Transformations

Create an ingestion pipeline using Ollama instead of Groq.

In [25]:
# Create transformations using Ollama LLM
import nest_asyncio
import asyncio

text_splitter_ollama = SentenceSplitter(
    separator=" ",
    chunk_size=1024,         # Smaller chunks for faster processing
    chunk_overlap=128
)

# Title extractor using Ollama
title_extractor_ollama = TitleExtractor(
    llm=ollama_llm,
    nodes=2
)

# Q&A extractor using Ollama
qa_extractor_ollama = QuestionsAnsweredExtractor(
    llm=ollama_llm,
    questions=3
)

# Create pipeline with Ollama-based extractors
pipeline_ollama = IngestionPipeline(
    transformations=[
        text_splitter_ollama,
        title_extractor_ollama,
        qa_extractor_ollama
    ]
)

# Run the pipeline on documents
# Note: This might take longer than Groq depending on your GPU
async def run_pipeline():
    return await pipeline_ollama.arun(documents=docs, in_place=False, show_progress=True)

nodes_ollama = asyncio.get_event_loop().run_until_complete(run_pipeline())

print(f"Created {len(nodes_ollama)} nodes with Ollama transformations")

Parsing nodes: 100%|██████████| 7/7 [00:00<00:00, 1416.72it/s]
  0%|          | 0/7 [00:00<?, ?it/s]2025-12-28 01:24:58,834 - INFO - HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
2025-12-28 01:25:02,194 - INFO - HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
2025-12-28 01:25:04,753 - INFO - HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
2025-12-28 01:25:06,106 - INFO - HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
2025-12-28 01:25:09,000 - INFO - HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
 14%|█▍        | 1/7 [00:16<01:38, 16.35s/it]2025-12-28 01:25:13,146 - INFO - HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
 29%|██▊       | 2/7 [00:20<00:45,  9.17s/it]2025-12-28 01:25:17,151 - INFO - HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
 43%|████▎     | 3/7 [00:24<00:27,  6.81s/it]2025-12-28 01:25:19,855 - INFO - HTTP Request: POST

Created 7 nodes with Ollama transformations





### Interactive Chat with Ollama

Create a simple chat interface to interact with your deployed Ollama service.

In [None]:
# Simple chat function with Ollama
def chat_with_ollama(message, llm=ollama_llm):
    """
    Send a message to Ollama and get a response.
    
    Args:
        message: User's question or prompt
        llm: Ollama LLM instance
    
    Returns:
        Response from the LLM
    """
    response = llm.complete(message)
    return response.text

# Example interactions
print("=== Chat Example 1 ===")
response1 = chat_with_ollama("What is machine learning in simple terms?")
print(response1)

print("\n=== Chat Example 2 ===")
response2 = chat_with_ollama("Explain the difference between supervised and unsupervised learning.")
print(response2)

: 

: 

: 

: 

: 

: 

### RAG Query Engine with Ollama

Use Ollama with your vector index for RAG-based question answering.

In [None]:
# Create a vector index with Ollama-transformed nodes
# Use the nodes created with Ollama or the original nodes
index_ollama = VectorStoreIndex(nodes_ollama, embed_model=hf_embeddings)

# Create query engine with Ollama LLM
query_engine_ollama = index_ollama.as_query_engine(
    llm=ollama_llm,
    similarity_top_k=3  # Retrieve top 3 most relevant chunks
)

# Query the documents using Ollama
user_question = "What is this document about?"
print(f"=== Question: {user_question} ===\n")

response_ollama = query_engine_ollama.query(user_question)
print("=== Ollama RAG Response ===")
print(response_ollama)

# Show the source nodes used
print("\n=== Retrieved Source Chunks ===")
for i, node in enumerate(response_ollama.source_nodes, 1):
    print(f"\n--- Source {i} (Score: {node.score:.4f}) ---")
    print(node.text[:300] + "...")  # Show first 300 characters

: 

: 

: 

: 

: 

: 

### Interactive Q&A Loop with Ollama

Create an interactive loop for continuous questions and answers.

In [None]:
# Interactive Q&A function
def interactive_qa_with_ollama(query_engine, max_questions=5):
    """
    Interactive Q&A session with Ollama RAG system.
    
    Args:
        query_engine: The query engine to use
        max_questions: Maximum number of questions (default: 5)
    """
    print("=" * 60)
    print("Interactive Q&A with Ollama (type 'exit' or 'quit' to stop)")
    print("=" * 60)
    
    question_count = 0
    
    while question_count < max_questions:
        # Get user input
        user_query = input(f"\n[Question {question_count + 1}] You: ").strip()
        
        # Check for exit
        if user_query.lower() in ['exit', 'quit', 'q']:
            print("Exiting interactive session. Goodbye!")
            break
        
        if not user_query:
            print("Please enter a valid question.")
            continue
        
        # Query the engine
        print(f"\n[Ollama]: Thinking...", end="")
        try:
            response = query_engine.query(user_query)
            print("\r" + " " * 30 + "\r", end="")  # Clear "Thinking..."
            print(f"[Ollama]: {response}\n")
            
            # Optionally show sources
            show_sources = input("Show source chunks? (y/n): ").strip().lower()
            if show_sources == 'y':
                print("\n--- Source Chunks ---")
                for i, node in enumerate(response.source_nodes, 1):
                    print(f"\nSource {i} (Relevance: {node.score:.4f}):")
                    print(node.text[:200] + "...\n")
        
        except Exception as e:
            print(f"\nError: {e}")
        
        question_count += 1
    
    print(f"\nSession ended. Answered {question_count} questions.")

# Run interactive session
# Uncomment the line below to start the interactive session
# interactive_qa_with_ollama(query_engine_ollama, max_questions=10)

: 

: 

: 

: 

: 

: 

### Inspect Transformed Nodes

View the nodes after transformation to see the extracted metadata.

In [None]:
# View what the embedding model sees for the first node
print("=== Content for Embedding (First Node) ===")
print(nodes[0].get_content(metadata_mode=MetadataMode.EMBED))

[Excerpt from document]
document_title: **End‑to‑End PySpark ETL Workflow: Environment Setup, Data Extraction, Transformation, and Missing‑Value Handling**
questions_this_excerpt_can_answer: 1. What exact code snippet does the document give for creating a SparkSession named **'ETLProcess'**?

2. Which three `DataFrame.na` methods are listed for handling missing values, and what are the precise method calls shown for each (dropping rows, filling values, and replacing values)?

3. According to the document’s metadata, what is the file size (in bytes) of **“Transformations in pyspark .pdf”**?
Excerpt:
-----
[ 
E T L 
p r o c e s s e s 
u s i n g 
P y S p a r k 
] 
# 
Q u i c k 
S u m m a r y 
 1.EnvironmentSetupandSparkSessionCreation
●
Install
PySpark
: pipinstallpyspark●
Start
a
SparkSession
: frompyspark.sqlimportSparkSession;spark=SparkSession.builder.appName('ETLProcess').getOrCreate()
2.DataExtraction
●
Read
Data
from
CSV
: df= spark.read.csv('path/to/csv',inferSchema=True,header=Tr

: 

: 

: 

: 

: 

: 

In [None]:
# Inspect the full structure of the first node
print("=== First Node Structure ===")
pprint.pprint(nodes[0])

TextNode(id_='d853ddbd-4435-4b3d-8904-b264063852b0', embedding=None, metadata={'page_label': '1', 'file_name': 'Transformations in pyspark .pdf', 'file_path': 'c:\\Users\\Ibrahim\\Documents\\WORK\\Faculty-Projects\\nlp\\notebook\\..\\data\\Transformations in pyspark .pdf', 'file_type': 'application/pdf', 'file_size': 101697, 'creation_date': '2025-12-27', 'last_modified_date': '2025-12-13', 'document_title': '**End‑to‑End PySpark ETL Workflow: Environment Setup, Data Extraction, Transformation, and Missing‑Value Handling**', 'questions_this_excerpt_can_answer': "1. What exact code snippet does the document give for creating a SparkSession named **'ETLProcess'**?\n\n2. Which three\u202f`DataFrame.na`\u202fmethods are listed for handling missing values, and what are the precise method calls shown for each (dropping rows, filling values, and replacing values)?\n\n3. According to the document’s metadata, what is the file size (in bytes) of **“Transformations in pyspark .pdf”**?"}, excluded

: 

: 

: 

: 

: 

: 

In [None]:
# View what the LLM sees for the first node (includes extracted title and Q&A)
print("=== Content for LLM (First Node) ===")
print(nodes[0].get_content(metadata_mode=MetadataMode.LLM))

[Excerpt from document]
page_label: 1
file_path: c:\Users\Ibrahim\Documents\WORK\Faculty-Projects\nlp\notebook\..\data\Transformations in pyspark .pdf
document_title: **End‑to‑End PySpark ETL Workflow: Environment Setup, Data Extraction, Transformation, and Missing‑Value Handling**
questions_this_excerpt_can_answer: 1. What exact code snippet does the document give for creating a SparkSession named **'ETLProcess'**?

2. Which three `DataFrame.na` methods are listed for handling missing values, and what are the precise method calls shown for each (dropping rows, filling values, and replacing values)?

3. According to the document’s metadata, what is the file size (in bytes) of **“Transformations in pyspark .pdf”**?
Excerpt:
-----
[ 
E T L 
p r o c e s s e s 
u s i n g 
P y S p a r k 
] 
# 
Q u i c k 
S u m m a r y 
 1.EnvironmentSetupandSparkSessionCreation
●
Install
PySpark
: pipinstallpyspark●
Start
a
SparkSession
: frompyspark.sqlimportSparkSession;spark=SparkSession.builder.appName(

: 

: 

: 

: 

: 

: 

In [None]:
# # Check total number of nodes created after chunking
# print(f"Total number of nodes created: {len(nodes)}")

7

: 

: 

: 

: 

: 

: 

## 5. Embedding Generation

Create embeddings using HuggingFace's BGE model and build a vector index.

In [None]:
# Initialize HuggingFace embedding model
# Using BAAI/bge-small-en-v1.5 - a small but effective embedding model
hf_embeddings = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Test the embedding model with a sample text
test_embed = hf_embeddings.get_text_embedding("Allez si ibrahim")
print(f"Embedding dimension: {len(test_embed)}")
print(f"Sample embedding values: {test_embed[:5]}")  # Show first 5 values

2025-12-27 22:14:39,423 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
2025-12-27 22:47:33,153 - INFO - 1 prompt is loaded, with the key: query


[-0.02017112821340561, 0.14482395350933075, -0.01423242874443531, -0.07562268525362015, -0.009775497950613499, -0.025659609586000443, 0.08076140284538269, 0.012043717317283154, 0.017221175134181976, -0.012245615012943745, -0.018579687923192978, -0.050895098596811295, -0.0167841874063015, 0.011886931024491787, 0.05391477048397064, 0.02962633967399597, -0.004159002564847469, 0.00959134940057993, -0.08207977563142776, -0.04079286754131317, -0.007241450250148773, -0.020212257280945778, -0.02218056470155716, -0.020687056705355644, 0.02370595373213291, 0.010519594885408878, 0.011081664822995663, 0.0004888022085651755, -0.03809983655810356, -0.14868807792663574, 0.015195298008620739, 0.030558215454220772, 0.02603444643318653, 0.04514699801802635, -0.012815529480576515, 0.02152239717543125, -0.03646758198738098, 0.024992888793349266, -0.02396818809211254, -0.017713231965899467, 0.0617869608104229, 0.024671830236911774, -0.01000327430665493, -0.11049612611532211, -0.0161515474319458, -0.0323573

: 

: 

: 

: 

: 

: 

## 6. Vector Store and Indexing

Create a vector index from the nodes for efficient similarity search.

In [None]:
# Create a vector store index from the nodes
# This builds the index in-memory for fast querying
index = VectorStoreIndex(nodes, embed_model=hf_embeddings)

## 7. Query Engine

Set up a query engine to perform RAG (Retrieval-Augmented Generation).

In [None]:
# Initialize LLM for querying (specify the appropriate Groq model)
# Replace "llamamodelname" with an actual model like "llama-3.3-70b-versatile"
llm_querying = Groq(
    model="llama-3.3-70b-versatile",  # Or another available Groq model
    api_key=os.environ["GROQ_API_KEY"]
)

# Create a query engine from the index
query_engine = index.as_query_engine(llm=llm_querying)

# Perform a sample query
response = query_engine.query("What is this document about")

print("=== Query Response ===")
print(response)

In [None]:
# Inspect the source nodes used to generate the response
# This shows which document chunks were retrieved and used
print("=== Source Nodes ===")
pprint.pprint(response.source_nodes)

## 8. Persistent Storage with ChromaDB

Store the vector index in ChromaDB for persistence across sessions.

In [None]:
# Initialize ChromaDB for persistent storage
# PersistentClient saves data to disk
db = chromadb.PersistentClient(path="./chroma_db")

# Get or create a collection for storing vectors
chroma_collection = db.get_or_create_collection("quickstart")

# Create a ChromaDB vector store
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

# Create storage context with ChromaDB as the vector store
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index with persistent storage
# This will save embeddings to ChromaDB
index = VectorStoreIndex(
    nodes, 
    storage_context=storage_context, 
    embed_model=hf_embeddings
)

# Alternative: Build index directly from documents with transformations
# index = VectorStoreIndex.from_documents(
#     documents, 
#     storage_context=storage_context, 
#     transformations=[text_splitter, title_extractor, qa_extractor]
# )

# Create query engine from the persistent index
query_engine = index.as_query_engine(llm=llm_querying)

: 

: 

: 

: 

: 

: 

In [None]:
# Query the persistent index
response = query_engine.query("What is this document about")

print("=== Query Response (from ChromaDB) ===")
print(response)

: 

: 

: 

: 

: 

: 