# Setup

In [1]:
import warnings
from pathlib import Path

import mlflow
from dotenv import load_dotenv
from llama_index.core import (
    Document,
    ServiceContext,
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.core.indices.postprocessor import MetadataReplacementPostProcessor, SentenceTransformerRerank
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.response.notebook_utils import display_response
from llama_index.llms.gemini import Gemini

load_dotenv()

warnings.filterwarnings('ignore')

[nltk_data] Downloading package punkt_tab to /home/hrushikesh/.pyenv/v
[nltk_data]     ersions/LLMObs/lib/python3.12/site-
[nltk_data]     packages/llama_index/core/_static/nltk_cache...
[nltk_data]   Package punkt_tab is already up-to-date!


In [2]:
# Notebook-relative locations: input files are read from ../data, and the
# persisted sentence index is kept in a sub-folder of that directory.
DATA_DIR = Path('..') / 'data'
SENTENCE_INDEX_PATH = DATA_DIR / "sentence_index"

In [3]:
# Gemini model used as the LLM for the rest of the notebook (temperature 0.1).
llm = Gemini(
    temperature=0.1,
    model_name="models/gemini-1.5-flash-002",
)

In [4]:
# Send runs and traces to the MLflow tracking server running on this machine,
# under a dedicated experiment.
TRACKING_URI = "http://localhost:5000"
EXPERIMENT_NAME = "LLamaIndex RAG Tracing"

mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME)

2024/11/08 16:04:20 INFO mlflow.tracking.fluent: Experiment with name 'LLamaIndex RAG Tracing' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/886422159020597810', creation_time=1731062060735, experiment_id='886422159020597810', last_update_time=1731062060735, lifecycle_stage='active', name='LLamaIndex RAG Tracing', tags={}>

# Index Creation

In [5]:
# Read the source PDF from the data directory.
pdf_path = DATA_DIR / "eBook-How-to-Build-a-Career-in-AI.pdf"
documents = SimpleDirectoryReader(input_files=[pdf_path]).load_data()

# Merge the text of every loaded piece into a single Document,
# separating the pieces with a blank line.
merged_text = "\n\n".join(doc.text for doc in documents)
document = Document(text=merged_text)

In [6]:
# Creating Sentence Index
## Create the sentence window node parser w/ default settings
node_parser = SentenceWindowNodeParser.from_defaults(
    window_size=3,
    window_metadata_key="window",
    original_text_metadata_key="original_text",
)

## Sentence Context Model
sentence_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    # embed_model="local:BAAI/bge-large-en-v1.5"
    node_parser=node_parser,
)

In [7]:
# Create the sentence index
# if an index file exist, then it will load it
# if not, it will rebuild it

if not SENTENCE_INDEX_PATH.exists():
    sentence_index = VectorStoreIndex.from_documents([document], service_context=sentence_context)
    sentence_index.storage_context.persist(persist_dir=SENTENCE_INDEX_PATH)

else:
    sentence_index = load_index_from_storage(StorageContext.from_defaults(persist_dir=SENTENCE_INDEX_PATH), service_context=sentence_context)

# Post-Processing and ReRanking

In [8]:
# Sentence Window Retrieval: rather than handing over only the retrieved
# sentence, replace it with the window of sentences stored under the
# "window" metadata key.
postproc = MetadataReplacementPostProcessor(
    target_metadata_key="window",
)

In [9]:
# Sentence Transformer reranker: re-scores the retrieved sentences and keeps the top 2.
rerank = SentenceTransformerRerank(
    model="BAAI/bge-reranker-base",
    top_n=2,
)

# Testing

In [10]:
# Build the query engine: retrieve with similarity_top_k=6, then pass the
# results through the window-replacement and reranking post-processors.
sentence_window_engine = sentence_index.as_query_engine(
    node_postprocessors=[postproc, rerank],
    similarity_top_k=6,
)

In [11]:
# # Log the query engine to mlflow
# with mlflow.start_run():
#     model_info = mlflow.llama_index.log_model(
#         sentence_window_engine,
#         artifact_path="rag-index-query",
#         engine_type="query",
#     )

In [12]:
# Turn on MLflow autologging for LlamaIndex with trace logging enabled;
# silent=True is passed to keep autologging quiet.
mlflow.llama_index.autolog(log_traces=True, silent=True)

In [13]:
# Ask a sample question through the sentence-window query engine.
window_response = sentence_window_engine.query("What are the keys to building a career in AI?")

In [14]:
# Render the answer returned by the sentence-window query engine.
display_response(window_response)

**`Final Response:`** The keys to building a career in AI include learning foundational technical skills, working on projects, and finding a job, all while being part of a supportive community.

# (Optional) Logging LlamaIndex Model to MLflow

In [15]:
# The query engine is logged from a Python script: log_model receives the
# script's path instead of an in-memory engine object.
query_engine_path = "./llamaindex-adv-rag-model.py"

# Log the model inside a fresh MLflow run; model_info is used afterwards to
# look up the logged model's URI.
with mlflow.start_run():
    model_info = mlflow.llama_index.log_model(
        engine_type="query",
        artifact_path="rag-index-query",
        llama_index_model=query_engine_path,
    )

2024/11/08 16:05:00 INFO mlflow.llama_index.serialize_objects: API key(s) will be removed from the global Settings object during serialization to protect against key leakage. At inference time, the key(s) must be passed as environment variables.
2024/11/08 16:05:00 INFO mlflow.llama_index.serialize_objects: API key(s) will be removed from the global Settings object during serialization to protect against key leakage. At inference time, the key(s) must be passed as environment variables.
2024/11/08 16:05:37 INFO mlflow.tracking._tracking_service.client: 🏃 View run rambunctious-koi-788 at: http://localhost:5000/#/experiments/886422159020597810/runs/a4db4ba1feb14c11a08ad898faa9e665.
2024/11/08 16:05:37 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/886422159020597810.


In [16]:
# Show the URI of the logged model; it is passed to load_model to load the model back.
model_info.model_uri

'runs:/a4db4ba1feb14c11a08ad898faa9e665/rag-index-query'

In [17]:
# Load the logged query engine back from MLflow by its model URI so it can be
# used for inference.
logged_model_uri = model_info.model_uri
sentence_window_query_engine = mlflow.llama_index.load_model(model_uri=logged_model_uri)

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

In [18]:
# Ask the reloaded engine the same sample question and render its answer.
window_response = sentence_window_query_engine.query(
    "What are the keys to building a career in AI?"
)
display_response(window_response)

**`Final Response:`** The keys to building a career in AI include learning foundational technical skills, working on projects, and finding a job, all while being part of a supportive community.