# Setup

In [1]:
import mlflow

from pathlib import Path

from llama_index.llms.openai import OpenAI
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document, StorageContext, load_index_from_storage
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.core.indices.postprocessor import MetadataReplacementPostProcessor, SentenceTransformerRerank
from llama_index.core.response.notebook_utils import display_response
from llama_index.core.settings import Settings


# Load variables from a local .env file into the process environment
# (presumably where the OpenAI API key comes from — confirm).
from dotenv import load_dotenv
load_dotenv()

# Silence all Python warnings for the rest of the session to keep cell output
# clean. NOTE(review): this also hides deprecation warnings from the libraries.
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Locations on disk, relative to the notebook's working directory:
# the data folder and the sub-folder where the sentence index is persisted.
DATA_DIR = Path('..') / 'data'
SENTENCE_INDEX_PATH = DATA_DIR / "sentence_index"

In [3]:
# Chat model used for answer synthesis (low temperature for stable answers).
# Note: the LlamaIndex OpenAI wrapper selects the model through the `model`
# keyword (not `model_name`), and OpenAI model ids carry no "models/" prefix.
llm = OpenAI(model="gpt-4o-mini", temperature=0.1)

In [4]:
# Point MLflow at the tracking server running on this machine and select the
# experiment under which the runs and traces below are recorded.
MLFLOW_TRACKING_URI = "http://localhost:5000"
MLFLOW_EXPERIMENT_NAME = "LLamaIndex RAG Tracing"

mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment(MLFLOW_EXPERIMENT_NAME)

# Enable LlamaIndex autologging with trace capture; silent=True is meant to
# keep MLflow's own autologging messages out of the cell output.
mlflow.llama_index.autolog(log_traces=True, silent=True)

2024/11/11 14:01:30 INFO mlflow.tracking.fluent: Experiment with name 'LLamaIndex RAG Tracing' does not exist. Creating a new experiment.


# Index Creation

In [5]:
# Read the eBook PDF from the data folder
pdf_path = Path(DATA_DIR, "eBook-How-to-Build-a-Career-in-AI.pdf")
reader = SimpleDirectoryReader(input_files=[pdf_path])
documents = reader.load_data()

# Merge the text of every loaded piece into one Document, separated by blank lines
merged_text = "\n\n".join(part.text for part in documents)
document = Document(text=merged_text)

In [6]:
# Sentence-window node parser.
# The window of neighbouring sentences is stored under the "window" metadata
# key and the original sentence under "original_text"; window_size controls
# how many neighbouring sentences are captured.
node_parser = SentenceWindowNodeParser.from_defaults(
    original_text_metadata_key="original_text",
    window_metadata_key="window",
    window_size=3,
)

# Register the LLM and the node parser on the LlamaIndex global Settings.
# The embedding model is not set globally here; it is passed explicitly
# ("local:BAAI/bge-small-en-v1.5") where the index is built or loaded.
Settings.llm = llm
Settings.node_parser = node_parser

In [7]:
# Reuse a previously persisted sentence index when one exists on disk;
# otherwise build it from the document and persist it for the next run.
EMBED_MODEL_NAME = "local:BAAI/bge-small-en-v1.5"

if SENTENCE_INDEX_PATH.exists():
    print("Loading Sentence Index")
    storage_context = StorageContext.from_defaults(persist_dir=SENTENCE_INDEX_PATH)
    sentence_index = load_index_from_storage(storage_context, embed_model=EMBED_MODEL_NAME)
else:
    print("Building Sentence Index")
    sentence_index = VectorStoreIndex.from_documents([document], embed_model=EMBED_MODEL_NAME)
    sentence_index.storage_context.persist(persist_dir=SENTENCE_INDEX_PATH)

Loading Sentence Index


# Post-Processing and ReRanking

In [8]:
# Sentence Window Retrieval: rather than handing the LLM only the retrieved
# sentence, replace it with the window of sentences stored under the "window"
# metadata key (the same key the node parser is configured with).
postproc = MetadataReplacementPostProcessor(
    target_metadata_key="window",
)

In [9]:
# Re-score the retrieved nodes with a Sentence Transformer reranker and keep the top 2
rerank = SentenceTransformerRerank(
    model="BAAI/bge-reranker-base",
    top_n=2,
)

# Testing

In [10]:
# Query engine: retrieve 6 candidates, then apply the window replacement
# and the reranker, in that order.
sentence_window_engine = sentence_index.as_query_engine(
    similarity_top_k=6,
    node_postprocessors=[postproc, rerank],
)

In [11]:
# Smoke-test the query engine with a broad question about the book
question = "What are the keys to building a career in AI?"
window_response = sentence_window_engine.query(question)
display_response(window_response)

**`Final Response:`** The keys to building a career in AI involve learning foundational technical skills, working on projects, finding a job, and being part of a supportive community.

In [12]:
# A more specific question that asks for a structured answer
question = "According to the text, what are the three key steps of career growth in the field of AI? Provide a brief explanation of each step."
window_response = sentence_window_engine.query(question)
display_response(window_response)

**`Final Response:`** The three key steps of career growth in the field of AI are learning foundational technical skills, working on projects, and finding a job. Learning foundational technical skills involves acquiring the necessary knowledge and expertise in AI. Working on projects allows individuals to apply their skills in practical settings and gain hands-on experience. Finding a job involves securing a position in the field of AI, which can be facilitated by showcasing one's skills and experience to potential employers.

In [13]:
# Chat engine in "context" mode over the same index, with a token-limited
# conversation memory and the same retrieval post-processing as the query engine.
CHAT_SYSTEM_PROMPT = (
    "You are a chatbot, able to have normal interactions, as well as talk"
    ' about a book "How to Build a Career in AI" by Andrew Ng.'
)

memory = ChatMemoryBuffer.from_defaults(token_limit=5000)
sentence_window_chat_engine = sentence_index.as_chat_engine(
    chat_mode="context",
    memory=memory,
    system_prompt=CHAT_SYSTEM_PROMPT,
    similarity_top_k=6,
    node_postprocessors=[postproc, rerank],
)

In [14]:
# First chat turn: ask about a specific passage of the book
chat_message = "Explain the scenario planning exercise mentioned in the context of the Covid-19 pandemic."
window_response = sentence_window_chat_engine.chat(chat_message)
display_response(window_response)

**`Final Response:`** The scenario planning exercise mentioned in the context of the Covid-19 pandemic involved imagining different recovery scenarios and making plans for each case. In this exercise, the individual considered three possible scenarios: a quick recovery (three months), a medium recovery (one year), and a slow recovery (two years) from Covid-19. By creating plans for managing each scenario, the individual was able to prioritize actions and decisions based on the potential outcomes. This exercise helped in regaining a sense of control and preparedness in the face of uncertainty during the pandemic.

In [15]:
# Follow-up turn: "this method" only makes sense with the previous turn in memory
chat_message = "How can this method be applied to personal life situations such as job hunting or exam preparation?"
window_response = sentence_window_chat_engine.chat(chat_message)
display_response(window_response)

**`Final Response:`** The scenario planning method can be applied to personal life situations such as job hunting or exam preparation by considering different possible outcomes and creating plans for each scenario. For example, if you are unsure about passing an exam, getting a job offer, or being granted a visa, you can write out what you would do in each of the likely scenarios. By thinking through the possibilities and following through on plans for each outcome, you can navigate these situations more effectively and reduce stress associated with uncertainty. This approach helps you prepare for different possibilities and take proactive steps to achieve your goals, no matter what the future brings.

# (Optional) Logging the LlamaIndex Model to MLflow

### Query Retrieval

In [16]:
# Log the query engine to MLflow inside a new run. The model is given as the
# path to a Python script rather than as an in-memory engine object.
query_engine_path = "./llamaindex-adv-rag-model.py"

with mlflow.start_run():
    model_info = mlflow.llama_index.log_model(
        engine_type="query",
        artifact_path="rag-index-query",
        llama_index_model=query_engine_path,
    )

2024/11/11 14:01:59 INFO mlflow.llama_index.serialize_objects: API key(s) will be removed from the global Settings object during serialization to protect against key leakage. At inference time, the key(s) must be passed as environment variables.
2024/11/11 14:01:59 INFO mlflow.llama_index.serialize_objects: API key(s) will be removed from the global Settings object during serialization to protect against key leakage. At inference time, the key(s) must be passed as environment variables.
2024/11/11 14:02:14 INFO mlflow.tracking._tracking_service.client: 🏃 View run vaunted-hound-796 at: http://localhost:5000/#/experiments/334194126917619707/runs/f55d2dd2281c4e15b5bab54df5805612.
2024/11/11 14:02:14 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/334194126917619707.


In [17]:
# Reload the logged engine through the URI returned by log_model
logged_model_uri = model_info.model_uri
sentence_window_query_engine = mlflow.llama_index.load_model(model_uri=logged_model_uri)

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

In [18]:
# Run inference with the engine reloaded from MLflow
question = "What are the keys to building a career in AI?"
window_response = sentence_window_query_engine.query(question)
display_response(window_response)

**`Final Response:`** The keys to building a career in AI involve learning foundational technical skills, working on projects, finding a job, and being part of a supportive community.

In [19]:
# Ask the second question against the engine reloaded from MLflow
# (sentence_window_query_engine), not the in-memory sentence_window_engine,
# so that this cell actually exercises the logged model.
window_response = sentence_window_query_engine.query(
    "According to the text, what are the three key steps of career growth in the field of AI? Provide a brief explanation of each step."
)
display_response(window_response)

**`Final Response:`** The three key steps of career growth in the field of AI are learning foundational technical skills, working on projects, and finding a job. Learning foundational technical skills involves acquiring the necessary knowledge and expertise in AI. Working on projects allows individuals to apply their skills in practical scenarios and gain hands-on experience. Finding a job involves securing a position in the AI field, which can be facilitated by showcasing one's skills and experience to potential employers.