In [1]:
import os

import phoenix as px
from phoenix.otel import register

from pathlib import Path

from llama_index.llms.openai import OpenAI
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document, StorageContext, load_index_from_storage
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.core.indices.postprocessor import MetadataReplacementPostProcessor, SentenceTransformerRerank
from llama_index.core.response.notebook_utils import display_response
from llama_index.core.settings import Settings

from openinference.instrumentation.llama_index import LlamaIndexInstrumentor


from dotenv import load_dotenv
# Called with no arguments. Presumably this reads a local .env file into the
# process environment so the OPENAI_API_KEY check in the next cell can pass
# (TODO confirm where the .env file is expected to live).
load_dotenv()

import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this also hides deprecation notices from the libraries above.
warnings.filterwarnings('ignore')

In [2]:
# Fail fast, before any model call, when the OpenAI credential is absent from the environment.
assert os.environ.get("OPENAI_API_KEY") is not None, "Please set your OPENAI_API_KEY environment variable."

# Setup

In [3]:
# Data folder, relative to the notebook's working directory.
DATA_DIR = Path('..') / 'data'
# Directory in which the sentence-window index is persisted and later reloaded from.
SENTENCE_INDEX_PATH = DATA_DIR / "sentence_index"

In [4]:
# Launch the local Phoenix app (the UI that displays the collected traces).
# `px` is already imported in the imports cell at the top of the notebook, so it
# is not re-imported here. With use_temp_dir=False the recorded output reports
# that data is persisted to a SQLite database rather than a temporary directory.
px.launch_app(use_temp_dir=False)

🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
💽 Your data is being persisted to sqlite:////home/hrushikesh/.phoenix/phoenix.db
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


<phoenix.session.session.ThreadSession at 0x7ae3b4140470>

In [5]:
# Connect this notebook to Phoenix: create a tracer provider for the named project.
# Per register()'s own recorded output, the provider is also installed as the
# global OpenTelemetry default.
tracer_provider = register(
    project_name="LlamaIndex RAG Tracing",
)

🔭 OpenTelemetry Tracing Details 🔭
|  Phoenix Project: LlamaIndex RAG Tracing
|  Span Processor: SimpleSpanProcessor
|  Collector Endpoint: localhost:4317
|  Transport: gRPC
|  Transport Headers: {'user-agent': '****'}
|  
|  Using a default SpanProcessor. `add_span_processor` will overwrite this default.
|  
|  `register` has set this TracerProvider as the global OpenTelemetry default.
|  To disable this behavior, call `register` with `set_global_tracer_provider=False`.



In [6]:
# Initialize the LlamaIndex instrumentor, handing it the tracer provider
# returned by register() so LlamaIndex activity is reported to Phoenix.
LlamaIndexInstrumentor().instrument(tracer_provider=tracer_provider)

In [7]:
# Chat model used for answer synthesis; a low temperature keeps answers close to
# the retrieved context.
# The llama-index OpenAI wrapper picks the model through its `model` argument
# (not `model_name`), and the OpenAI model id is "gpt-4o-mini" with no
# "models/" prefix. With the wrong keyword the requested model is not selected.
llm = OpenAI(model="gpt-4o-mini", temperature=0.1)

# Index Creation

In [8]:
# Read the source PDF; the reader returns a list of document objects.
documents = SimpleDirectoryReader(
    input_files=[DATA_DIR / "eBook-How-to-Build-a-Career-in-AI.pdf"],
).load_data()

# Merge everything the reader returned into one Document, separating the
# pieces with a blank line.
document = Document(text="\n\n".join(part.text for part in documents))

In [9]:
# Sentence-window parsing setup
## Llama-Index global settings: make the OpenAI model the default LLM
Settings.llm = llm

## Sentence window node parser: each node's metadata carries its window of
## sentences under "window" and its own sentence under "original_text"
node_parser = SentenceWindowNodeParser.from_defaults(
    original_text_metadata_key="original_text",
    window_metadata_key="window",
    window_size=3,
)
Settings.node_parser = node_parser

## No global embedding model is configured here; the embedding model is passed
## explicitly where the index is built or loaded.

In [10]:
# Create the sentence index.
# If a persisted index already exists on disk it is loaded; otherwise the index
# is built from the document and persisted for the next run.
#
# The embedding model is named once so the build and load paths cannot drift
# apart: an index queried with a different embedding model than it was built
# with would return meaningless similarity scores.
EMBED_MODEL = "local:BAAI/bge-small-en-v1.5"

if SENTENCE_INDEX_PATH.exists():
    print("Loading Sentence Index")
    storage_context = StorageContext.from_defaults(persist_dir=SENTENCE_INDEX_PATH)
    sentence_index = load_index_from_storage(storage_context, embed_model=EMBED_MODEL)
else:
    print("Building Sentence Index")
    sentence_index = VectorStoreIndex.from_documents([document], embed_model=EMBED_MODEL)
    # Persist right after building so later runs take the load branch.
    sentence_index.storage_context.persist(persist_dir=SENTENCE_INDEX_PATH)

Loading Sentence Index


# Post-Processing and ReRanking

In [11]:
# Sentence Window Retrieval: rather than handing the LLM only the single
# retrieved sentence, pass along the window of sentences stored under the
# "window" metadata key.
postproc = MetadataReplacementPostProcessor(
    target_metadata_key="window",
)

In [12]:
# Re-score the retrieved candidates with a Sentence Transformer reranker and
# keep only the top 2.
rerank = SentenceTransformerRerank(
    model="BAAI/bge-reranker-base",
    top_n=2,
)

# Testing

In [13]:
# Query engine over the sentence index: retrieve the 6 most similar nodes, then
# apply the post-processors in order (window replacement first, reranking second).
sentence_window_engine = sentence_index.as_query_engine(
    node_postprocessors=[postproc, rerank],
    similarity_top_k=6,
)

In [14]:
# Smoke test: one broad question against the query engine, rendered in the notebook.
window_response = sentence_window_engine.query("What are the keys to building a career in AI?")
display_response(window_response)

**`Final Response:`** The keys to building a career in AI involve learning foundational technical skills, working on projects, finding a job, and being part of a supportive community.

In [15]:
# A more specific question that asks for an enumerated answer with explanations.
window_response = sentence_window_engine.query(
    "According to the text, what are the three key steps of career growth in the field of AI?"
    " Provide a brief explanation of each step."
)
display_response(window_response)

**`Final Response:`** The three key steps of career growth in the field of AI are learning foundational technical skills, working on projects, and finding a job. Learning foundational technical skills involves acquiring the necessary knowledge and expertise in AI. Working on projects allows individuals to apply their skills in practical settings and gain hands-on experience. Finding a job involves securing a position in the field of AI, which can be facilitated by showcasing one's skills and experience to potential employers.

In [16]:
# Chat engine over the same index. It reuses the retrieval settings of the query
# engine (top-6 retrieval, window replacement, reranking) and adds a conversation
# memory buffer capped at 5000 tokens.
memory = ChatMemoryBuffer.from_defaults(token_limit=5000)
sentence_window_chat_engine = sentence_index.as_chat_engine(
    chat_mode="context",
    similarity_top_k=6,
    node_postprocessors=[postproc, rerank],
    memory=memory,
    system_prompt=(
        "You are a chatbot, able to have normal interactions, as well as talk"
        " about a book \"How to Build a Career in AI\" by Andrew Ng."
    ),
)

In [17]:
# First chat turn: ask about a specific passage of the book.
window_response = sentence_window_chat_engine.chat(
    "Explain the scenario planning exercise mentioned"
    " in the context of the Covid-19 pandemic."
)
display_response(window_response)

**`Final Response:`** The scenario planning exercise mentioned in the context of the Covid-19 pandemic involved imagining different recovery scenarios and making plans for each case. In this exercise, three different recovery scenarios were considered: a quick recovery (three months), a medium recovery (one year), and a slow recovery (two years) from Covid-19. By creating plans for managing each of these scenarios, individuals could prioritize their actions and decisions based on the potential outcomes. This exercise helped individuals regain a sense of control in a time of uncertainty and allowed them to prepare for various possibilities that could arise during the pandemic.

In [18]:
# Follow-up turn: "this method" is only resolvable through the chat memory,
# which holds the previous exchange.
window_response = sentence_window_chat_engine.chat(
    "How can this method be applied to personal life situations"
    " such as job hunting or exam preparation?"
)
display_response(window_response)

**`Final Response:`** The scenario planning method can be applied to personal life situations such as job hunting or exam preparation by following a similar approach of imagining different outcomes and making plans for each scenario. For example, if you are unsure about passing an exam or receiving a job offer, you can create plans for different possibilities:

1. Best-case scenario: Imagine that you pass the exam or receive a job offer. Plan how you will celebrate, what steps you will take next, and how you will continue to excel in your field.

2. Medium-case scenario: Consider a scenario where you may not pass the exam or receive the job offer immediately. Plan how you will reassess your approach, seek feedback, and work towards improving your skills or qualifications.

3. Worst-case scenario: Prepare for the possibility of not passing the exam or not getting the job offer. Plan how you will cope with the disappointment, learn from the experience, and identify alternative paths or opportunities to pursue.

By thinking through these different scenarios and creating plans for each, you can better navigate the uncertainties of job hunting or exam preparation. This exercise can help you stay focused, proactive, and prepared for whatever outcomes may arise, ultimately increasing your chances of success and reducing stress during the process.