In [1]:
#%%capture
#!pip install llama-index==0.10.37 llama-index-embeddings-openai==0.1.9 qdrant-client==1.9.1 llama-index-vector-stores-qdrant==0.2.8 llama-index-llms-openai==0.1.19

In [2]:
import os
import sys
from getpass import getpass
import nest_asyncio

from IPython.display import Markdown, display

from dotenv import load_dotenv

nest_asyncio.apply()

load_dotenv("")

sys.path.append('../helpers')

from utils import setup_llm, setup_embed_model, setup_vector_store

[nltk_data] Downloading package punkt_tab to
[nltk_data]     /opt/conda/envs/lil_llama_index/lib/python3.10/site-
[nltk_data]     packages/llama_index/core/_static/nltk_cache...
[nltk_data]   Package punkt_tab is already up-to-date!


In [3]:
OPENAI_API_KEY = os.environ['OPENAI_API_KEY'] or getpass("Enter your OpenAI API key: ")

In [4]:
QDRANT_URL = ":memory:"

In [5]:
QDRANT_API_KEY = os.environ['QDRANT_API_KEY'] or  getpass("Enter your Qdrant API Key:")

In [6]:
from llama_index.core.settings import Settings
from utils import setup_llm, setup_embed_model

setup_llm(
    provider="openai",
    api_key=OPENAI_API_KEY, 
    model="gpt-4o", 
    temperature=0.75, 
    system_prompt="""Use ONLY the provided context and generate a complete, coherent answer to the user's query. 
    Your response must be grounded in the provided context and relevant to the essence of the user's query.
    """
    )

setup_embed_model(
    provider="openai",
    model="text-embedding-3-small",
    api_key=OPENAI_API_KEY
    )

In [7]:
import random
from utils import get_documents_from_docstore, group_documents_by_author, sample_documents

documents = get_documents_from_docstore("../data/words-of-the-senpais")

random.seed(42)

documents_by_author = group_documents_by_author(documents)

senpai_documents = sample_documents(documents_by_author, num_samples=25)

# 🗃️ Metadata for Nodes

Metadata provides additional context or information about the nodes.

During retrieval we can leverage this additional context and information, for more precise and relevant retrieval. However, the effectiveness of this approach depends on the quality and relevance of the metadata tags used. The most simplest way to add metadata is to do so manually. 

Let's add some metadata for what each of our Senpai's are known for.

In [8]:
known_for = {
    "Naval Ravikant": "Known for his insights on how to build wealth and achieve happiness through developing specific knowledge, embracing accountability, playing long-term games, and understanding the power of compound interest in all areas of life.",
    "Balaji Srinivasan": "Has insights on how to think independently, identify opportunities, and build a better future through the strategic application of technology and clear reasoning.",
    "Paul Graham": "Provides advice on the hacker mindset, arguing that hackers are really makers and creators - akin to painters - who can leverage their unique way of thinking to push boundaries, challenge the status quo, and shape the future through technology and entrepreneurship.",
    "Nassim Nicholas Taleb": "Argues for 'Skin in the Game', that is having a personal stake in the outcome is necessary for fairness as it aligns incentives and exposes individuals to both the potential rewards and risks of their decisions.",
    "Seneca": "Offers timeless advice on how to cultivate wisdom, build mental resilience, and live a life of purpose and contentment by focusing on what is essential, mastering one's emotions, and aligning oneself with nature.",
    "Bruce Lee": "Offers profound wisdom on self-improvement, personal growth, and martial arts philosophy, emphasizing the importance of adaptability, self-expression, and embracing one's own unique path in life, "
}

In [9]:
for document in senpai_documents:
    document.metadata['known_for'] = known_for.get(document.metadata['author']) 

In [10]:
senpai_documents[42].metadata

{'page_number': 113,
 'file_name': '../data/anthology_of_balaji.pdf',
 'title': 'The Anthology of Balaji Srinivasan',
 'author': 'Balaji Srinivasan',
 'known_for': 'Has insights on how to think independently, identify opportunities, and build a better future through the strategic application of technology and clear reasoning.'}


# Automatically Extract Metadata

Metadata extraction in LlamaIndex is a process that helps to disambiguate similar-looking passages of text, especially in long documents. 

This is achieved by using LLMs to extract contextual information relevant to the document. This information aids the retrieval and language models in distinguishing between similar passages.

In LlamaIndex, metadata extraction is performed using various feature extractors within the [`MetadataExtractor`](https://github.com/run-llama/llama_index/tree/954398e1957027a364d0d332fee61733ad322f8b/llama-index-core/llama_index/core/extractors) class. 

These extractors include:

 - `SummaryExtractor`: This extractor automatically generates a summary over a set of Nodes.

 - `QuestionsAnsweredExtractor`: This extractor identifies a set of questions that each Node can answer.

 - `TitleExtractor`: This extractor identifies a title over the context of each Node.

 - `KeywordExtractor`: Keywords that uniquely identify the node



In [11]:
from llama_index.core.extractors import  SummaryExtractor, QuestionsAnsweredExtractor, TitleExtractor, KeywordExtractor

In [12]:
print(SummaryExtractor().prompt_template)

Here is the content of the section:
{context_str}

Summarize the key topics and entities of the section. 
Summary: 


In [13]:
print(QuestionsAnsweredExtractor().prompt_template)

Here is the context:
{context_str}

Given the contextual information, generate {num_questions} questions this context can provide specific answers to which are unlikely to be found elsewhere.

Higher-level summaries of surrounding context may be provided as well. Try using these summaries to generate better questions that this context can answer.




In [14]:
print(TitleExtractor().node_template)

Context: {context_str}. Give a title that summarizes all of the unique entities, titles or themes found in the context. Title: 


#### KeywordExtractor has it's prompt template buried in an LLM call, and not an attribute.

Here's what it is in [the source code](https://github.com/run-llama/llama_index/blob/954398e1957027a364d0d332fee61733ad322f8b/llama-index-core/llama_index/core/extractors/metadata_extractors.py#L198):

```python
f"""\
{{context_str}}. Give {self.keywords} unique keywords for this \
document. Format as comma separated. Keywords: 
```

# Automated Metadata Extraction

Let's perform some automated metadata extraction for better retrieval results. 

We'll employ two extractors: 

 - `QuestionAnsweredExtractor` to generates question/answer pairs from a piece of text
 
 - `SummaryExtractor` to extracts summaries, not only within the current text, but also within adjacent texts. 
 
This strategy leads to higher quality answer given retrieved results.

To do this, we define metadata extractors:
 
 - `qa_extractor`
 
 - `summary_extractor`

 Note the use of `MetadataMode.EMBED` this specifies how metadata is handled when generating embeddings for a document or node. When you call the `get_content()` function on a document and specify `MetadataMode.EMBED`, it returns the content of the document with the metadata that is visible to the embedding model.

 We'll also use `GPT-3.5-Turbo` to generate the metadata.

#### 👨🏽‍💻 I encourage you to try out the other metadata extractors and see what your results look like. 

For example, you can try the `KeywordExtractor` or  `TitleExtractor` like so:

```python

keyword_extractor = KeywordExtractor(keywords=10, llm=llm)

title_extractor = TitleExtractor(nodes=5, llm=llm)

```

In [15]:
from llama_index.core.schema import MetadataMode
from llama_index.core.node_parser import TokenTextSplitter
from llama_index.core.extractors import SummaryExtractor, QuestionsAnsweredExtractor
from llama_index.llms.openai import OpenAI

qa_llm = OpenAI(model="gpt-4o")

text_splitter = TokenTextSplitter(chunk_size=256, chunk_overlap=16)

qa_extractor = QuestionsAnsweredExtractor(
    questions=2, 
    llm=qa_llm, 
    metadata_mode=MetadataMode.EMBED,
    embed_model=Settings.embed_model,
    )

summary_extractor = SummaryExtractor(
    summaries=["prev", "self", "next"], 
    llm=qa_llm,
    )


### 👷🏽‍♂️ 🗂️ Ingest to Qdrant and Build the Index 

In the last few videos we did the node splitting first and then ingested to Qdrant. That was to make the pattern clear to you and give you a sense of how splitting works.

But, we can actually just do this kind of stuff directly using the ingetsion pipeline.

Note, I will leave it up to you to experiment using with one, or both of the extractors and fiddling with the hyperparameters.

The parsing here took ~30 minutes.


In [16]:
from llama_index.core import StorageContext
from llama_index.core.settings import Settings

from utils import create_index, create_query_engine, ingest, setup_vector_store

COLLECTION_NAME = "words-of-the-senpai-qa-plus-summaries-nodes"

qa_summaries_vector_store = setup_vector_store(QDRANT_URL, QDRANT_API_KEY, COLLECTION_NAME)

transforms = [text_splitter, qa_extractor, summary_extractor, Settings.embed_model]

qa_summaries = ingest(
    documents=senpai_documents,
    transformations=transforms,
    vector_store=qa_summaries_vector_store
)
qa_summaries_index = create_index(
    from_where="vector_store",
    vector_store=qa_summaries_vector_store,
    embed_model=Settings.embed_model,
    )

Both client and aclient are provided. If using `:memory:` mode, the data between clients is not synced.
100%|██████████| 367/367 [06:09<00:00,  1.01s/it]
100%|██████████| 367/367 [07:05<00:00,  1.16s/it]


In [17]:
len(qa_summaries)

367

In [18]:
qa_summaries[100].__dict__

{'id_': 'de147907-09a2-4bd5-ad73-dd32f3111ff9',
 'embedding': [-0.025902079418301582,
  -0.008512385189533234,
  -0.02736177109181881,
  -0.0019679770339280367,
  -0.03607523441314697,
  0.0031092921271920204,
  0.027093663811683655,
  -0.00021481112344190478,
  -0.03878609091043472,
  0.004408864304423332,
  -0.0030385416466742754,
  0.02363806776702404,
  0.051238153129816055,
  -0.019110044464468956,
  0.00036934480885975063,
  0.04629307612776756,
  -0.0031781806610524654,
  0.027719246223568916,
  -0.0700652003288269,
  -0.026185080409049988,
  -0.011878612451255322,
  -0.02959599159657955,
  -0.09848450124263763,
  0.01502141822129488,
  0.015237392857670784,
  0.025053074583411217,
  -0.00997207686305046,
  0.00886986032128334,
  -0.01815677620470524,
  -0.007317076437175274,
  -0.00329733919352293,
  0.00993483979254961,
  0.00916031002998352,
  -0.008177251555025578,
  -0.01185627095401287,
  -0.029357675462961197,
  0.012444615364074707,
  0.01525228749960661,
  0.01858872547

In [19]:
print(qa_summaries[100].get_content(metadata_mode="all"))

[Excerpt from document]
page_number: 180
file_name: ../data/hackers_and_painters.pdf
title: Hackers and Painters
author: Paul Graham
known_for: Provides advice on the hacker mindset, arguing that hackers are really makers and creators - akin to painters - who can leverage their unique way of thinking to push boundaries, challenge the status quo, and shape the future through technology and entrepreneurship.
questions_this_excerpt_can_answer: 1. How does Paul Graham's perspective on the equivalence of programming languages influence his advice to hackers and creators in "Hackers and Painters"?

2. In "Hackers and Painters," what argument does Paul Graham make about the relationship between hackers and painters, and how does this analogy support his views on programming languages and innovation?
prev_section_summary: The section from "Hackers and Painters" by Paul Graham critiques the mindset of a stereotypical manager, referred to as the "pointy-haired boss," who prefers Java as a progra

### 🔧 Setup Query Engine and Pipeline

In [20]:
from llama_index.core import PromptTemplate
from utils import create_query_engine
from prompts import HYPE_ANSWER_GEN_PROMPT

HYPE_ANSWER_GEN_PROMPT_TEMPLATE = PromptTemplate(HYPE_ANSWER_GEN_PROMPT)

qa_summaries_query_engine = create_query_engine(
    index=qa_summaries_index, 
    mode="query",
    response_mode="compact",
    similiarty_top_k=5,
    vector_store_query_mode="mmr", 
    vector_store_kwargs={"mmr_threshold": 0.42},
    )

qa_summaries_query_engine.update_prompts({'response_synthesizer:text_qa_template':HYPE_ANSWER_GEN_PROMPT_TEMPLATE})

In [21]:
from utils import create_query_pipeline

from llama_index.core.query_pipeline import InputComponent

input_component = InputComponent()

qa_summaries_chain = [input_component, qa_summaries_query_engine]

qa_summaries_query_pipeline = create_query_pipeline(qa_summaries_chain)

In [22]:
qa_summaries_query_pipeline.run(input="How can I ensure unswerving decision-making in my life?")

[1;3;38;2;155;135;227m> Running module 245a2951-ff9d-46df-94b2-b6a6babfb68d with input: 
input: How can I ensure unswerving decision-making in my life?

[0m[1;3;38;2;155;135;227m> Running module 5437fb5e-b0d2-4786-aaa3-053530a6273e with input: 
input: How can I ensure unswerving decision-making in my life?

[0m

Response(response='To ensure unswerving decision-making in your life, you gotta arm yourself with the best mental models out there. Dive into evolution, game theory, and soak up insights from legends like Charlie Munger. Build that arsenal of principles, not just memories based on past events. This is your toolkit for smashing those nonlinear returns. Check out resources like the Farnam Street blog to sharpen your accuracy and rationality in decision-making. Your brain\'s a prediction machine—feed it wisely. And for your life strategy, embrace "bounded commitment." List your options, pick the best, and commit for a set time. It\'s all about agile moves and strategic thinking. Treat your time like capital. Keep hustling and stack those wins!', source_nodes=[NodeWithScore(node=TextNode(id_='8c10ec15-c230-4307-9a9b-482d6e807fe6', embedding=None, metadata={'page_number': 76, 'file_name': '../data/almanack_of_naval_ravikant.pdf', 'title': 'The Almanack of Naval Ravikant', 'author': 'Naval R