In [None]:
%%capture
!pip install llama-index==0.10.25 llama-index-embeddings-fastembed llama-index-retrievers-bm25 qdrant-client llama-index-vector-stores-qdrant llama-index-llms-cohere

In [None]:
import os
import sys
from getpass import getpass
import nest_asyncio

from IPython.display import Markdown, display

from dotenv import load_dotenv

# Allow nested event loops; later cells pass use_async=True to LlamaIndex components.
nest_asyncio.apply()

# Read environment variables (API keys, Qdrant URL) from the .env file one directory up.
load_dotenv("../.env")

# Must run before the `utils` import below.
# NOTE(review): assumes utils.py lives in ../helpers — confirm.
sys.path.append('../helpers')

from utils import setup_llm, setup_embed_model, setup_vector_store

In [None]:
# Prefer the value from the environment; prompt only when it is missing or empty.
# `.get()` matters: `os.environ[...]` raises KeyError for an unset variable, which
# would make the getpass fallback unreachable.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY') or getpass("Enter your OpenAI API key: ")

In [None]:
# Prefer the value from the environment; prompt only when it is missing or empty.
# `.get()` matters: `os.environ[...]` raises KeyError for an unset variable, which
# would make the getpass fallback unreachable.
QDRANT_URL = os.environ.get('QDRANT_URL') or getpass("Enter your Qdrant URL:")

In [None]:
# Prefer the value from the environment; prompt only when it is missing or empty.
# `.get()` matters: `os.environ[...]` raises KeyError for an unset variable, which
# would make the getpass fallback unreachable.
QDRANT_API_KEY = os.environ.get('QDRANT_API_KEY') or getpass("Enter your Qdrant API Key:")

In [None]:
from llama_index.core.settings import Settings
from llama_index.llms.openai import OpenAI
from utils import setup_llm, setup_embed_model

# Configure the chat model through the project helper.
# NOTE(review): presumably this registers the model on the global `Settings`
# (later cells read `Settings.llm`) — confirm in utils.
setup_llm(provider="openai", model="gpt-3.5-turbo", api_key=OPENAI_API_KEY)

# Configure the embedding model the same way (later cells read `Settings.embed_model`).
setup_embed_model(provider="openai", api_key=OPENAI_API_KEY)

In [54]:
from datasets import load_dataset


def _is_well_grounded(example):
    """Return True when the row has a groundedness score and it is at least 4."""
    groundedness = example['question_groundedness_score']
    return groundedness is not None and groundedness >= 4


# Pull the train split and keep only the well-grounded questions.
eval_dataset = load_dataset(
    "harpreetsahota/LI_Learning_RAG_Eval_Set",
    split='train',
).filter(_is_well_grounded)

# Fixed seed so the same 10-row sample is drawn on every run.
smol_eval_set = eval_dataset.shuffle(seed=2022).select(range(10))

In [None]:
from utils import get_documents_from_docstore

# Load the source documents from the persisted docstore directory via the project helper.
# NOTE(review): assumed to return a list of LlamaIndex documents — confirm in utils.
senpai_documents = get_documents_from_docstore("../data/words-of-the-senpais")

## Setup Qdrant Vector Store

In [None]:
from llama_index.core import StorageContext
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.storage.index_store.simple_index_store import SimpleIndexStore
from llama_index.core.settings import Settings
from utils import setup_vector_store

# Name of the Qdrant collection used for this notebook's experiments.
COLLECTION_NAME = "words-of-the-senpai-rr-fusion"

# Build the Qdrant-backed vector store through the project helper, with hybrid search requested.
# NOTE(review): what `enable_hybrid=True` configures is decided inside utils — confirm there.
rr_fusion_vector_store = setup_vector_store(QDRANT_URL, QDRANT_API_KEY, COLLECTION_NAME, enable_hybrid=True)

# Storage context = docstore reloaded from the same directory the documents were read from,
# a fresh in-memory index store, and the Qdrant vector store created above.
rr_fusion_storage_context = StorageContext.from_defaults(
    docstore = SimpleDocumentStore.from_persist_dir(persist_dir="../data/words-of-the-senpais"),
    index_store=SimpleIndexStore(),
    vector_store = rr_fusion_vector_store
    )

### Ingest with a docstore

In [None]:
from llama_index.core import VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.storage.docstore import SimpleDocumentStore

# NOTE(review): `ingest` and the re-imported `SimpleDocumentStore` are not used in this cell.
from utils import ingest 

# Split documents into small chunks (size 256, overlap 16) before embedding.
# NOTE(review): units are presumably tokens — confirm against the SentenceSplitter docs.
sentence_splitter = SentenceSplitter(chunk_size=256, chunk_overlap=16)

# Chunk, embed and store the documents using the storage context built earlier.
# NOTE(review): re-running this cell presumably re-ingests into the same Qdrant collection — confirm.
index = VectorStoreIndex.from_documents(
    documents=senpai_documents, 
    embed_model=Settings.embed_model,
    storage_context=rr_fusion_storage_context,
    transformations=[sentence_splitter]
    )

# A brief word on vector store query modes

The vector_store_query_mode in LlamaIndex determines the type of search to be performed. Here's a brief description of each mode:

 - `default`: This mode performs a vector search. It retrieves the most similar vectors based on the query vector. Dense embedding models create a numerical representation of a piece of text, represented as a long list of numbers. These dense vectors can capture rich semantics across the entire piece of text. `alpha=0.75` is used by default.

 - `hybrid`: This mode performs a hybrid search. It combines vector search with traditional search methods. `alpha` parameter determines weighting (`alpha = 0` -> bm25, `alpha = 1` -> vector search). 

 - `semantic_hybrid`: Semantic hybrid search combines text search with vector embeddings. Text search provides keyword matching and lexical retrieval. Vector embeddings allow finding documents with similar meaning, even if they don't contain exact keyword matches. This mode incorporates semantic reranking to hybrid search results to improve search relevance.

 - `sparse`: Most of the elements in a sparse vector are zero, with only a few key values being non-zero. These sparse vectors are great at capturing specific keywords and similar small details. You need to use a specialized embedding model to create sparse vectors. 
   - `FastEmbed` has a few choices for sparse text embedding models, for example you can pass in `prithvida/Splade_PP_en_v1` as the model name when you run `setup_embed_model` if you want to use them. 
    - We didn't use a sparse vector here, so we won't see this in action.  
    - Note, if you try this you'll need to set the `sparse_top_k` argument, which represents how many nodes will be retrieved from each dense and sparse query. For example, if `sparse_top_k=5` is set, that means I will retrieve 5 nodes using sparse vectors and 5 nodes using dense vectors.

 - `text_search`: Text search looks for exact keyword matches between the query and documents.

 - `similarity_top_k` (a retriever parameter, not a query mode): controls the final number of returned nodes. A fusion algorithm is applied to rank and order the nodes from different vector spaces, so `similarity_top_k=2` means the top two nodes after fusion are returned.

 - `hybrid_top_k` (also a parameter, not a query mode): returns the top k results from `hybrid` search. When it is set, `similarity_top_k` is used as the top k for the dense search.

In [None]:
QUERY_STRING = "How can I create my own luck?"

def test_retrievers(query=QUERY_STRING, index=index, **kwargs):
    """Retrieve nodes for ``query`` from ``index`` and print each hit with its score.

    Args:
        query: Question to retrieve against. Defaults to ``QUERY_STRING``.
        index: Object exposing ``as_retriever(**kwargs)``. Defaults to the ``index``
            that exists when this cell is executed (default is bound at definition time).
        **kwargs: Forwarded unchanged to ``index.as_retriever`` (e.g. query mode, top-k).

    Returns:
        None. Results are printed.
    """
    retriever_engine = index.as_retriever(**kwargs)
    retrieved_docs = retriever_engine.retrieve(query)
    print(f"Retrieved {len(retrieved_docs)} nodes.")
    print("\n")
    for node in retrieved_docs:
        # A node's score can be None; formatting None with `:.2f` raises TypeError,
        # so fall back to a placeholder instead of crashing mid-listing.
        score_text = "n/a" if node.score is None else f"{node.score:.2f}"
        print(f"Score: {score_text} - {node.text}...\n-----\n")
    
# Retriever keyword arguments to compare, keyed by a human-readable label.
# 'bm25' and 'hybrid' both use the hybrid query mode and differ only in `alpha`.
# NOTE(review): whether the Qdrant vector store actually implements the
# 'semantic_hybrid' and 'text_search' modes (rather than falling back to dense
# search) is not visible here — confirm against the vector store implementation.
mode_kwargs = {
    'default': {'vector_store_query_mode': 'default', 'similarity_top_k': 3},
    'bm25': {'vector_store_query_mode':'hybrid', 'alpha': 0.0, 'hybrid_top_k': 3}, 
    'hybrid': {'vector_store_query_mode':'hybrid', 'alpha': 0.25, 'hybrid_top_k': 3},
    'semantic_hybrid': {'vector_store_query_mode':'semantic_hybrid', 'alpha': 0.75, 'hybrid_top_k': 3},
    # 'sparse': {"sparse_top_k":5},
    'text_search': {'vector_store_query_mode':'text_search', 'similarity_top_k': 3},
}

# Run the default query through every configuration and print the hits for comparison.
for mode, kwargs in mode_kwargs.items():
    print(f"Retrieving nodes using: {mode} retrieval")
    test_retrievers(**kwargs)
    print(f"Retrieval with {mode} complete...")        
    print("\n")

# Query Transformation

When handling user queries in a RAG system, agent, or any other pipeline, there are various ways to transform and decompose the queries before executing them.

One way is query rewriting. This involves rewriting the original query in multiple ways; each rewritten query is then sent for retrieval and generation.

LlamaIndex implements various query transformations, [check the source code for details](https://github.com/run-llama/llama_index/blob/f116d75557d6867ed2cc61811a1c2f0b0c4d4ddb/llama-index-legacy/llama_index/legacy/indices/query/query_transform/base.py).


In [None]:
from llama_index.core import PromptTemplate

# Few-shot prompt for query expansion. Placeholders: {num_queries} (how many rewrites
# to produce) and {query} (the user's question). Every example is fenced with exactly
# three backticks so the fences match what the instructions tell the model to expect.
QUERY_GEN_PROMPT = """Users aren't always the best at articulating what they're looking for. Your task is to understand the
essence of the user query and generate {num_queries} alternate queries to expand the user's query so it's more robust. This way the user will
receive the most relevant information.

Examples are delimited by triple backticks (```) below

```
User Query: How can I find the positive in situations that seem negative?

Alternate Queries:

1. How can I cultivate optimism and positive thinking in my daily life?
2. Is it possible to find meaning and purpose in challenging or difficult times?
3. What are some effective strategies for reframing negative thoughts into positive ones?
```

```
User Query: How do I deal with setbacks, failures, delays, defeat, or other disasters?

Alternate Queries:

1. How can I build resilience and learn to cope with adversity effectively?
2. What are some practical tips for overcoming challenges and obstacles that I face?
3. How can I develop a growth mindset and view setbacks as opportunities for learning?
4. What are healthy ways to process and learn from failures and mistakes?
```

```
User Query: How can I overcome defeat and suffering by changing my mindset?

Alternate Queries:

1. What is the power of positive thinking and affirmations, and how can they benefit me?
2. Can mindfulness and meditation practices improve my mental well-being and outlook?
3. How can I develop self-compassion and acceptance, especially during difficult times?
```

Generate {num_queries} alternate queries, one on each line, for the following user query:\n
--------------------
User Query: {query}\n
--------------------

Alternate Queries:\n
"""


# Wrap the raw prompt string so LlamaIndex components can fill {num_queries} and {query}.
QUERY_GEN_PROMPT_TEMPLATE = PromptTemplate(QUERY_GEN_PROMPT)

In [None]:
def generate_queries(query=QUERY_STRING, llm=Settings.llm, num_queries=4):
    """Ask the LLM for alternate phrasings of ``query`` and return them as a list.

    Args:
        query: The user question to expand. Defaults to ``QUERY_STRING``.
        llm: Object exposing ``predict(prompt_template, **prompt_args)``. Defaults to the
            ``Settings.llm`` configured when this cell is executed.
        num_queries: Number of alternate queries requested in the prompt.

    Returns:
        list[str]: One entry per non-empty line of the LLM response, whitespace-stripped.
    """
    response = llm.predict(
        QUERY_GEN_PROMPT_TEMPLATE,
        num_queries=num_queries,
        query=query,
    )
    # A plain split on "\n" keeps blank and whitespace-only lines, which would then be
    # treated as queries; keep only lines that carry text.
    queries = [line.strip() for line in response.splitlines() if line.strip()]
    queries_str = "\n".join(queries)
    print(f"Generated queries:\n{queries_str}")
    return queries

generate_queries()

# Hybrid Fusion Retriever

The Hybrid Fusion Retriever combines semantic and keyword-based approaches. It pairs a [BM25-based retriever](https://en.wikipedia.org/wiki/Okapi_BM25) with a semantic index. BM25 is a ranking function used by search engines to estimate the relevance of documents to a given search query. 

#### How it works

The system follows a three-step process:

- **Query Generation/Rewriting**: It creates multiple queries from the original user query to better match the user's intent and improve the precision and recall of the retrieved results.

- **Retrieval**: It performs the retrieval for each query over an ensemble of retrievers.

- **Reranking/Fusion**: It combines the results from all queries and applies a reranking step to fuse the top relevant results.

#### ℹ️ Useful knowledge to have as a RAG practitioner

##### Index Fusion Mode

We set the mode to `reciprocal_rerank`. The system merges its index with a BM25 based retriever. This allows it to understand both the semantic relationships (meaningful connections between words) and keywords in the input queries. Other modes are `relative_score`, `dist_based_score`, `simple` .

  - [`reciprocal_rerank`](https://github.com/run-llama/llama_index/blob/f116d75557d6867ed2cc61811a1c2f0b0c4d4ddb/llama-index-core/llama_index/core/retrievers/fusion_retriever.py#L99): Reciprocal rank is a measure of how early a relevant item appears in a ranked list. Lower ranks correspond to higher relevance. This mode fuses the results from multiple sources by giving higher importance to nodes that appear earlier in the rankings across those sources.

  - [`relative_score`](https://github.com/run-llama/llama_index/blob/f116d75557d6867ed2cc61811a1c2f0b0c4d4ddb/llama-index-core/llama_index/core/retrievers/fusion_retriever.py#L135): It scales each score to a range from 0 to 1 using min-max scaling. Then it multiplies each scaled score by a retriever-specific weight. After that, it divides each score by the total number of queries. Basically, it scales, weights, and combines scores from multiple retrieval sources.

  - `dist_based_score`: Same as `relative_score`, but, instead of using the minimum and maximum scores directly, the function calculates them based on the mean and standard deviation of the scores. This reduces the impact of outliers on the scaling process.

  - `simple`: re-orders results based on original scores


##### **Reciprocal Rerank Algorithm**

 Since both retrievers calculate a score for the relevance of results, the system uses the reciprocal rerank algorithm to reshuffle the results. This is done without employing additional models or excessive computation, making the process more efficient.
 
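  - 🧮 **Rank Calculation**: For each unique node, calculate its reciprocal rank from each list where it appears. The reciprocal rank of a node in a list is defined as 1 divided by its position in that list (e.g., a node at rank 3 has a reciprocal rank of 1/3). LlamaIndex's implementation adds a smoothing constant to the denominator, scoring each appearance as `1 / (rank + k)` with `k = 60`, so that a single top-ranked hit does not dominate the fused score.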

  - 📊 **Score Aggregation**: Sum up the reciprocal ranks for each node across all lists in which it appears. This aggregated score represents the overall relevance of the node, taking into account its performance across multiple retrieval scenarios.

  - 🥇🥈🥉 **Reordering**: Finally, reorder all nodes based on their aggregated scores, from highest to lowest. This re-ranking step prioritizes nodes that consistently appear in higher ranks across multiple lists, thus likely to be more relevant to the query.

In [None]:
from llama_index.retrievers.bm25 import BM25Retriever

from llama_index.core.retrievers import QueryFusionRetriever

# Dense retriever over the vector index, returning 5 hits per query.
vector_retriever = index.as_retriever(similarity_top_k=5)

# Keyword (BM25) retriever built from the index's docstore, also returning 5 hits per query.
bm25_retriever = BM25Retriever.from_defaults(docstore=index.docstore, similarity_top_k=5)

from llama_index.core.query_engine import RetrieverQueryEngine

# Fuse both retrievers: generate extra queries with the custom prompt, run every query
# through both retrievers, then merge the result lists with reciprocal rerank.
# Recorded runs below show two generated queries per question with num_queries=3,
# i.e. the count appears to include the original query.
retriever = QueryFusionRetriever(
    [vector_retriever, bm25_retriever],
    similarity_top_k=5,
    num_queries=3,  # set this to 1 to disable query generation
    mode="reciprocal_rerank",
    use_async=True,
    verbose=True,
    query_gen_prompt=QUERY_GEN_PROMPT_TEMPLATE, 
)

In [None]:
# Smoke-test the fusion retriever on a single question; the hits are printed in the next cell.
nodes_with_scores = retriever.retrieve(
    "How can I stop wasting energy on projecting a facade and focus on expanding my potential as a human being?"
)

In [None]:
# Show every fused hit: score (two decimals) followed by the node text.
for scored_node in nodes_with_scores:
    print(f"Score: {scored_node.score:.2f} - {scored_node.text}...\n-----\n")

In [55]:
from llama_index.core.response_synthesizers import ResponseMode

from utils import create_query_pipeline
from utils import run_generations_on_eval_set

from prompts import HYPE_ANSWER_GEN_PROMPT

# `PromptTemplate` comes from the import in the query-generation prompt cell above.
HYPE_ANSWER_GEN_PROMPT_TEMPLATE = PromptTemplate(HYPE_ANSWER_GEN_PROMPT)

# Query engine on top of the fusion retriever, answering with the HYPE mentor prompt.
# The recorded answers below start with "Response 1:", consistent with an accumulate-style mode.
rr_fusion_query_engine = RetrieverQueryEngine.from_args(
    retriever,
    response_mode = ResponseMode.COMPACT_ACCUMULATE,
    use_async = True,
    text_qa_template = HYPE_ANSWER_GEN_PROMPT_TEMPLATE
    )

# Chain order: the LLM runs first, then the query engine.
# NOTE(review): the recorded output shows the query engine receiving the LLM's
# "assistant: ..." answer rather than the original question — confirm this is intended.
rr_fusion_chain = [Settings.llm,  rr_fusion_query_engine]

rr_fusion_query_pipeline = create_query_pipeline(rr_fusion_chain)

# Run the pipeline over the 10-row eval sample; results are stored under "rr-fusion-answer".
# NOTE(review): this rebinds `smol_eval_set`, so re-running the cell operates on the
# already-augmented dataset — confirm the helper tolerates an existing column.
smol_eval_set = run_generations_on_eval_set(
    eval_dataset=smol_eval_set, 
    col_name="rr-fusion-answer", 
    query_pipeline=rr_fusion_query_pipeline,
    time_out=False)

Generating answers:   0%|          | 0/10 [00:00<?, ?it/s]

[1;3;38;2;155;135;227m> Running module 46b96384-11fc-490b-9462-c9aeea6597d5 with input: 
messages: Should I be forbidden from contemplating the universe and restricted to only focusing on a part of it?

[0m[1;3;38;2;155;135;227m> Running module 814a0f6b-59ac-468f-aebb-bc96f6ecdc9c with input: 
input: assistant: No, you should not be forbidden from contemplating the universe. It is important to have a broad perspective and consider the bigger picture in order to gain a deeper understanding of the w...

[0mGenerated queries:
1. How can I strike a balance between contemplating the universe broadly and focusing on specific aspects for deeper understanding?
2. What are the benefits of exploring the universe from a macroscopic perspective versus a microscopic perspective, and how can I integrate both approaches effectively?


Generating answers:  10%|█         | 1/10 [00:04<00:43,  4.88s/it]

[1;3;38;2;155;135;227m> Running module 46b96384-11fc-490b-9462-c9aeea6597d5 with input: 
messages: How can I determine the value of different qualities and traits in life?

[0m[1;3;38;2;155;135;227m> Running module 814a0f6b-59ac-468f-aebb-bc96f6ecdc9c with input: 
input: assistant: Determining the value of different qualities and traits in life is a highly subjective and personal process. Here are some steps you can take to help you determine the value of different qu...

[0mGenerated queries:
1. How can I identify and prioritize my core values and beliefs to guide my decision-making process?
2. What role do qualities like authenticity, honesty, and compassion play in shaping a meaningful and fulfilling life?


Generating answers:  20%|██        | 2/10 [00:12<00:52,  6.53s/it]

[1;3;38;2;155;135;227m> Running module 46b96384-11fc-490b-9462-c9aeea6597d5 with input: 
messages: How can I define wealth in terms of assets that earn while I sleep?

[0m[1;3;38;2;155;135;227m> Running module 814a0f6b-59ac-468f-aebb-bc96f6ecdc9c with input: 
input: assistant: Wealth can be defined as the accumulation of assets that generate passive income while you are not actively working. These assets can include investments such as stocks, bonds, real estate ...

[0mGenerated queries:
1. What are the different types of assets that can generate passive income and contribute to wealth accumulation?
2. How can one effectively build a diversified portfolio of income-generating assets to achieve financial independence and security?


Generating answers:  30%|███       | 3/10 [00:17<00:39,  5.58s/it]

[1;3;38;2;155;135;227m> Running module 46b96384-11fc-490b-9462-c9aeea6597d5 with input: 
messages: How can I seek understanding first, before anything else, even if I lack the necessities of life?

[0m[1;3;38;2;155;135;227m> Running module 814a0f6b-59ac-468f-aebb-bc96f6ecdc9c with input: 
input: assistant: Seeking understanding first, even when lacking the necessities of life, can be a challenging but rewarding endeavor. Here are some steps you can take to prioritize understanding in your lif...

[0mGenerated queries:
1. How can I develop a growth mindset and prioritize learning and understanding in my life?
2. What are some practical steps I can take to enhance my empathy and deepen my understanding of others?


Generating answers:  40%|████      | 4/10 [00:25<00:41,  6.91s/it]

[1;3;38;2;155;135;227m> Running module 46b96384-11fc-490b-9462-c9aeea6597d5 with input: 
messages: How can I better understand the importance of science and technology in shaping history and civilization?

[0m[1;3;38;2;155;135;227m> Running module 814a0f6b-59ac-468f-aebb-bc96f6ecdc9c with input: 
input: assistant: 1. Study the history of scientific discoveries and technological advancements: By learning about key scientific breakthroughs and technological innovations throughout history, you can see h...

[0mGenerated queries:
1. How has scientific knowledge and technological progress impacted the evolution of human societies over time?
2. What are some key examples of scientific discoveries and technological advancements that have significantly influenced historical events and societal development?


Generating answers:  50%|█████     | 5/10 [00:34<00:37,  7.49s/it]

[1;3;38;2;155;135;227m> Running module 46b96384-11fc-490b-9462-c9aeea6597d5 with input: 
messages: How should I approach situations where I don't fully understand the potential risks involved?

[0m[1;3;38;2;155;135;227m> Running module 814a0f6b-59ac-468f-aebb-bc96f6ecdc9c with input: 
input: assistant: When you encounter a situation where you don't fully understand the potential risks involved, it's important to take a cautious and proactive approach. Here are some steps you can take:

1....

[0mGenerated queries:
1. How can I effectively assess and manage risks in unfamiliar situations to make informed decisions?
2. What are some strategies for taking a cautious and proactive approach when facing uncertain or risky situations?


Generating answers:  60%|██████    | 6/10 [00:43<00:32,  8.11s/it]

[1;3;38;2;155;135;227m> Running module 46b96384-11fc-490b-9462-c9aeea6597d5 with input: 
messages: How far up the ladder of abstraction will parallelism go in the future?

[0m[1;3;38;2;155;135;227m> Running module 814a0f6b-59ac-468f-aebb-bc96f6ecdc9c with input: 
input: assistant: It is difficult to predict exactly how far up the ladder of abstraction parallelism will go in the future, as it will depend on a variety of factors such as technological advancements, rese...

[0mGenerated queries:
1. What are the potential challenges and limitations of parallelism in computing, and how can they be addressed in the future?
2. How can advancements in parallel computing benefit other fields beyond traditional computing, such as scientific research or artificial intelligence development?


Generating answers:  70%|███████   | 7/10 [00:51<00:24,  8.01s/it]

[1;3;38;2;155;135;227m> Running module 46b96384-11fc-490b-9462-c9aeea6597d5 with input: 
messages: How can I use an API to implement a rating system for news articles to counter clickbait and increase journalist credibility?

[0m[1;3;38;2;155;135;227m> Running module 814a0f6b-59ac-468f-aebb-bc96f6ecdc9c with input: 
input: assistant: To implement a rating system for news articles using an API, you can follow these steps:

1. Choose a reputable API that provides sentiment analysis or rating capabilities. Some popular opt...

[0mGenerated queries:
1. What are the benefits of using sentiment analysis in news article ratings and how can it improve user experience?
2. Are there any ethical considerations to keep in mind when implementing a rating system for news articles using an API?


Generating answers:  80%|████████  | 8/10 [00:59<00:15,  7.85s/it]

[1;3;38;2;155;135;227m> Running module 46b96384-11fc-490b-9462-c9aeea6597d5 with input: 
messages: How can I understand the underlying reality of what a company does and how it creates wealth?

[0m[1;3;38;2;155;135;227m> Running module 814a0f6b-59ac-468f-aebb-bc96f6ecdc9c with input: 
input: assistant: To understand the underlying reality of what a company does and how it creates wealth, you can start by conducting thorough research on the company. Here are some steps you can take:

1. St...

[0mGenerated queries:
1. What are the key factors to consider when researching a company's business model and financial health?
2. How can I analyze a company's competitive landscape and industry trends to evaluate its long-term growth prospects?


Generating answers:  90%|█████████ | 9/10 [01:07<00:08,  8.10s/it]

[1;3;38;2;155;135;227m> Running module 46b96384-11fc-490b-9462-c9aeea6597d5 with input: 
messages: How can I truly learn something?

[0m[1;3;38;2;155;135;227m> Running module 814a0f6b-59ac-468f-aebb-bc96f6ecdc9c with input: 
input: assistant: 1. Engage actively: Instead of passively consuming information, actively engage with the material by asking questions, taking notes, and participating in discussions.

2. Practice regularly...

[0mGenerated queries:
1. How can I enhance my learning experience by actively engaging with the material and participating in discussions?
2. What are effective strategies for reinforcing learning and retention through regular practice and repetition?


Generating answers: 100%|██████████| 10/10 [01:17<00:00,  7.74s/it]


In [56]:
# Print each eval question next to the answer produced by the RR-fusion pipeline.
for row in smol_eval_set.select(range(10)):
    print("💬\n")
    print(f"""🙋🏽‍♂️ Question: {row["question"]}""")
    # The original used four opening quotes, which printed a stray leading `"`,
    # and misspelled "Response".
    print(f"""RR Fusion Response: {row["rr-fusion-answer"]}""")

💬

🙋🏽‍♂️ Question: Should I be forbidden from contemplating the universe and restricted to only focusing on a part of it?
"RR Fusion Reponse: Response 1: You need to focus on one big thing and attach subroutines to it. Trying to do more than one big thing will only lead to constant decision-making and confusion. Remember to map everything you learn onto your broad mission or direction to gain a deeper understanding. Find the balance between contemplating the universe and focusing on specific aspects to truly grasp the world around you. It's all about fitting ideas into your mental clothesline and compressing data to remember them effectively. Stay focused, stay driven, and keep pushing forward with purpose.
💬

🙋🏽‍♂️ Question: How can I determine the value of different qualities and traits in life?
"RR Fusion Reponse: Response 1: Hey, listen up! The key to determining the value of different qualities and traits in life lies in self-honesty. Reflect on your core values and prioritize hon

# `SubQuestionQueryEngine`

The `SubQuestionQueryEngine` works by breaking down a complex query into simpler sub-questions (with each potentially targeting a specific data source).

#### Here's how it works:

- The `SubQuestionQueryEngine` receives a complex query.

- It then decomposes this query into several sub-questions. Each sub-question is designed to extract specific information from a particular data source.

- The engine then sends these sub-questions to their respective data sources and gathers the responses.

- Finally, it synthesizes all the intermediate responses to form a final comprehensive answer to the original complex query.

This process makes the `SubQuestionQueryEngine` particularly useful for handling compare/contrast queries across documents, as well as queries pertaining to a specific document. It's also well-suited for multi-document queries and can execute any number of sub-queries against any subset of query engine tools before synthesizing the final answer.

In [67]:
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.query_engine import SubQuestionQueryEngine

# A single tool wrapping the default query engine of the index; the name and description
# are what the question generator sees when deciding where to route sub-questions.
query_engine_tools = [
    QueryEngineTool(
        query_engine=index.as_query_engine(),
        metadata=ToolMetadata(
            name="the senpais",
            description="The collective thoughts and writings of all my virtual mentors",
        ),
    ),
]

sub_question_query_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=query_engine_tools,
    use_async=True
    )

# Swap the final synthesis prompt for the HYPE mentor prompt (defined in an earlier cell).
sub_question_query_engine.update_prompts({'response_synthesizer:text_qa_template':HYPE_ANSWER_GEN_PROMPT_TEMPLATE})

In [70]:
from utils import display_prompt_dict

# Inspect every prompt the engine uses, including the overridden text_qa_template.
sub_q_prompts = sub_question_query_engine.get_prompts()

display_prompt_dict(sub_q_prompts)

 **Prompt Key**: question_gen:question_gen_prompt
**Text:**
```
You are a world class state of the art agent.

You have access to multiple tools, each representing a different data source or API.
Each of the tools has a name and a description, formatted as a JSON dictionary.
The keys of the dictionary are the names of the tools and the values are the descriptions.
Your purpose is to help answer a complex user question by generating a list of sub questions that can be answered by the tools.

These are the guidelines you consider when completing your task:
* Be as specific as possible
* The sub questions should be relevant to the user question
* The sub questions should be answerable by the tools provided
* You can generate multiple sub questions for each tool
* Tools must be specified by their name, not their description
* You don't need to use a tool if you don't think it's relevant

Output the list of sub questions by calling the SubQuestionList function.

## Tools
```json
{tools_str}
```

## User Question
{query_str}

```

**Prompt Key**: response_synthesizer:text_qa_template
**Text:**
```
You're a trusted mentor to an adult mentee. Your mentee is seeking advice in the form of a question.

Below is your mentee's question:

----------------------
{query_str}
----------------------

You have some raw thoughts which you must use to formulate an answer to your mentee's question. Below are your thoughts:

----------------------
{context_str}
----------------------

Reflect on the question and your raw thoughts, then answer your mentee's question. Your response must be based on your raw thoughts, not on prior knowledge. 

DO NOT use any qualifiers, relative clauses, or introductory modifiers in your answer. Provide your answer question using the second person
perspective, speaking directly to your mentee, in the form of a OG mentor who has been there and done that and is now coming back with the
facts and giving them back to you. Use a HYPE tone and be straight up with your mentee! REMEMBER: Your response must be based on your raw thoughts, not on prior knowledge.

```

**Prompt Key**: response_synthesizer:refine_template
**Text:**
```
The original query is as follows: {query_str}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer (only if needed) with some more context below.
------------
{context_msg}
------------
Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer.
Refined Answer: 
```



In [71]:
# A deliberately multi-part question so the engine has something to decompose into sub-questions.
sub_question_query_engine.query("How can I build my own luck, what are the types of luck I should pursue, and how can I hack luck and minimize my exposure to downside while maintaining skin in the game?")

Generated 2 sub questions.
[1;3;38;2;237;90;200m[the senpais] Q: What are the different types of luck that can be pursued?
[0m[1;3;38;2;90;149;237m[the senpais] Q: How can luck be hacked to minimize exposure to downside while maintaining skin in the game?
[0m[1;3;38;2;237;90;200m[the senpais] A: The different types of luck that can be pursued are blind luck, luck through persistence, hard work, hustle, and motion, luck through becoming skilled at spotting luck in a field, and luck through building a unique character, brand, or mindset.
[0m[1;3;38;2;90;149;237m[the senpais] A: Luck can be hacked to minimize exposure to downside while maintaining skin in the game by adopting a strategy where risks are increased when winning and reduced after losses. This technique involves playing with the house money, which means betting aggressively when there is a profit and refraining from doing so when there is a deficit. This approach allows for maintaining skin in the game while managing do

Response(response="To build your own luck, pursue blind luck, luck through persistence, hard work, hustle, and motion, luck through becoming skilled at spotting luck in a field, and luck through building a unique character, brand, or mindset. Hack luck by increasing risks when winning and reducing after losses. Play with the house money - bet aggressively when there's a profit and hold back when there's a deficit. This way, you maintain skin in the game while managing downside risks effectively. Go out there and make your own luck!", source_nodes=[NodeWithScore(node=TextNode(id_='3edfead1-f08b-406e-ade1-4cf49ba2649f', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='Sub question: What are the different types of luck that can be pursued?\nResponse: The different types of luck that can be pursued are blind luck, luck through persistence, hard work, hustle, and motion, luck through becoming skilled at spotting luck in a f

# Hypothetical Document Embeddings (HyDE)

At a high level, [HyDE](https://arxiv.org/pdf/2212.10496.pdf) is an embedding technique that takes a query, generates a hypothetical answer, embeds that generated document, and uses the resulting embedding as the search query. 

- 🧐 **Problem Tackled**: Addresses the struggle of creating fully zero-shot dense retrieval systems without relevance labels.

- 📚 **Traditional Methods**: Rely on relevance labels for document retrieval based on semantic similarities.

- 🚫 **Zero-Shot Challenge**: Especially tough without a large dataset for training.

### What is HyDE?

Given a query, `HyDE` instructs a language model to generate a hypothetical document.

This document captures relevance patterns but might contain inaccuracies or false details.

After generating the hypothetical document, an unsupervised contrastively learned encoder encodes the document into an embedding vector.

This vector identifies a neighborhood in the corpus embedding space, where similar real documents are retrieved based on vector similarity.

### How Does HyDE Work?

The process starts by feeding a query to a generative model with the instruction to "write a document that answers the question". This generates a hypothetical document that captures the essence of relevance.

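- The LLM writes the text of a "fake" (hypothetical) document that answers the query

- That text may contain inaccuracies; what matters is its embedding, not its literal content

- The embedding is used solely as the search vector and is never added to the vector store index

- The hypothetical document is an intermediate artifact; it does not appear among the retrieved source nodes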

This vector is used to search against the corpus embeddings, and the most similar real documents are retrieved. The idea is that a hypothetical answer to a question is more semantically similar to the real answer than the question is. 

**In practice this means that your search would use GPT to generate a hypothetical answer, then embed that and use it for search**.

Key advantages of HyDE:

- Zero-shot, no labeled data or fine-tuning needed

- Performs comparably to fine-tuned retrievers across tasks/languages

- Grounds the query in real data via generated hypothetical documents

In [72]:
from llama_index.core.indices.query.query_transform import HyDEQueryTransform

from llama_index.core.query_engine import TransformQueryEngine

In [83]:
# Query transform that has the LLM draft a hypothetical answer for each query.
# NOTE(review): per the LlamaIndex docs, include_original=True also keeps the
# original query string as one of the strings to embed — confirm for this version.
hyde = HyDEQueryTransform(include_original=True)

# Default query engine over the existing index; HyDE is layered on top of it.
base_query_engine = index.as_query_engine()

# Every query goes through the HyDE transform before reaching the base engine.
hyde_query_engine = TransformQueryEngine(
    query_engine=base_query_engine,
    query_transform=hyde,
)

In [78]:
# Collect every prompt used by the HyDE engine (query transform + response
# synthesizer) and render them for inspection.
hyde_prompts = hyde_query_engine.get_prompts()
display_prompt_dict(hyde_prompts)

 **Prompt Key**: query_transform:hyde_prompt
**Text:**
```
Please write a passage to answer the question
Try to include as many key details as possible.


{context_str}


Passage:"""

```

**Prompt Key**: query_engine:response_synthesizer:text_qa_template
**Text:**
```
Context information is below.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {query_str}
Answer: 
```

**Prompt Key**: query_engine:response_synthesizer:refine_template
**Text:**
```
The original query is as follows: {query_str}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer (only if needed) with some more context below.
------------
{context_msg}
------------
Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer.
Refined Answer: 
```



In [81]:
# Run the sample query through the HyDE-wrapped engine.
response = hyde_query_engine.query(QUERY_STRING)

# Render the answer in bold as Markdown.
bold_answer = Markdown("<b>{}</b>".format(response))
display(bold_answer)

<b>You can create your own luck by putting yourself in a position to capitalize on luck or attract luck by becoming the best at what you do, refining your skills until you excel, and being sensitive to opportunities that others may overlook.</b>

In [85]:
# Chain the default LLM into the HyDE query engine and build a pipeline from it.
# NOTE(review): the run log for this cell shows the second module receiving the
# LLM's "assistant: ..." reply rather than the original question — confirm that
# feeding the LLM's answer into the HyDE engine is what this evaluation intends.
hyde_chain = [Settings.llm, hyde_query_engine]
hyde_query_pipeline = create_query_pipeline(hyde_chain)

# Generate an answer for each eval question; results are requested under the
# "hyde-answer" column so they can be compared with the other strategies.
smol_eval_set = run_generations_on_eval_set(
    query_pipeline=hyde_query_pipeline,
    eval_dataset=smol_eval_set,
    col_name="hyde-answer",
    time_out=False,
)

Generating answers:   0%|          | 0/10 [00:00<?, ?it/s]

[1;3;38;2;155;135;227m> Running module 1e4bae76-afaa-4d79-98c3-e2927cfcd247 with input: 
messages: Should I be forbidden from contemplating the universe and restricted to only focusing on a part of it?

[0m[1;3;38;2;155;135;227m> Running module 3a2d21f9-1152-4ad5-8a7b-903fc2074e52 with input: 
input: assistant: No, you should not be forbidden from contemplating the universe. It is important to have a broad perspective and consider the bigger picture in order to gain a deeper understanding of the w...

[0m

Generating answers:  10%|█         | 1/10 [00:06<01:02,  6.95s/it]

[1;3;38;2;155;135;227m> Running module 1e4bae76-afaa-4d79-98c3-e2927cfcd247 with input: 
messages: How can I determine the value of different qualities and traits in life?

[0m[1;3;38;2;155;135;227m> Running module 3a2d21f9-1152-4ad5-8a7b-903fc2074e52 with input: 
input: assistant: Determining the value of different qualities and traits in life is a highly subjective and personal process. Here are some steps you can take to help you determine the value of different qu...

[0m

Generating answers:  20%|██        | 2/10 [00:19<01:23, 10.39s/it]

[1;3;38;2;155;135;227m> Running module 1e4bae76-afaa-4d79-98c3-e2927cfcd247 with input: 
messages: How can I define wealth in terms of assets that earn while I sleep?

[0m[1;3;38;2;155;135;227m> Running module 3a2d21f9-1152-4ad5-8a7b-903fc2074e52 with input: 
input: assistant: Wealth can be defined as the accumulation of assets that generate passive income or earnings while you are not actively working or involved in the day-to-day management of those assets. The...

[0m

Generating answers:  30%|███       | 3/10 [00:25<00:58,  8.36s/it]

[1;3;38;2;155;135;227m> Running module 1e4bae76-afaa-4d79-98c3-e2927cfcd247 with input: 
messages: How can I seek understanding first, before anything else, even if I lack the necessities of life?

[0m[1;3;38;2;155;135;227m> Running module 3a2d21f9-1152-4ad5-8a7b-903fc2074e52 with input: 
input: assistant: Seeking understanding first, even when lacking the necessities of life, can be a challenging but rewarding endeavor. Here are some steps you can take to prioritize understanding in your lif...

[0m

Generating answers:  40%|████      | 4/10 [00:37<00:57,  9.64s/it]

[1;3;38;2;155;135;227m> Running module 1e4bae76-afaa-4d79-98c3-e2927cfcd247 with input: 
messages: How can I better understand the importance of science and technology in shaping history and civilization?

[0m[1;3;38;2;155;135;227m> Running module 3a2d21f9-1152-4ad5-8a7b-903fc2074e52 with input: 
input: assistant: 1. Study the history of scientific discoveries and technological advancements: By learning about key scientific breakthroughs and technological innovations throughout history, you can see h...

[0m

Generating answers:  50%|█████     | 5/10 [00:50<00:53, 10.77s/it]

[1;3;38;2;155;135;227m> Running module 1e4bae76-afaa-4d79-98c3-e2927cfcd247 with input: 
messages: How should I approach situations where I don't fully understand the potential risks involved?

[0m[1;3;38;2;155;135;227m> Running module 3a2d21f9-1152-4ad5-8a7b-903fc2074e52 with input: 
input: assistant: When you encounter a situation where you don't fully understand the potential risks involved, it's important to take a cautious and proactive approach. Here are some steps you can take:

1....

[0m

Generating answers:  60%|██████    | 6/10 [01:00<00:42, 10.65s/it]

[1;3;38;2;155;135;227m> Running module 1e4bae76-afaa-4d79-98c3-e2927cfcd247 with input: 
messages: How far up the ladder of abstraction will parallelism go in the future?

[0m[1;3;38;2;155;135;227m> Running module 3a2d21f9-1152-4ad5-8a7b-903fc2074e52 with input: 
input: assistant: It is difficult to predict exactly how far up the ladder of abstraction parallelism will go in the future, as it will depend on a variety of factors such as technological advancements, rese...

[0m

Generating answers:  70%|███████   | 7/10 [01:09<00:30, 10.15s/it]

[1;3;38;2;155;135;227m> Running module 1e4bae76-afaa-4d79-98c3-e2927cfcd247 with input: 
messages: How can I use an API to implement a rating system for news articles to counter clickbait and increase journalist credibility?

[0m[1;3;38;2;155;135;227m> Running module 3a2d21f9-1152-4ad5-8a7b-903fc2074e52 with input: 
input: assistant: To implement a rating system for news articles using an API, you can follow these steps:

1. Choose a reputable API that provides sentiment analysis or credibility scoring for news articles...

[0m

Generating answers:  80%|████████  | 8/10 [01:18<00:19,  9.69s/it]

[1;3;38;2;155;135;227m> Running module 1e4bae76-afaa-4d79-98c3-e2927cfcd247 with input: 
messages: How can I understand the underlying reality of what a company does and how it creates wealth?

[0m[1;3;38;2;155;135;227m> Running module 3a2d21f9-1152-4ad5-8a7b-903fc2074e52 with input: 
input: assistant: To understand the underlying reality of what a company does and how it creates wealth, you can start by conducting thorough research on the company. Here are some steps you can take:

1. St...

[0m

Generating answers:  90%|█████████ | 9/10 [01:29<00:10, 10.02s/it]

[1;3;38;2;155;135;227m> Running module 1e4bae76-afaa-4d79-98c3-e2927cfcd247 with input: 
messages: How can I truly learn something?

[0m[1;3;38;2;155;135;227m> Running module 3a2d21f9-1152-4ad5-8a7b-903fc2074e52 with input: 
input: assistant: 1. Engage actively: Instead of passively consuming information, actively engage with the material by asking questions, making connections, and seeking out additional resources.

2. Practice...

[0m

Generating answers: 100%|██████████| 10/10 [01:39<00:00,  9.92s/it]


In [86]:
# Print each evaluation question next to the answers stored in the
# "rr-fusion-answer" and "hyde-answer" columns so they can be compared by eye.
# Preview at most 10 rows, without failing if the eval set holds fewer.
n_preview = min(10, len(smol_eval_set))
for row in smol_eval_set.select(range(n_preview)):
    print("💬\n")
    print(f"""🙋🏽‍♂️ Question: {row["question"]}""")
    # The labels previously opened with four quotes, which printed a stray
    # leading '"', and misspelled "Response".
    print(f"""RR Fusion Response: {row["rr-fusion-answer"]}""")
    print(f"""HyDE Response: {row["hyde-answer"]}""")

💬

🙋🏽‍♂️ Question: Should I be forbidden from contemplating the universe and restricted to only focusing on a part of it?
"RR Fusion Reponse: Response 1: You need to focus on one big thing and attach subroutines to it. Trying to do more than one big thing will only lead to constant decision-making and confusion. Remember to map everything you learn onto your broad mission or direction to gain a deeper understanding. Find the balance between contemplating the universe and focusing on specific aspects to truly grasp the world around you. It's all about fitting ideas into your mental clothesline and compressing data to remember them effectively. Stay focused, stay driven, and keep pushing forward with purpose.
"HyDE Reponse: No, individuals should not be forbidden from contemplating the universe. It is crucial to have a broad perspective and consider the bigger picture to gain a deeper understanding of the world. At the same time, focusing on specific aspects of the universe is essential 