In [None]:
%%capture
!pip install llama-index==0.10.37 cohere==5.5.0 openai==1.30.1 llama-index-embeddings-openai==0.1.9 llama-index-llms-cohere==0.2.0 qdrant-client==1.9.1 llama-index-vector-stores-qdrant==0.2.8 

In [1]:
import os

from getpass import getpass
import nest_asyncio

from dotenv import load_dotenv

nest_asyncio.apply()

load_dotenv()

True

In [2]:
CO_API_KEY = os.environ['CO_API_KEY'] or getpass("Enter your Cohere API key: ")

In [3]:
OPENAI_API_KEY = os.environ['OPENAI_API_KEY'] or getpass("Enter your OpenAI API key: ")

In [4]:
QDRANT_URL = os.environ['QDRANT_URL'] or getpass("Enter your Qdrant URL:")

In [5]:
QDRANT_API_KEY = os.environ['QDRANT_API_KEY'] or  getpass("Enter your Qdrant API Key:")

In [None]:
from pathlib import Path

def create_directory(directory_name):
    path = Path(directory_name)
    path.mkdir(parents=True, exist_ok=True)
    print(f"Directory '{directory_name}' created successfully.")

create_directory("data")

In [None]:
!wget -P data https://www.gutenberg.org/cache/epub/10763/pg10763.txt 

# 🗄️ Storing

Loading and indexing data costs time and money.

By default, indexed data is stored in memory. But, you can store your data to avoid the time and costs associated with re-indexing them.  The simplest way to do this **persisting to disk**.

Each `Index` object has a `.persist()` method, which will write all the data to disk at the specified location.

Now that we've dowloaded data, let's:

1) Load as Document
2) Parse as Nodes
3) Create index

In [7]:
from llama_index.core import SimpleDirectoryReader

# file_path = "data/pg10763.txt"
# JC code
file_path = "../data/pg10763.txt"

document = SimpleDirectoryReader(input_files=[file_path], filename_as_id=True).load_data()

In [8]:
# Create Node parser
from llama_index.core.node_parser import SentenceSplitter

sentence_splitter = SentenceSplitter(
    chunk_size=512, 
    chunk_overlap=16,
    paragraph_separator="\n\n\n\n" 
)

In [13]:
# Instantiate embedding model
# from llama_index.embeddings.openai import OpenAIEmbedding
# embed_model = OpenAIEmbedding(model_name="text-embedding-3-small")

# JC code
from llama_index.embeddings.cohere import CohereEmbedding

# embed_v3 = CohereEmbedding(api_key=CO_API_KEY,model_name="embed-english-v3.0")
embed_model = CohereEmbedding(api_key=CO_API_KEY,model_name="embed-english-light-v3.0")


# ☁️ Using a Vector Database

We'll use qdrant as our vector database of choice throughout this course.

To use qdrant to store embeddings from the `VectorStoreIndex`, you need to:

- Initialize the qdrant client

- Create a `Collection` to store your data in qdrant

- Assign qdrant as the `vector_store` in a `StorageContext`

- Initialize your `VectorStoreIndex` using that `StorageContext`

Below, we initialize a `QdrantClient` for interacting with qdrant, an open-source vector store. 


In [14]:
import qdrant_client
from llama_index.vector_stores.qdrant import QdrantVectorStore

# initialize qdrant client
client = qdrant_client.QdrantClient(
    url=QDRANT_URL, 
    api_key=QDRANT_API_KEY,
)

vector_store = QdrantVectorStore(
    client=client, 
    collection_name="it_can_be_done",
    embed_model=embed_model,
)

2025-09-19 05:38:38,817 - INFO - HTTP Request: GET https://ee73f8ce-826d-49d8-a961-754dfeac8b0c.us-east-1-1.aws.cloud.qdrant.io:6333 "HTTP/1.1 200 OK"
2025-09-19 05:38:38,875 - INFO - HTTP Request: GET https://ee73f8ce-826d-49d8-a961-754dfeac8b0c.us-east-1-1.aws.cloud.qdrant.io:6333/collections/it_can_be_done/exists "HTTP/1.1 200 OK"


# 🗃️ Storage Context

`StorageContext` in `LlamaIndex` is a core abstraction that revolves around the storage of `Nodes`, indices, and vectors.  It facilitates data storage and retrieval.

It is a utility container that supports the following:

 - `docstore`: A [`BaseDocumentStore`](https://github.com/run-llama/llama_index/blob/main/llama-index-core/llama_index/core/storage/docstore/types.py) for storing nodes.

 - `index_store`: A [`BaseIndexStore`](https://github.com/run-llama/llama_index/blob/main/llama-index-core/llama_index/core/storage/index_store/types.py#L13) for storing indices.

 - `vector_store`: A [`VectorStore`](https://github.com/run-llama/llama_index/blob/main/llama-index-core/llama_index/core/vector_stores/simple.py) for storing vectors.

 - `graph_store`: A [`GraphStore`](https://github.com/run-llama/llama_index/blob/main/llama-index-core/llama_index/core/graph_stores/simple.py) for storing knowledge graphs.

Below we instantiate the `StorageContext` from default settings indicating that we want to use a vector store.

In [15]:
from llama_index.core import StorageContext

# assign qdrant vector store to storage context
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
    )

In [16]:
from llama_index.core import  VectorStoreIndex

# create the index
index = VectorStoreIndex.from_documents(
    document,
    show_progress=True,
    store_nodes_override=True,
    transformation=[sentence_splitter],
    embed_model=embed_model,
    storage_context=storage_context,
)

Parsing nodes:   0%|          | 0/1 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/126 [00:00<?, ?it/s]

2025-09-19 05:38:51,959 - INFO - HTTP Request: POST https://api.cohere.com/v2/embed "HTTP/1.1 200 OK"
2025-09-19 05:38:52,140 - INFO - HTTP Request: POST https://api.cohere.com/v2/embed "HTTP/1.1 200 OK"
2025-09-19 05:38:52,301 - INFO - HTTP Request: POST https://api.cohere.com/v2/embed "HTTP/1.1 200 OK"
2025-09-19 05:38:52,442 - INFO - HTTP Request: POST https://api.cohere.com/v2/embed "HTTP/1.1 200 OK"
2025-09-19 05:38:52,598 - INFO - HTTP Request: POST https://api.cohere.com/v2/embed "HTTP/1.1 200 OK"
2025-09-19 05:38:52,736 - INFO - HTTP Request: POST https://api.cohere.com/v2/embed "HTTP/1.1 200 OK"
2025-09-19 05:38:52,864 - INFO - HTTP Request: POST https://api.cohere.com/v2/embed "HTTP/1.1 200 OK"
2025-09-19 05:38:53,000 - INFO - HTTP Request: POST https://api.cohere.com/v2/embed "HTTP/1.1 200 OK"
2025-09-19 05:38:53,142 - INFO - HTTP Request: POST https://api.cohere.com/v2/embed "HTTP/1.1 200 OK"
2025-09-19 05:38:53,271 - INFO - HTTP Request: POST https://api.cohere.com/v2/embe

# 🪃 Retrieval

A `Retriever` is an interface exposed by the `Index`. An `Index` with its `Retriever` is used for storing and fetching data. The `Retriever` is a part of the `Index` and is used to retrieve the data stored in the Index.


### LlamaIndex provides [many different types of retrievers](https://github.com/run-llama/llama_index/tree/main/llama-index-core/llama_index/core/retrievers) to fetch relevant information from ingested data based on a given query. 

Some examples include

### Vector Retriever

The vector retriever uses vector similarity search to find the most relevant nodes (chunks of text) based on the query embedding. It requires a vector database like to store and search through the node embeddings.

### [Fusion Retriever](https://github.com/run-llama/llama_index/blob/main/llama-index-core/llama_index/core/retrievers/fusion_retriever.py)

The fusion retriever generates multiple queries from the original query, performs retrieval over an ensemble of retrievers for each query, and then fuses and reranks the results across all queries. This aims to better capture the query intent through query rewriting and ensembling.

### [Recursive Retriever](https://github.com/run-llama/llama_index/blob/main/llama-index-core/llama_index/core/retrievers/recursive_retriever.py)

The recursive retriever allows for hierarchical retrieval by first retrieving coarse nodes and then recursively retrieving finer-grained nodes within those coarse nodes. This can be useful for multi-level indexing and retrieval.

You can also combine retrievers in interesting ways and build out more advanced retrieval strategies, as we will see later in this course.


### In the example here, we're using a Vector Retriever

 - 🔍 When searching, your query is also converted into a vector embedding. 
 
- 🗂️ The `VectorStoreIndex` then performs a mathematical operation to rank embeddings based on semantic similarity to your query.

- 🔝 Top-k semantic retrieval is the simplest wasy to query a vector index.

- ⩬ You can also apply a similarity threshold  (e.g., only return results that are more similar than some value)


In [17]:
retirever = index.as_retriever(
    similarity_top_k=5,
    similarity_threshold=0.75)

In [18]:
retirever.retrieve("What lessons can be learned from the poems about success?")

2025-09-19 05:44:03,586 - INFO - HTTP Request: POST https://api.cohere.com/v2/embed "HTTP/1.1 200 OK"
2025-09-19 05:44:03,636 - INFO - HTTP Request: POST https://ee73f8ce-826d-49d8-a961-754dfeac8b0c.us-east-1-1.aws.cloud.qdrant.io:6333/collections/it_can_be_done/points/search "HTTP/1.1 200 OK"


[NodeWithScore(node=TextNode(id_='7f1f750c-ee3a-4b88-8fc9-a9a1bb1baf40', embedding=None, metadata={'file_path': '../data/pg10763.txt', 'file_name': 'pg10763.txt', 'file_type': 'text/plain', 'file_size': 405150, 'creation_date': '2025-09-19', 'last_modified_date': '2025-09-05'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='../data/pg10763.txt', node_type='4', metadata={'file_path': '../data/pg10763.txt', 'file_name': 'pg10763.txt', 'file_type': 'text/plain', 'file_size': 405150, 'creation_date': '2025-09-19', 'last_modified_date': '2025-09-05'}, hash='858a9e79c3e93620378f6d9959a1333d695c67acf64e4e9ab23e4b6a89db2653'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='374b6722-6a8f-4727-92bf-fc688

In [19]:
# JC code
result = retirever.retrieve("What lessons can be learned from the poems about success?")
type(result), len(result), result[0].node.get_content() if len(result) > 0 else "No results found"


2025-09-19 05:46:49,995 - INFO - HTTP Request: POST https://api.cohere.com/v2/embed "HTTP/1.1 200 OK"
2025-09-19 05:46:50,064 - INFO - HTTP Request: POST https://ee73f8ce-826d-49d8-a961-754dfeac8b0c.us-east-1-1.aws.cloud.qdrant.io:6333/collections/it_can_be_done/points/search "HTTP/1.1 200 OK"


(list,
 5,
 '_Percy Bysshe Shelley._\r\n\r\n\r\n\r\n\r\nVICTORY IN DEFEAT\r\n\r\n\r\nThe great, radiant souls of earth--the Davids, the Shakespeares, the\r\nLincolns--know grief and affliction as well as joy and triumph. But\r\nadversity is never to them mere adversity; it\r\n\r\n  "Doth suffer a sea-change\r\n  Into something rich and strange";\r\n\r\nand in the crucible of character their suffering itself is transmuted\r\ninto song.\r\n\r\n\r\n  Defeat may serve as well as victory\r\n  To shake the soul and let the glory out.\r\n  When the great oak is straining in the wind,\r\n  The boughs drink in new beauty, and the trunk\r\n  Sends down a deeper root on the windward side.\r\n  Only the soul that knows the mighty grief\r\n  Can know the mighty rapture. Sorrows come\r\n  To stretch out spaces in the heart for joy.\r\n\r\n\r\n_Edwin Markham._\r\n\r\nFrom "The Shoes of Happiness, and Other Poems."\r\n\r\n\r\n\r\n\r\nTHE RICHER MINES\r\n\r\n\r\nNo man is so poor but that he is a stock

But, chances are you don't just want the returned documents. You want the documents to be synthesized into a response. 

So, let's build on this pattern in the next lesson and see how we can get a response based on those retrieved documents.

In [20]:
# close the client so you're not locked out of the index
client.close()