In [None]:
%%capture
!pip install llama-index llama-index-embeddings-openai qdrant-client llama-index-vector-stores-qdrant

In [None]:
import os
from getpass import getpass

In [None]:
# Prompt for the key via getpass and expose it to this process through the environment.
os.environ["OPENAI_API_KEY"] = getpass(prompt="Enter your OpenAI API key: ")

# Storing

Loading and indexing data costs time and money.

By default, indexed data is stored in memory. However, you can store your data to avoid the time and cost of re-indexing it. The simplest way to do this is **persisting to disk**.

Each index has a `storage_context` whose `.persist()` method will write all the data to disk at the specified location.

In [None]:
from pathlib import Path

def create_directory(directory_name):
    """Ensure that a directory exists, creating missing parents as needed.

    Args:
        directory_name: Path of the directory to create (string or path-like).

    Prints whether the directory was newly created or was already present,
    so re-running the notebook does not report existing directories as new.
    """
    path = Path(directory_name)
    already_present = path.is_dir()
    # exist_ok=True keeps the call safe to repeat when the notebook is re-run.
    path.mkdir(parents=True, exist_ok=True)
    if already_present:
        print(f"Directory '{directory_name}' already exists.")
    else:
        print(f"Directory '{directory_name}' created successfully.")

# Working folders: one for the downloaded text, one for the persisted index.
for folder in ("data", "persisted_storage"):
    create_directory(folder)

In [None]:
!wget -P data https://www.gutenberg.org/cache/epub/10763/pg10763.txt

Now that we've downloaded the data, let's:

1) Load as Document
2) Parse as Nodes
3) Create index

In [None]:
# load as document
from llama_index.core import SimpleDirectoryReader

file_path = "data/pg10763.txt"

# Read only the downloaded book, asking the reader to use the file name as the document id.
document = SimpleDirectoryReader(
    input_files=[file_path],
    filename_as_id=True,
).load_data()


In [None]:
# parse as nodes
from llama_index.core.node_parser import SentenceSplitter

# Splitter configuration; paragraphs are delimited by four consecutive newlines.
sentence_splitter = SentenceSplitter(paragraph_separator="\n\n\n\n", chunk_size=512, chunk_overlap=16)

# Turn the loaded document(s) into nodes.
nodes = sentence_splitter.get_nodes_from_documents(document)

In [None]:
# create index
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import  VectorStoreIndex

# Embedding model shared by the indexes built in this notebook.
embed_model = OpenAIEmbedding(model="text-embedding-3-small")

# Build the vector index over the parsed nodes with that embedding model.
index = VectorStoreIndex(nodes=nodes, embed_model=embed_model)

Use the `.persist()` method of the index's `storage_context` to store the indexed data to disk.

In [None]:
# Write the index's storage context to the "persisted_storage" directory.
index.storage_context.persist(persist_dir="persisted_storage")

Now you can avoid re-loading and re-indexing your data by loading the persisted index.

In [None]:
from llama_index.core import StorageContext, load_index_from_storage

# Rebuild the storage context from what was persisted under "persisted_storage".
storage_context = StorageContext.from_defaults(persist_dir="persisted_storage")

Note: If the index was initialized with custom `transformations`, `embed_model`, etc., then you need to pass those same options to `load_index_from_storage`.

In [None]:
# Reload the index from the rebuilt storage context, passing the same
# embed_model that was used when the index was first built.
index = load_index_from_storage(storage_context, embed_model=embed_model)

# Using a Vector Database

We'll use qdrant as our vector database of choice throughout this course.

To use qdrant to store embeddings from the `VectorStoreIndex`, you need to:

- Initialize the qdrant client

- Create a `Collection` to store your data in qdrant

- Assign qdrant as the `vector_store` in a `StorageContext`

- Initialize your `VectorStoreIndex` using that `StorageContext`

Below, we initialize a `QdrantClient` for interacting with qdrant, an open-source vector store. 

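We use `path="persisted_storage/qdrant"` to run qdrant in local mode with its data stored on disk, ideal for quick, lightweight experiments without needing an external qdrant deployment. (Use `location=":memory:"` instead if you want a purely in-memory store.)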

In [None]:
import qdrant_client
from llama_index.vector_stores.qdrant import QdrantVectorStore

create_directory("persisted_storage/qdrant")

# initialize qdrant client, pointing it at the directory created above
client = qdrant_client.QdrantClient(path="persisted_storage/qdrant")

# vector store bound to the "it_can_be_done" collection of that client
vector_store = QdrantVectorStore(
    collection_name="it_can_be_done",
    client=client,
    prefer_grpc=True,
)

# assign qdrant vector store to storage context
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# create the index
# embed_model is passed explicitly so this index embeds nodes and queries with
# the same model as the first index; without it VectorStoreIndex falls back to
# the globally configured default embedding model.
index = VectorStoreIndex(
    nodes,
    storage_context=storage_context,
    embed_model=embed_model,
)

Sneak peek at querying

In [None]:
# Build a query engine on top of the Qdrant-backed index.
query_engine = index.as_query_engine()

In [None]:
# Open-ended question; the response is shown as the cell output.
query_engine.query("What lessons can be learned from the poems about success?")

In [None]:
# Ask about one specific poem title; the response is shown as the cell output.
query_engine.query("Does this book include a poem titled Start Where You Stand?")

# Inserting Documents or Nodes

In [None]:
from llama_index.core import Document

# Text of an extra poem that is not part of the downloaded book.
my_poem = """Beneath the endless sky, where stars do meet the eye,
A path of inner peace, where truth and duty lie.
In the heart of a Sikh, where the Guru's wisdom flows,
And in the Stoic's mind, where calm reflection grows.

"Accept what life may bring," the Stoic gently teaches,
"Embrace your fate with grace," their quiet wisdom reaches.
The Sikh, in vibrant faith, sees God in all, not some,
In service and in love, their kindred spirits come.

Through storms and tranquil days, their journey is the same,
To live with virtue's light, and keep alive the flame.
For in the end, it's not the riches or the fame,
But how we played the game, and honored life's true name.

So, let us walk this path, with courage, love, and grace,
United in our quest, in this vast human race.
A Sikh Stoic's heart, where peace and duty blend,
On this eternal road, that winds and never ends.
"""

# Wrap the raw text in a Document so it can be handed to the index.
poem_document = Document(text=my_poem)

# Add the new document to the existing index.
index.insert(poem_document)

In [None]:
# Query again now that the poem has been inserted into the index.
response = query_engine.query("Are there any poems about Sikh and Stoic philosophy?")

In [None]:
# Print the string form of the response.
print(response)

In [None]:
# Show the response object's attribute dictionary as the cell output.
vars(response)

Now, let's persist the `qdrant` index to disk for future use.

In [None]:
# Write the storage context of the Qdrant-backed index to "persisted_storage/qdrant".
# NOTE(review): this is the same directory the Qdrant client was given as its
# storage path, so both sets of files end up side by side — confirm that is intended.
index.storage_context.persist(persist_dir="persisted_storage/qdrant")