In [1]:
import os
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment.
# Presumably this is where OPENAI_API_KEY comes from (the next cell reads it
# with os.getenv) — confirm the .env file is kept out of version control.
load_dotenv()

True

In [2]:
# Fail fast with a readable message when the key is missing.
# os.getenv() returns None for an unset variable, and assigning None into
# os.environ raises an opaque "TypeError: str expected, not NoneType".
openai_api_key = os.getenv('OPENAI_API_KEY')
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )
os.environ['OPENAI_API_KEY'] = openai_api_key

In [3]:
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

# Read the files found under the relative "Data" folder into Document objects.
documents = SimpleDirectoryReader(input_dir="Data").load_data()

In [4]:
# Preview only the first document: dumping the whole list prints the full text
# of every loaded file and bloats the notebook output.
documents[:1]

[Document(id_='d208acf7-04c9-4b0b-864c-abe514ad18e7', embedding=None, metadata={'file_path': 'c:\\Users\\msi1\\Videos\\LLama_Project\\Shoe_LLama\\Data\\armour_shoes_boys.csv', 'file_name': 'armour_shoes_boys.csv', 'file_type': 'application/vnd.ms-excel', 'file_size': 95280, 'creation_date': '2024-10-29', 'last_modified_date': '2024-10-29'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text="Boys' Grade School UA Surge 4 Running Shoes, 63.97 - 70, These shoes are going to feel cushioned and comfortable, but also light and breathable. Why? Because when you never slow down, that's what you need., Lightweight, breathable mesh upper with synthetic overlays for added durability & support,TPU heel overla

In [5]:
# Build the in-memory vector index; show_progress renders the
# "Parsing nodes" / "Generating embeddings" progress bars.
index = VectorStoreIndex.from_documents(
    documents=documents,
    show_progress=True,
)

  from .autonotebook import tqdm as notebook_tqdm
Parsing nodes: 100%|██████████| 7/7 [00:00<00:00,  7.07it/s]
Generating embeddings: 100%|██████████| 480/480 [00:07<00:00, 64.44it/s]


In [6]:
# Display the index object's repr as a quick check that it was created.
index

<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x1761cd77ee0>

In [8]:
# Query engine created from the index with default settings.
# NOTE(review): `query_engine` is reassigned to a RetrieverQueryEngine in a
# later cell before any query is issued, so this cell looks redundant — confirm.
query_engine = index.as_query_engine()

In [20]:
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
# Import from the current postprocessor package rather than the legacy
# `llama_index.core.indices.postprocessor` compatibility path.
from llama_index.core.postprocessor import SimilarityPostprocessor

# Tunable retrieval settings, named so they are easy to find and adjust.
SIMILARITY_TOP_K = 4       # number of nodes fetched from the vector index per query
SIMILARITY_CUTOFF = 0.80   # retrieved nodes scoring below this are dropped

retriever = VectorIndexRetriever(index=index, similarity_top_k=SIMILARITY_TOP_K)
postprocessor = SimilarityPostprocessor(similarity_cutoff=SIMILARITY_CUTOFF)

# Replace the default query engine with one that filters retrieved nodes by
# similarity before they are passed on for response synthesis.
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    node_postprocessors=[postprocessor],
)

In [21]:
# Ask the query engine a product question; the answer object is kept in `response`.
response = query_engine.query(
    "What are the key features of the Boys' Grade School UA Surge 4 Running Shoes?"
)

In [22]:
from llama_index.core.response.pprint_utils import pprint_response

# pprint_response prints the "Final Response" text followed by each source node
# (node id, similarity score, text excerpt), so a separate print(response)
# would only repeat the answer.
pprint_response(response, show_source=True)

Final Response: The key features of the Boys' Grade School UA Surge 4
Running Shoes include a lightweight and breathable mesh upper with
synthetic overlays for added durability and support, TPU heel overlay
for additional durability and support, enhanced cushioning around the
ankle collar for superior comfort, full-length EVA sockliner for added
comfort and a custom fit, cushioned EVA midsole for a lightweight and
responsive ride, and a durable outsole with slightly raised rubber
knobs for added grip and traction.
______________________________________________________________________
Source Node 1/4
Node ID: 0b4b9751-d34a-4fc0-9da1-911b28bd5a33
Similarity: 0.9052246749371357
Text: Boys' Grade School UA Surge 4 Running Shoes, 63.97 - 70, These
shoes are going to feel cushioned and comfortable, but also light and
breathable. Why? Because when you never slow down, that's what you
need., Lightweight, breathable mesh upper with synthetic overlays for
added durability & support,TPU heel over

## Saving the Index to Persistent Storage
Every query runs against the index, and right now the entire index is held in memory. There may be cases where you want to store the entire index on disk as persistent storage, so that it can be loaded back from that storage later and queried in the same way.

In [23]:
import os 
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage
)

# Reuse a previously persisted index when one exists; otherwise build it from
# the source files and save it for the next run.
PERSIST_DIR = "./storage"

# A missing *or empty* directory means there is nothing to load yet. Checking
# only for existence would send an empty folder down the "load" branch, where
# load_index_from_storage fails.
# NOTE(review): the saved index is not refreshed when the files in "Data"
# change — delete PERSIST_DIR to force a rebuild.
has_saved_index = os.path.isdir(PERSIST_DIR) and bool(os.listdir(PERSIST_DIR))

if not has_saved_index:
    # Load the documents and create the index.
    documents = SimpleDirectoryReader("Data").load_data()
    index = VectorStoreIndex.from_documents(documents)
    # Save it for later use.
    index.storage_context.persist(persist_dir=PERSIST_DIR)
else:
    # Load the existing index from disk.
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)

# Either way, the index can now be queried.
query_engine = index.as_query_engine()
response = query_engine.query("What are the key features of the Boys' Grade School UA Surge 4 Running Shoes?")
print(response)

The key features of the Boys' Grade School UA Surge 4 Running Shoes include a lightweight and breathable mesh upper with synthetic overlays for added durability and support, TPU heel overlay for enhanced durability and support, enhanced cushioning around the ankle collar for superior comfort, full-length EVA sockliner for added comfort and a custom fit, cushioned EVA midsole for a lightweight and responsive ride, and a durable outsole with slightly raised rubber knobs for added grip and traction.
