# RAG: Query with IlamaIndex + Milvus + Llama @ Replicate

References
- https://docs.llamaindex.ai/en/stable/examples/vector_stores/MilvusIndexDemo/
- https://docs.llamaindex.ai/en/stable/api_reference/storage/vector_store/milvus/?h=milvusvectorstore#llama_index.vector_stores.milvus.MilvusVectorStore

## Configuration

In [1]:
class MyConfig:
    pass

MY_CONFIG = MyConfig()

MY_CONFIG.EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
MY_CONFIG.EMBEDDING_LENGTH = 384

MY_CONFIG.INPUT_DATA_DIR = "input_data/walmart-reports-1"

MY_CONFIG.DB_URI = './rag_2_llamaindex.db'
MY_CONFIG.COLLECTION_NAME = 'llamaindex_walmart_docs'

MY_CONFIG.LLM_MODEL = "meta/meta-llama-3-8b-instruct"

In [2]:
# If connection to https://huggingface.co/ failed, uncomment the following path
import os
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'

In [3]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings

Settings.embed_model = HuggingFaceEmbedding(
    model_name = MY_CONFIG.EMBEDDING_MODEL
)



In [4]:
# connect to vector db
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.milvus import MilvusVectorStore


vector_store = MilvusVectorStore(
    uri = MY_CONFIG.DB_URI ,
    dim = MY_CONFIG.EMBEDDING_LENGTH , 
    collection_name = MY_CONFIG.COLLECTION_NAME,
    overwrite=True
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

print ("✅ Connected Llama-index to Milvus instance: ", MY_CONFIG.DB_URI )

✅ Connected Llama-index to Milvus instance:  ./rag_2_llamaindex.db


In [5]:
%%time

# load the index

from llama_index.core import VectorStoreIndex

index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

print ("✅ Loaded index from db: ", MY_CONFIG.DB_URI )


✅ Loaded index from db:  ./rag_2_llamaindex.db
CPU times: user 125 ms, sys: 28 ms, total: 153 ms
Wall time: 151 ms


In [6]:
import os
## Load Settings from .env file
from dotenv import find_dotenv, dotenv_values

# _ = load_dotenv(find_dotenv()) # read local .env file
config = dotenv_values(find_dotenv())

MY_CONFIG.REPLICATE_API_TOKEN = config.get('REPLICATE_API_TOKEN')
if  MY_CONFIG.REPLICATE_API_TOKEN:
    print ("✅ config REPLICATE_API_TOKEN found")
else:
    raise Exception ("'❌ REPLICATE_API_TOKEN' is not set.  Please set it above to continue...")


os.environ["REPLICATE_API_TOKEN"] = config.get('REPLICATE_API_TOKEN')

✅ config REPLICATE_API_TOKEN found


In [7]:
from llama_index.llms.replicate import Replicate
from llama_index.core import Settings

llm = Replicate(
    model= MY_CONFIG.LLM_MODEL,
    temperature=0.1
)

Settings.llm = llm

In [8]:
query_engine = index.as_query_engine()
res = query_engine.query("What was Walmart's revenue in 2023?")
print(res)

Empty Response


In [9]:
query_engine = index.as_query_engine()
res = query_engine.query("How many distribution facilities does Walmart have?")
print(res)

Empty Response


In [10]:
query_engine = index.as_query_engine()
res = query_engine.query("When was the moon landing?")
print(res)

Empty Response
