### RAG stacks + ollama + Store in Vector Db

In [18]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import Ollama

# Example document
document_text = """
The Eiffel Tower was built in 1889 in Paris. It stands 324 meters tall and is one of the most visited monuments in the world.
It was originally constructed as a temporary exhibit for the 1889 World's Fair.
"""

# Split into chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=20)
chunks = splitter.split_text(document_text)

# Create embeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Store in FAISS
vector_db = FAISS.from_texts(chunks, embedding=embeddings)

# Query example
query = "Tell me where is the Eiffel tower?"
similar_docs = vector_db.similarity_search(query, k=2)
context_text = "\n".join([doc.page_content for doc in similar_docs])

# Use LLaMA 3 via Ollama
llm = Ollama(model="llama3.1")

final_prompt = f"""
Answer the question using the context below. If the answer is not found, say "I don't know."

Context:
{context_text}

Question: {query}
Answer:
"""

response = llm(final_prompt)
print(response)

Paris.


In [19]:
# to inspect list in the stored chunks

ids = list(vector_db.docstore._dict.keys())

print("---- Vector DB contents ----")
for doc_id in vector_db.docstore._dict.keys():
    doc = vector_db.docstore._dict[doc_id]
    print(f"ID: {doc_id}")
    print("Chunk content:", doc.page_content)
    print("-" * 40)
    

---- Vector DB contents ----
ID: 7e4c72be-11f5-4089-84ef-9badc2721ab8
Chunk content: The Eiffel Tower was built in 1889 in Paris. It stands 324 meters tall and is one of the most
----------------------------------------
ID: bbbc3edc-9feb-4c19-9c2f-8beccbd87f97
Chunk content: is one of the most visited monuments in the world.
----------------------------------------
ID: 3c3848d5-3b74-4ac3-bd47-6b052f0ba0a5
Chunk content: It was originally constructed as a temporary exhibit for the 1889 World's Fair.
----------------------------------------


In [None]:
# add this new document to the vector db
document_text = """
Japan is the capital of Japan. Beijing is the capital of China
"""

# Split into chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=20)
chunks = splitter.split_text(document_text)

# Store in FAISS
vector_db.add_texts(chunks)


['e5a88bdf-b123-4ac6-96f8-34b3815fd5f8']

In [26]:
vector_db.docstore._dict.items()

dict_items([('7e4c72be-11f5-4089-84ef-9badc2721ab8', Document(id='7e4c72be-11f5-4089-84ef-9badc2721ab8', metadata={}, page_content='The Eiffel Tower was built in 1889 in Paris. It stands 324 meters tall and is one of the most')), ('bbbc3edc-9feb-4c19-9c2f-8beccbd87f97', Document(id='bbbc3edc-9feb-4c19-9c2f-8beccbd87f97', metadata={}, page_content='is one of the most visited monuments in the world.')), ('3c3848d5-3b74-4ac3-bd47-6b052f0ba0a5', Document(id='3c3848d5-3b74-4ac3-bd47-6b052f0ba0a5', metadata={}, page_content="It was originally constructed as a temporary exhibit for the 1889 World's Fair.")), ('e5a88bdf-b123-4ac6-96f8-34b3815fd5f8', Document(id='e5a88bdf-b123-4ac6-96f8-34b3815fd5f8', metadata={}, page_content='Japan is the capital of Japan. Beijing is the capital of China'))])

In [None]:
# check again teh bector db
ids = list(vector_db.docstore._dict.keys())

print("---- Vector DB contents ----")
for doc_id in vector_db.docstore._dict.keys():
    doc = vector_db.docstore._dict[doc_id]
    print(f"ID: {doc_id}")
    print("Chunk content:", doc.page_content)
    print("-" * 40)

---- Vector DB contents ----
ID: 7e4c72be-11f5-4089-84ef-9badc2721ab8
Chunk content: The Eiffel Tower was built in 1889 in Paris. It stands 324 meters tall and is one of the most
----------------------------------------
ID: bbbc3edc-9feb-4c19-9c2f-8beccbd87f97
Chunk content: is one of the most visited monuments in the world.
----------------------------------------
ID: 3c3848d5-3b74-4ac3-bd47-6b052f0ba0a5
Chunk content: It was originally constructed as a temporary exhibit for the 1889 World's Fair.
----------------------------------------
ID: e5a88bdf-b123-4ac6-96f8-34b3815fd5f8
Chunk content: Japan is the capital of Japan. Beijing is the capital of China
----------------------------------------


In [24]:
# Query example
query = "What is the capital of Japan?"
similar_docs = vector_db.similarity_search(query, k=2)
context_text = "\n".join([doc.page_content for doc in similar_docs])

# Use LLaMA 3 via Ollama
llm = Ollama(model="llama3.1")

final_prompt = f"""
Answer the question using the context below. If the answer is not found, say "I don't know."

Context:
{context_text}

Question: {query}
Answer:
"""

response = llm(final_prompt)
print(response)

The capital of Japan is Tokyo.


In [None]:
# Save