In [None]:
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

# End-to-end example: index a text file into a persistent Chroma store and
# run a similarity search against it.

# 1. Load text from file
loader = TextLoader("newexample.txt")
docs = loader.load()

# 2. Split text into smaller chunks
# chunk_size / chunk_overlap are measured with the splitter's default length
# function (character count).
splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=20)
chunks = splitter.split_documents(docs)

# 3. Initialize embedding model
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# 4. Create a Chroma DB from documents
# Without explicit ids every chunk gets a fresh random id, so each re-run of
# this cell appends another copy of every chunk to the same on-disk store and
# the top-k results fill up with duplicates. Stable, position-based ids make a
# re-run overwrite the existing entries instead.
# NOTE: if the file later yields fewer chunks, higher-numbered ids from an
# earlier run stay in the store; delete "chroma_store" to rebuild from scratch.
chunk_ids = [f"newexample.txt-{position}" for position, _ in enumerate(chunks)]
db = Chroma.from_documents(
    chunks,
    embedding_model,
    ids=chunk_ids,
    persist_directory="chroma_store",
)

# 5. Save the DB to disk
# Kept for older Chroma releases; presumably redundant on newer ones that
# persist automatically -- confirm against the installed version.
db.persist()

# 6. Load from disk (optional step)
# Re-opening with the same embedding function shows the store is usable
# without re-indexing.
db = Chroma(persist_directory="chroma_store", embedding_function=embedding_model)

# 7. Perform similarity search
query = "What is the tallest mountain?"
results = db.similarity_search(query, k=2)

# 8. Print results
print("\nTop Results:")
for i, doc in enumerate(results):
    print(f"\n--- Result {i+1} ---\n{doc.page_content}")



Top Results:

--- Result 1 ---
Mount Everest is the tallest mountain in the world.
The Eiffel Tower is in Paris.

--- Result 2 ---
queried based on similarity to a user's question.
