In [24]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

In [25]:
texts= [
    "The company offers 20 paid leaves annually.",
    "Maternity leave is 12 weeks as per company policy.",
    "Employee must submit leave application 2 weeks in advance."
]

In [26]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 100,
    chunk_overlap = 20
)

In [27]:
split_docs = text_splitter.split_text(" ".join(texts))
print(f"@ Split Chunks:\n{split_docs}")

@ Split Chunks:
['The company offers 20 paid leaves annually. Maternity leave is 12 weeks as per company policy.', 'per company policy. Employee must submit leave application 2 weeks in advance.']


In [31]:
print("🔍 Split Docs:")
for idx, chunk in enumerate(split_docs):
    print(f"{idx+1}. {chunk}")

🔍 Split Docs:
1. The company offers 20 paid leaves annually. Maternity leave is 12 weeks as per company policy.
2. per company policy. Employee must submit leave application 2 weeks in advance.


In [32]:
sample_embedding = embeddings.embed_query("test sentence")
print(f"🔹 Sample embedding vector length: {len(sample_embedding)}")

🔹 Sample embedding vector length: 384


In [28]:
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [29]:
# Step 3. Store in ChromaDB vector store
vectorstore = Chroma.from_texts(split_docs, embedding=embeddings)

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


In [30]:
query = "what to do to get the leaves?"
results = vectorstore.similarity_search(query, k=2)
print("\n🔹 Similarity Search Result:")
for r in results:
    print(r.page_content)


🔹 Similarity Search Result:
The company offers 20 paid leaves annually. Maternity leave is 12 weeks as per company policy.
The company offers 20 paid leaves annually. Maternity leave is 12 weeks as per company policy.


In [39]:
vectorstore = Chroma.from_texts(texts, embedding=embeddings)

query = "What about meternity leaves ?"
results = vectorstore.similarity_search(query, k=10)
print("\n🔹 Similarity Search Results:")
for r in results:
    print(r.page_content)

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given



🔹 Similarity Search Results:
The company offers 20 paid leaves annually.
The company offers 20 paid leaves annually.
The company offers 20 paid leaves annually.
The company offers 20 paid leaves annually.
The company offers 20 paid leaves annually.
The company offers 20 paid leaves annually.
The company offers 20 paid leaves annually. Maternity leave is 12 weeks as per company policy.
The company offers 20 paid leaves annually. Maternity leave is 12 weeks as per company policy.
The company offers 20 paid leaves annually. Maternity leave is 12 weeks as per company policy.
The company offers 20 paid leaves annually. Maternity leave is 12 weeks as per company policy.


In [43]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

texts = [
    "The company offers 20 paid leaves annually.",
    "Maternity leave is 12 weeks as per company policy.",
    "Employees must submit leave applications 2 weeks in advance."
]

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

vectorstore = Chroma.from_texts(texts, embedding=embeddings)

query = "How many paid leaves do employees get?"
results = vectorstore.similarity_search(query, k=2)

print("\n🔹 Similarity Search Results:")
for r in results:
    print(r.page_content)


Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given



🔹 Similarity Search Results:
The company offers 20 paid leaves annually.
The company offers 20 paid leaves annually.
