In [14]:
from sentence_transformers import SentenceTransformer
from langchain.schema import Document
from langchain.vectorstores import  Chroma
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings



1. Select an embedding model

In [None]:

# Initialize embedding model
embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")
# embedding_model = SentenceTransformer("BAAI/bge-small-en-v1.5")

2. Create LangChain documents

In [36]:
# Create LangChain documents for famous scientists

doc1 = Document(
    page_content="Albert Einstein revolutionized modern physics with his theory of relativity. His work reshaped our understanding of space, time, and gravity.",
    metadata={"field": "Theoretical Physics"}
)

doc2 = Document(
    page_content="Marie Curie was a pioneering physicist and chemist who conducted groundbreaking research on radioactivity. She was the first woman to win a Nobel Prize and the only person to win in two scientific fields.",
    metadata={"field": "Chemistry"}
)

doc3 = Document(
    page_content="Isaac Newton formulated the laws of motion and universal gravitation, laying the foundation for classical mechanics. His work remained unchallenged for centuries.",
    metadata={"field": "Classical Physics"}
)

doc4 = Document(
    page_content="Rosalind Franklin's work with X-ray diffraction was crucial in the discovery of DNA’s double-helix structure. Her research played a pivotal role in molecular biology.",
    metadata={"field": "Molecular Biology"}
)

doc5 = Document(
    page_content="Alan Turing is considered the father of computer science. His conceptualization of the Turing Machine laid the groundwork for modern computing.",
    metadata={"field": "Computer Science"}
)


In [6]:
docs = [doc1, doc2, doc3, doc4, doc5]

3. Create Vector Store & Add documents

In [30]:
# vector_store_chroma = Chroma(embedding_function = embedding_model, persist_directory = "chroma_db",collection_name="sample")

# Create vector store from documents
vector_store_faiss = FAISS.from_documents(docs, embedding_model)

vector_store_chroma = Chroma.from_documents(docs, embedding_model, collection_name="Scientists")


4. view documents

In [33]:
vector_store_faiss.similarity_search(query="science",k=5)

[Document(id='b9d0e6f7-8368-4af2-99d3-24375e7e2036', metadata={'field': 'Molecular Biology'}, page_content="Rosalind Franklin's work with X-ray diffraction was crucial in the discovery of DNA’s double-helix structure. Her research played a pivotal role in molecular biology."),
 Document(id='f7ce4cbf-dedb-4233-9292-0ed625689bf1', metadata={'field': 'Computer Science'}, page_content='Alan Turing is considered the father of computer science. His conceptualization of the Turing Machine laid the groundwork for modern computing.'),
 Document(id='075d2c3f-c26e-4764-9280-51b1a98b8126', metadata={'field': 'Theoretical Physics'}, page_content='Albert Einstein revolutionized modern physics with his theory of relativity. His work reshaped our understanding of space, time, and gravity.'),
 Document(id='f53476a8-db37-42de-b2aa-dd566ca81997', metadata={'field': 'Classical Physics'}, page_content='Isaac Newton formulated the laws of motion and universal gravitation, laying the foundation for classical

In [34]:
vector_store_chroma.similarity_search(query="science",k=5)

[Document(metadata={'field': 'Molecular Biology'}, page_content="Rosalind Franklin's work with X-ray diffraction was crucial in the discovery of DNA’s double-helix structure. Her research played a pivotal role in molecular biology."),
 Document(metadata={'field': 'Molecular Biology'}, page_content="Rosalind Franklin's work with X-ray diffraction was crucial in the discovery of DNA’s double-helix structure. Her research played a pivotal role in molecular biology."),
 Document(metadata={'field': 'Molecular Biology'}, page_content="Rosalind Franklin's work with X-ray diffraction was crucial in the discovery of DNA’s double-helix structure. Her research played a pivotal role in molecular biology."),
 Document(metadata={'field': 'Computer Science'}, page_content='Alan Turing is considered the father of computer science. His conceptualization of the Turing Machine laid the groundwork for modern computing.'),
 Document(metadata={'field': 'Computer Science'}, page_content='Alan Turing is consi

5. search documents


In [45]:
query = "radioactivity research"
results_faiss = vector_store_faiss.similarity_search(query, k=2)
results_chroma = vector_store_chroma.similarity_search(query, k=2)

print("FAISS Search")
for doc in results_faiss:
    print(doc.page_content)

print("*"*50)

print("Chroma Search")
for doc in results_chroma:
    print(doc.page_content)

FAISS Search
Rosalind Franklin's work with X-ray diffraction was crucial in the discovery of DNA’s double-helix structure. Her research played a pivotal role in molecular biology.
Marie Curie was a pioneering physicist and chemist who conducted groundbreaking research on radioactivity. She was the first woman to win a Nobel Prize and the only person to win in two scientific fields.
**************************************************
Chroma Search
Rosalind Franklin's work with X-ray diffraction was crucial in the discovery of DNA’s double-helix structure. Her research played a pivotal role in molecular biology.
Rosalind Franklin's work with X-ray diffraction was crucial in the discovery of DNA’s double-helix structure. Her research played a pivotal role in molecular biology.


6. search with similarity score


In [50]:
score_faiss = vector_store_faiss.similarity_search_with_score(query, k=2)
score_chroma = vector_store_chroma.similarity_search_with_relevance_scores(query, k=2)

for doc, score in score_faiss:
    print(score)

for score in score_chroma:
    print(score[-1])

0.8058425
0.87397546
0.430183290383506
0.430183290383506


8. meta-data filtering


In [52]:
# Filter by metadata: only Chemistry field
filtered_docs = vector_store_chroma.similarity_search(query="research", k=2, filter={"field": "Chemistry"})
print("Filtered by field='Chemistry':")
for doc in filtered_docs:
    print(doc.page_content)


Filtered by field='Chemistry':
Marie Curie was a pioneering physicist and chemist who conducted groundbreaking research on radioactivity. She was the first woman to win a Nobel Prize and the only person to win in two scientific fields.
Marie Curie was a pioneering physicist and chemist who conducted groundbreaking research on radioactivity. She was the first woman to win a Nobel Prize and the only person to win in two scientific fields.


9. update documents & view documents


In [53]:
# Step 1: Delete the old one
vector_store_chroma._collection.delete(where={"scientist": "Turing"})

# Step 2: Add the updated one
updated_doc = Document(
    page_content="Alan Turing laid the foundation of modern AI and computer science with his work during WWII.",
    metadata={"field": "Computer Science", "scientist": "Turing"}
)
vector_store_chroma.add_documents([updated_doc])

# Step 3: View all docs again
print("After update:")
for doc in vector_store_chroma.similarity_search("computer", k=5):
    print(doc.page_content)


After update:
Alan Turing is considered the father of computer science. His conceptualization of the Turing Machine laid the groundwork for modern computing.
Alan Turing is considered the father of computer science. His conceptualization of the Turing Machine laid the groundwork for modern computing.
Alan Turing is considered the father of computer science. His conceptualization of the Turing Machine laid the groundwork for modern computing.
Alan Turing laid the foundation of modern AI and computer science with his work during WWII.
Albert Einstein revolutionized modern physics with his theory of relativity. His work reshaped our understanding of space, time, and gravity.


10. delete document & view documents


In [54]:
# Delete Newton
vector_store_chroma._collection.delete(where={"scientist": "Newton"})

# View all after deletion
print("After deletion:")
for doc in vector_store_chroma.similarity_search("physics", k=5):
    print(doc.page_content)


After deletion:
Albert Einstein revolutionized modern physics with his theory of relativity. His work reshaped our understanding of space, time, and gravity.
Albert Einstein revolutionized modern physics with his theory of relativity. His work reshaped our understanding of space, time, and gravity.
Albert Einstein revolutionized modern physics with his theory of relativity. His work reshaped our understanding of space, time, and gravity.
Isaac Newton formulated the laws of motion and universal gravitation, laying the foundation for classical mechanics. His work remained unchallenged for centuries.
Isaac Newton formulated the laws of motion and universal gravitation, laying the foundation for classical mechanics. His work remained unchallenged for centuries.
