### ChromaDB

##### adding texts

In [21]:
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv

load_dotenv()
embeddings = GoogleGenerativeAIEmbeddings(model="gemini-embedding-001")

vector_store = Chroma(
    collection_name="my-text-docs",
    embedding_function=embeddings,
    persist_directory="./databases/chroma_langchain_db",
)

text_docs = ["Hello World", "World Economy"]

vector_store.add_texts(texts=text_docs)


KeyboardInterrupt: 

In [None]:
more_text_docs = ["World Health", "World Crisis"]

vector_store.add_texts(texts=more_text_docs)

['dd2bd129-968f-4790-9ea8-f5f1018c072a',
 '17490069-364b-48b8-8ae1-1dd5d7574e8a']

In [None]:
print(vector_store.get()['documents'])

['Hello World', 'World Economy', 'Data Science', 'Machine Learning', 'Hello World', 'World Economy', 'World Health', 'World Crisis']


##### Adding documents

In [None]:
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_core.documents import Document
from dotenv import load_dotenv

load_dotenv()
embeddings = GoogleGenerativeAIEmbeddings(model="gemini-embedding-001")

vector_store = Chroma(
    collection_name="my-text-docs",
    embedding_function=embeddings,
    persist_directory="./databases/chroma_langchain_db",
)

documents = [
    Document(page_content="Hello World", metadata={"source": "source1"}),
    Document(page_content="World Economy", metadata={"source": "source2"}),
    Document(page_content="Data Science", metadata={"source": "source3"}),
    Document(page_content="Machine Learning", metadata={"source": "source4"}),
]

vector_store.add_documents(documents)

KeyboardInterrupt: 

##### similarity search

In [None]:
vector_store.similarity_search("Data", k=2)

[Document(id='b15782a0-7d2a-44ef-8751-5ffd5cbd5042', metadata={'source': 'source3'}, page_content='Data Science'),
 Document(id='f72d77e4-1e59-43c2-a806-c63cd5fe7993', metadata={'source': 'source4'}, page_content='Machine Learning')]

In [None]:
vector_store.similarity_search("World", k=2)

[Document(id='165b81c9-d03d-47b9-991a-933ef144c9d9', metadata={'source': 'source2'}, page_content='World Economy'),
 Document(id='0a6a4e79-180e-427f-b849-accbacb8e586', metadata={'source': 'source1'}, page_content='Hello World')]

In [None]:
# vector_store.delete_collection()

### FAISS

##### adding texts

In [63]:
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

load_dotenv()
embeddings = GoogleGenerativeAIEmbeddings(model="gemini-embedding-001")

embedding_dim = len(embeddings.embed_query("test"))
index = faiss.IndexFlatL2(embedding_dim)

vector_store_1 = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

In [64]:
vector_store_1.add_texts(texts=["Hello World", "World Economy"])

['528eea84-376b-4429-b2d4-87211b85f2c3',
 '803180b9-77f7-463d-978e-2f95d4045e2a']

In [56]:
result_1 = list(vector_store_1.docstore._dict.values())

for doc in result_1:
    print(doc)

page_content='Hello World'
page_content='World Economy'


In [65]:
vector_store_2 = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

In [66]:
vector_store_2.add_texts(texts=["Hello World 2", "World Economy 2"])

['cdf0452a-5718-4f04-aa08-671aef3f3b30',
 '81d7616f-5330-4046-8b4d-40b750b6146c']

In [57]:
result_2 = list(vector_store_2.docstore._dict.values())

for doc in result_2:
    print(doc)

page_content='Hello World 2'
page_content='World Economy 2'


In [None]:
# vector_store.similarity_search("World", k=2)

[Document(id='360896e3-42bd-4b0a-9d4a-8664ce197e47', metadata={}, page_content='World Economy'),
 Document(id='3c6af570-4529-47b8-84fb-a6e8825c78a3', metadata={}, page_content='World Economy 2')]

In [None]:
# del vector_store_2

In [67]:
vector_store_1.merge_from(vector_store_2)

In [68]:
result_3 = list(vector_store_1.docstore._dict.values())

for doc in result_3:
    print(doc)

page_content='Hello World'
page_content='World Economy'
page_content='Hello World 2'
page_content='World Economy 2'


##### adding documents

In [69]:
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

load_dotenv()
embeddings = GoogleGenerativeAIEmbeddings(model="gemini-embedding-001")

embedding_dim = len(embeddings.embed_query("test"))
index = faiss.IndexFlatL2(embedding_dim)

doc_vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

In [70]:
documents = [
    Document(page_content="Hello World", metadata={"source": "source1"}),
    Document(page_content="World Economy", metadata={"source": "source2"}),
    Document(page_content="Data Science", metadata={"source": "source3"}),
    Document(page_content="Machine Learning", metadata={"source": "source4"}),
]

doc_vector_store.add_documents(documents)

['b4c02270-da25-4b4a-bd42-2b164a068c4f',
 '6b496f9b-7ff4-46c2-a301-990d8609fa8e',
 '251065cb-04f7-430a-99f6-88d3c3c88cbd',
 'd6eb6bfd-ff59-4491-81e8-5b060387ab80']

##### similarity search

In [73]:
doc_vector_store.similarity_search("Data", k=2)

for i in doc_vector_store.docstore._dict.values():
    print(i)

page_content='Hello World' metadata={'source': 'source1'}
page_content='World Economy' metadata={'source': 'source2'}
page_content='Data Science' metadata={'source': 'source3'}
page_content='Machine Learning' metadata={'source': 'source4'}
