## https://python.langchain.com/docs/integrations/vectorstores/

## Chroma --> licensed under Apache 2.0

In [1]:
from langchain_community.document_loaders import TextLoader

text = TextLoader("data/intro.txt").load()

In [2]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

docs = RecursiveCharacterTextSplitter(chunk_size = 200, chunk_overlap = 30).split_documents(text)

In [3]:
from langchain_community.embeddings import OllamaEmbeddings

embeddings = OllamaEmbeddings(model = "gemma2:2b")

  embeddings = OllamaEmbeddings(model = "gemma2:2b")


In [4]:
from langchain_chroma import Chroma

db = Chroma.from_documents(docs, embeddings)

In [5]:
query = "I recently completed my Post Graduation Diploma in Artificial Intelligence from?"
docs = db.similarity_search(query)
docs

[Document(metadata={'source': 'data/intro.txt'}, page_content='and Algorithms and have solved DSA problems on websites like LeetCode, HackerRank, GeeksforGeeks and AlgoExpert.'),
 Document(metadata={'source': 'data/intro.txt'}, page_content='Based on all this knowledge, I recently created a project named code mixed text translation using python and NLP libraries like PyTorch, NumPy and regular expression.'),
 Document(metadata={'source': 'data/intro.txt'}, page_content="First of all, Thank you for giving me this opportunity.\nI'm Rishikesh Krishna Patil, from New Mumbai."),
 Document(metadata={'source': 'data/intro.txt'}, page_content='I recently completed my Post Graduation Diploma in Artificial Intelligence from CDAC pune. and I have completed my graduation from Dy Patil RAIT, Navi Mumbai in year 2023.')]

## Saving
Saving in the form of sqlite3 DB

In [7]:
Chroma.from_documents(docs, embeddings, persist_directory = "./chroma_db")

<langchain_chroma.vectorstores.Chroma at 0x1e25831eec0>

## Loading

In [None]:
db = Chroma(persist_directory = "./chroma_db", embedding_function = embeddings)
docs = db.similarity_search(query)
docs

[Document(metadata={'source': 'data/intro.txt'}, page_content='and Algorithms and have solved DSA problems on websites like LeetCode, HackerRank, GeeksforGeeks and AlgoExpert.'),
 Document(metadata={'source': 'data/intro.txt'}, page_content='and Algorithms and have solved DSA problems on websites like LeetCode, HackerRank, GeeksforGeeks and AlgoExpert.'),
 Document(metadata={'source': 'data/intro.txt'}, page_content='Based on all this knowledge, I recently created a project named code mixed text translation using python and NLP libraries like PyTorch, NumPy and regular expression.'),
 Document(metadata={'source': 'data/intro.txt'}, page_content='Based on all this knowledge, I recently created a project named code mixed text translation using python and NLP libraries like PyTorch, NumPy and regular expression.')]

## Retriever

In [9]:
retriever = db.as_retriever()

retriever.invoke(query)

[Document(metadata={'source': 'data/intro.txt'}, page_content='and Algorithms and have solved DSA problems on websites like LeetCode, HackerRank, GeeksforGeeks and AlgoExpert.'),
 Document(metadata={'source': 'data/intro.txt'}, page_content='and Algorithms and have solved DSA problems on websites like LeetCode, HackerRank, GeeksforGeeks and AlgoExpert.'),
 Document(metadata={'source': 'data/intro.txt'}, page_content='Based on all this knowledge, I recently created a project named code mixed text translation using python and NLP libraries like PyTorch, NumPy and regular expression.'),
 Document(metadata={'source': 'data/intro.txt'}, page_content='Based on all this knowledge, I recently created a project named code mixed text translation using python and NLP libraries like PyTorch, NumPy and regular expression.')]