In [1]:
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import CharacterTextSplitter

In [2]:
loader = TextLoader(r"speeh.txt")
documents = loader.load()
documents

[Document(metadata={'source': 'speeh.txt'}, page_content='I have a dream that one day every valley shall be exalted, every hill and mountain shall be made low, the rough places will be made plain, and the crooked places will be made straight; and the glory of the Lord shall be revealed, and all flesh shall see it together.\n\nThis is our hope. This is the faith that I go back to the South with.\n\nWith this faith, we will be able to hew out of the mountain of despair a stone of hope.')]

In [3]:
text_splitter = CharacterTextSplitter(chunk_size = 200 , chunk_overlap = 10)
docs = text_splitter.split_documents(documents)
docs

Created a chunk of size 266, which is longer than the specified 200


[Document(metadata={'source': 'speeh.txt'}, page_content='I have a dream that one day every valley shall be exalted, every hill and mountain shall be made low, the rough places will be made plain, and the crooked places will be made straight; and the glory of the Lord shall be revealed, and all flesh shall see it together.'),
 Document(metadata={'source': 'speeh.txt'}, page_content='This is our hope. This is the faith that I go back to the South with.\n\nWith this faith, we will be able to hew out of the mountain of despair a stone of hope.')]

In [4]:
embeddings = OllamaEmbeddings(model ="gemma:2b")  #bydefault is llama2 but we have installed gemm2:b
db = Chroma.from_documents(docs, embeddings)
db 

  embeddings = OllamaEmbeddings(model ="gemma:2b")  #bydefault is llama2 but we have installed gemm2:b


<langchain_chroma.vectorstores.Chroma at 0x18ba7ef7ac0>

In [5]:
# query
query = "What is the main topic of the speech?"
docs = db.similarity_search(query)
docs

[Document(id='a74202be-903e-43a9-a650-dee0716bf047', metadata={'source': 'speeh.txt'}, page_content='This is our hope. This is the faith that I go back to the South with.\n\nWith this faith, we will be able to hew out of the mountain of despair a stone of hope.'),
 Document(id='ae9ebe07-ed83-48d9-bc15-380ede664190', metadata={'source': 'speeh.txt'}, page_content='I have a dream that one day every valley shall be exalted, every hill and mountain shall be made low, the rough places will be made plain, and the crooked places will be made straight; and the glory of the Lord shall be revealed, and all flesh shall see it together.')]

In [6]:
docs[0].page_content

'This is our hope. This is the faith that I go back to the South with.\n\nWith this faith, we will be able to hew out of the mountain of despair a stone of hope.'

# storing chromadb as sqlite 

In [7]:
vectordb = Chroma.from_documents(docs, embeddings, persist_directory="./chroma_db")

Load

In [8]:
db2 = Chroma(persist_directory="./choma_db", embedding_function=embeddings)
docs2 = db2.similarity_search(query)
docs2



[]