In [None]:
%pip install chromadb
%pip install llama-index chromadb
%pip install llama-index-vector-stores-chroma

In [None]:

import chromadb
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext
from llama_index.embeddings.ollama import OllamaEmbedding



In [None]:
# Read the single source PDF into LlamaIndex document objects.
pdf_files = ['../data_uber/uber_2021.pdf']
pdf_reader = SimpleDirectoryReader(input_files=pdf_files)
documents = pdf_reader.load_data()

In [None]:
# Show how many document objects the reader produced
# (presumably one per PDF page -- TODO confirm against the reader's defaults).
len(documents)

In [None]:
# Open (or create) an on-disk Chroma database in the notebook's directory.
chroma_path = "./chroma_db"
db = chromadb.PersistentClient(path=chroma_path)


In [None]:
# Reuse the named collection if it already exists, otherwise create it.
collection_name = "first_collection"
chroma_collection = db.get_or_create_collection(collection_name)


In [None]:
# Wrap the Chroma collection in a LlamaIndex vector store, then hand that
# store to a storage context so the index writes its nodes into Chroma.
vector_store = ChromaVectorStore(
    chroma_collection=chroma_collection,
)
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
)

In [None]:
# Extra options forwarded to Ollama with each request.
# Mirostat is a technique for controlling perplexity and balancing the text
# generation process in large language models (LLMs).
# NOTE(review): this looks like a text-generation sampling option and
# presumably has no effect on embedding requests -- confirm before relying on it.
embedding_options = {"mirostat": 0}

# Embedding model served by an Ollama instance.
ollama_embedding = OllamaEmbedding(
    # Replace with your desired model.
    model_name="nomic-embed-text:latest",
    # Ensure Ollama is running at this endpoint.
    base_url="http://localhost:11434",
    ollama_additional_kwargs=embedding_options,
)

In [None]:
# Build the index only when the persistent collection is still empty.
# The collection lives on disk (./chroma_db) and is opened with
# get_or_create_collection, so calling from_documents unconditionally would
# re-embed the PDF and append duplicate nodes on every re-run of the notebook.
# To force a re-index (e.g. after changing the source documents), delete the
# collection or the ./chroma_db directory first.
if chroma_collection.count() == 0:
    index = VectorStoreIndex.from_documents(
        documents,
        storage_context=storage_context,
        embed_model=ollama_embedding,
    )
else:
    # Reuse the embeddings already stored in Chroma; the same embed model is
    # still needed so queries are embedded consistently with the stored nodes.
    index = VectorStoreIndex.from_vector_store(
        vector_store,
        embed_model=ollama_embedding,
    )


In [None]:
# Ask the index for the three stored chunks most similar to the question.
question = "What does the report say about Uber in 2021?"
retriever = index.as_retriever(similarity_top_k=3)
nodes = retriever.retrieve(question)


In [None]:
  # Print id, text and metadata for every retrieved node, followed by a
  # separator line after each one.
  for node in nodes:
      fields = (
          ("Node ID:", node.id_),
          ("Content:", node.get_content()),
          ("Metadata:", node.metadata),
      )
      for label, value in fields:
          print(label, value)
      print("------------------------------------")


In [None]:
# Report how many records the Chroma collection currently holds.
stored_count = chroma_collection.count()
print(stored_count)


In [None]:
# Pull a single record out of the collection to use as the update/delete
# example in the cells that follow.
doc_to_update = chroma_collection.get(
    limit=1,
)


In [None]:
# Display the raw result returned by chroma_collection.get(limit=1).
doc_to_update

In [None]:
# Copy the first record's existing metadata and add an "author" entry,
# then store the merged dict back on the local result object.
# This only changes the in-memory result; the collection itself is untouched.
first_metadata = dict(doc_to_update["metadatas"][0])
first_metadata["author"] = "Muthukumar"
doc_to_update["metadatas"][0] = first_metadata

In [None]:
# Display the local result again so the added "author" metadata is visible.
doc_to_update

In [None]:
# Write the merged metadata back to the collection for that one record id.
record_id = doc_to_update["ids"][0]
record_metadata = doc_to_update["metadatas"][0]
chroma_collection.update(ids=[record_id], metadatas=[record_metadata])

In [None]:
# Re-fetch the record by the id that was just updated instead of using
# get(limit=1): fetching by id guarantees we are inspecting the same record,
# whereas limit=1 relies on the collection returning records in a stable order.
updated_doc = chroma_collection.get(ids=[doc_to_update["ids"][0]])


In [None]:
# Display the record as re-read from the collection after the update call.
updated_doc

In [None]:
# Display only the metadata of the first re-fetched record; the "author"
# entry written by the earlier update call is expected to appear here.
updated_doc["metadatas"][0]

In [None]:
# Record count immediately before the delete, for comparison afterwards.
count_before = chroma_collection.count()
print("count before", count_before)

In [None]:
# Remove the example record from the collection by its id.
target_id = doc_to_update["ids"][0]
chroma_collection.delete(ids=[target_id])


In [None]:
# Record count after the delete call.
count_after = chroma_collection.count()
print("count after", count_after)
