# Indexing: Inspecting and Managing Documents in a Vector Store

In [1]:
# Load the IPython extension named `dotenv`, then run the `%dotenv` magic it registers.
# NOTE(review): presumably this reads a local .env file into the environment (e.g. the
# OpenAI API key used by the embedding model below) — confirm.
%load_ext dotenv 
%dotenv

In [8]:
from langchain_openai.embeddings import OpenAIEmbeddings
# from langchain_community.vectorstores import Chroma
from langchain_chroma import Chroma
from langchain_core.documents import Document

In [3]:
# Embedding model handed to the vector store in the next cell.
# NOTE(review): presumably this has to be the same model that produced the vectors
# already persisted on disk — confirm.
embedding = OpenAIEmbeddings(
    model="text-embedding-ada-002",
)

In [9]:
# Open the Chroma store located at ./intro-to-ds_lectures and attach `embedding`
# as its embedding function.
vectorstore_from_directory = Chroma(
    embedding_function=embedding,
    persist_directory="./intro-to-ds_lectures",
)

In [10]:
# Fetch the store's contents without any ID or filter arguments. No `include` is passed,
# and the result below comes back with 'embeddings': None.
vectorstore_from_directory.get()

{'ids': ['9564def8-2eb5-41dc-a72b-ea6f218ccd7f',
  'f9817213-a758-4c2d-ba08-14e11a902e4e',
  'e02d3793-dd23-40b9-8a2e-1f5b630cde49',
  '525b80f7-3d3a-4783-a08c-5ee6d4099399',
  'a2f8d637-0b6f-40da-8ed7-8d1d5b5e9896',
  '860b8ba7-cec0-4721-a23b-ac6a151c6b41',
  'e7aed405-9d8c-48c5-a3e5-ef938341889b',
  '44bbf92a-294d-4bfa-932d-edb7534023d4',
  '9c7ac950-3596-4cd4-a18b-1a9465913bd2',
  '31516de1-c124-4733-b963-4d9bee217a49',
  '0fbcaf76-f918-4141-89f4-38554bd67895',
  'a968576f-bde6-4774-8b3a-5915a1149579',
  'abf50bbc-9df6-4fbe-9792-433567d74bef',
  '2940805b-ccaa-4a8f-969a-7bedd8eac90e',
  '99cfee50-9427-4d6f-9a48-b65acd58f51c',
  'f884be14-9cac-4e5e-ab2a-e50abe6150d7',
  '7fc623f3-dc5a-44d6-b6c9-4076d21e8ce3',
  '69a28796-a128-48e7-8fe9-efa7f284d8e1',
  'ca09488d-414d-4ea6-8dd4-3461813aed5d',
  'c913caff-9adf-4202-8d46-8fb18b526d81'],
 'embeddings': None,
 'documents': ['Alright! So… Let’s discuss the not-so-obvious differences between the terms analysis and analytics. Due to the simi

In [11]:
# Look up a single record by ID and request only its embedding vector.
vectorstore_from_directory.get(
    ids="9564def8-2eb5-41dc-a72b-ea6f218ccd7f",
    include=["embeddings"],
)

{'ids': ['9564def8-2eb5-41dc-a72b-ea6f218ccd7f'],
 'embeddings': array([[ 0.00478017, -0.01535145,  0.02508651, ...,  0.02121745,
         -0.01364157, -0.00687695]], shape=(1, 1536)),
 'documents': None,
 'uris': None,
 'included': ['embeddings'],
 'data': None,
 'metadatas': None}

In [12]:
# Document to be inserted into the store: lecture text plus course/lecture titles as metadata.
added_document = Document(
    page_content="Alright! So… Let’s discuss the not-so-obvious differences between the terms analysis and analytics. Due to the similarity of the words, some people believe they share the same meaning, and thus use them interchangeably. Technically, this isn’t correct. There is, in fact, a distinct difference between the two. And the reason for one often being used instead of the other is the lack of a transparent understanding of both. So, let’s clear this up, shall we? First, we will start with analysis",
    metadata={
        "Course Title": "Introduction to Data and Data Science",
        "Lecture Title": "Analysis vs Analytics",
    },
)

In [14]:
# Insert the new document under a fixed ID instead of letting the store generate one.
# The cells that follow fetch, update and delete this record by that exact literal ID,
# so pinning it here keeps the notebook reproducible after "Restart Kernel -> Run All"
# and stops repeated runs of this cell from piling up copies under new IDs.
vectorstore_from_directory.add_documents(
    [added_document],
    ids=["6023ce4c-7727-487c-ac83-4dd434553844"],
)

['6023ce4c-7727-487c-ac83-4dd434553844']

In [15]:
# Retrieve the record that was just added, using the ID reported by add_documents.
vectorstore_from_directory.get(
    ids="6023ce4c-7727-487c-ac83-4dd434553844",
)

{'ids': ['6023ce4c-7727-487c-ac83-4dd434553844'],
 'embeddings': None,
 'documents': ['Alright! So… Let’s discuss the not-so-obvious differences between the terms analysis and analytics. Due to the similarity of the words, some people believe they share the same meaning, and thus use them interchangeably. Technically, this isn’t correct. There is, in fact, a distinct difference between the two. And the reason for one often being used instead of the other is the lack of a transparent understanding of both. So, let’s clear this up, shall we? First, we will start with analysis'],
 'uris': None,
 'included': ['metadatas', 'documents'],
 'data': None,
 'metadatas': [{'Lecture Title': 'Analysis vs Analytics',
   'Course Title': 'Introduction to Data and Data Science'}]}

In [16]:
# Replacement content and metadata for the record that was added above.
updated_document = Document(
    page_content="More importantly, it will be sufficient for your need to create quick and accurate analyses. However, if your theoretical preparation is strong enough, you will find yourself restricted by software. Knowing a programming language such as R and Python, gives you the freedom to create specific, ad-hoc tools for each project you are working on",
    metadata={
        "Course Title": "Introduction to Data and Data Science",
        "Lecture Title": "Programming Languages & Software Employed in Data Science - All the Tools You Need",
    },
)

In [17]:
# Swap the record stored under this ID for `updated_document`.
vectorstore_from_directory.update_document(
    document=updated_document,
    document_id="6023ce4c-7727-487c-ac83-4dd434553844",
)

In [18]:
# Fetch the same ID again to inspect the record after the update.
vectorstore_from_directory.get(
    ids="6023ce4c-7727-487c-ac83-4dd434553844",
)

{'ids': ['6023ce4c-7727-487c-ac83-4dd434553844'],
 'embeddings': None,
 'documents': ['More importantly, it will be sufficient for your need to create quick and accurate analyses. However, if your theoretical preparation is strong enough, you will find yourself restricted by software. Knowing a programming language such as R and Python, gives you the freedom to create specific, ad-hoc tools for each project you are working on'],
 'uris': None,
 'included': ['metadatas', 'documents'],
 'data': None,
 'metadatas': [{'Course Title': 'Introduction to Data and Data Science',
   'Lecture Title': 'Programming Languages & Software Employed in Data Science - All the Tools You Need'}]}

In [19]:
# Remove the record with this ID from the store.
vectorstore_from_directory.delete(
    ids="6023ce4c-7727-487c-ac83-4dd434553844",
)

In [20]:
# Query the deleted ID once more; the empty lists in the result show the record is gone.
vectorstore_from_directory.get(
    ids="6023ce4c-7727-487c-ac83-4dd434553844",
)

{'ids': [],
 'embeddings': None,
 'documents': [],
 'uris': None,
 'included': ['metadatas', 'documents'],
 'data': None,
 'metadatas': []}