Indexing: Inspecting And Managing Doc From VectorStore

In [7]:
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_core.documents import Document

In [8]:
embedding = OpenAIEmbeddings(model = "text-embedding-ada-002")

In [9]:
vector_store_from_dir = Chroma(persist_directory="./database", embedding_function=embedding)

In [12]:
vector_store_from_dir.get()

{'ids': ['a1d179ed-70bf-4956-b6b0-34c938f5325c',
  'b68ae137-4cee-4c38-84bb-a8147aeb0906',
  'eb519663-a3c9-4cb1-89e1-c3510ce9c36d',
  '7b1bc15b-e03c-409c-87b2-51d22c842ce9',
  '44a1d1f5-037e-4900-9bf9-cc4a0feac989',
  'baba3d42-1964-46c7-8f43-4b499e41a63e',
  'b62d3b70-7e53-416c-a33a-7807a8d9288e',
  'adf2c2c0-b0f2-49c9-a779-be93b6ba38cc',
  'b213b4ef-043c-4f70-a51a-4546fd61ef8e',
  '3b613a83-d48f-454e-b6a4-d396dbb5543c',
  'bd32c939-7c55-46e6-b049-7e77ca9feab6',
  '9061b3f8-e508-4703-b8a9-f40ec25ed55c',
  '7ab63f7e-c2af-4594-b1fc-e53d11b8c13c',
  'bbb17625-e26e-4ee9-8bb3-41b7c90d3d82',
  '846add44-7ebd-431d-bafb-05826a65eccf',
  '7e98f2f5-e551-47ad-93e4-616ec668e475',
  '098b8488-b5f3-4381-9043-3fb95b73ebca',
  '1e8d70e1-4a14-4145-a248-f0c7a51ffffc',
  'a2d295f3-bcbf-4eb6-9d7f-dfba924fc0fe',
  'c809340d-d42b-4e2d-8df8-9a55c2078082',
  'd47dae5f-4fe9-4cd8-8617-f2e55bfda923'],
 'embeddings': None,
 'documents': ['Alright! So… Let’s discuss the not-so-obvious differences between the ter

In [13]:
vector_store_from_dir.get(ids="b68ae137-4cee-4c38-84bb-a8147aeb0906")

{'ids': ['b68ae137-4cee-4c38-84bb-a8147aeb0906'],
 'embeddings': None,
 'documents': ['Consider the following… You have a huge dataset containing data of various types. Instead of tackling the entire dataset and running the risk of becoming overwhelmed, you separate it into easier to digest chunks and study them individually and examine how they relate to other parts. And that’s analysis in a nutshell. One important thing to remember, however, is that you perform analyses on things that have already happened in the past'],
 'uris': None,
 'included': ['metadatas', 'documents'],
 'data': None,
 'metadatas': [{'Lecture Title': 'Analysis vs Analytics'}]}

In [17]:
vector_store_from_dir.get(ids="b68ae137-4cee-4c38-84bb-a8147aeb0906", include=["embeddings"])

{'ids': ['b68ae137-4cee-4c38-84bb-a8147aeb0906'],
 'embeddings': array([[-0.00141332,  0.00292613,  0.04131164, ...,  0.00855331,
         -0.02048619, -0.0012812 ]], shape=(1, 1536)),
 'documents': None,
 'uris': None,
 'included': ['embeddings'],
 'data': None,
 'metadatas': None}

doc added to vectorstore

In [18]:
added_document = Document(page_content='Alright! So… Let’s discuss the not-so-obvious differences between the terms analysis and analytics. Due to the similarity of the words, some people believe they share the same meaning, and thus use them interchangeably. Technically, this isn’t correct. There is, in fact, a distinct difference between the two. And the reason for one often being used instead of the other is the lack of a transparent understanding of both. So, let’s clear this up, shall we? First, we will start with analysis', 
                          metadata={'Course Title': 'Introduction to Data and Data Science', 
                                    'Lecture Title': 'Analysis vs Analytics'})

In [21]:
vector_store_from_dir.add_documents([added_document])

['37d6d7d2-8951-4155-9e42-45261f57865b']

In [22]:
vector_store_from_dir.get("37d6d7d2-8951-4155-9e42-45261f57865b")

{'ids': ['37d6d7d2-8951-4155-9e42-45261f57865b'],
 'embeddings': None,
 'documents': ['Alright! So… Let’s discuss the not-so-obvious differences between the terms analysis and analytics. Due to the similarity of the words, some people believe they share the same meaning, and thus use them interchangeably. Technically, this isn’t correct. There is, in fact, a distinct difference between the two. And the reason for one often being used instead of the other is the lack of a transparent understanding of both. So, let’s clear this up, shall we? First, we will start with analysis'],
 'uris': None,
 'included': ['metadatas', 'documents'],
 'data': None,
 'metadatas': [{'Lecture Title': 'Analysis vs Analytics',
   'Course Title': 'Introduction to Data and Data Science'}]}

In [23]:
updated_document = Document(page_content='Great! We hope we gave you a good idea about the level of applicability of the most frequently used programming and software tools in the field of data science. Thank you for watching!', 
                            metadata={'Course Title': 'Introduction to Data and Data Science', 
                                     'Lecture Title': 'Programming Languages & Software Employed in Data Science - All the Tools You Need'})

In [24]:
vector_store_from_dir.update_document(document_id = "55409552-1943-4892-949a-3b475ff9c840", 
                                           document = updated_document)

In [25]:
vector_store_from_dir.get("37d6d7d2-8951-4155-9e42-45261f57865b")

{'ids': ['37d6d7d2-8951-4155-9e42-45261f57865b'],
 'embeddings': None,
 'documents': ['Alright! So… Let’s discuss the not-so-obvious differences between the terms analysis and analytics. Due to the similarity of the words, some people believe they share the same meaning, and thus use them interchangeably. Technically, this isn’t correct. There is, in fact, a distinct difference between the two. And the reason for one often being used instead of the other is the lack of a transparent understanding of both. So, let’s clear this up, shall we? First, we will start with analysis'],
 'uris': None,
 'included': ['metadatas', 'documents'],
 'data': None,
 'metadatas': [{'Course Title': 'Introduction to Data and Data Science',
   'Lecture Title': 'Analysis vs Analytics'}]}

In [26]:
vector_store_from_dir.delete("37d6d7d2-8951-4155-9e42-45261f57865b")

In [27]:
vector_store_from_dir.get("37d6d7d2-8951-4155-9e42-45261f57865b")

{'ids': [],
 'embeddings': None,
 'documents': [],
 'uris': None,
 'included': ['metadatas', 'documents'],
 'data': None,
 'metadatas': []}