## Installing dependencies

In [1]:
%pip install langchain chromadb openai tiktoken pypdf langchain_openai langchain-community

Note: you may need to restart the kernel to use updated packages.


## Importing libraries

In [2]:
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma

  from .autonotebook import tqdm as notebook_tqdm


## Creating document objects

In [3]:
# `Document` lives in langchain_core; the old `langchain.schema` path is a
# legacy re-export that is not available in newer `langchain` releases.
from langchain_core.documents import Document

# Five small sample documents, each with free text in `page_content` and a
# single `domain` metadata field that is used later for metadata filtering.
doc1 = Document(
    page_content="Elon Musk is the CEO of SpaceX and Tesla, known for revolutionizing the electric vehicle industry and advancing reusable rocket technology. His work has significantly influenced modern space exploration.",
    metadata={"domain": "Technology & Space"}
)

doc2 = Document(
    page_content="Marie Curie was a pioneering physicist and chemist who discovered radium and polonium. She was the first woman to win a Nobel Prize and remains an icon for women in science.",
    metadata={"domain": "Science & Research"}
)

doc3 = Document(
    page_content="The Renaissance was a period of great cultural and artistic revival in Europe between the 14th and 17th centuries. It gave rise to legendary artists like Leonardo da Vinci and Michelangelo.",
    metadata={"domain": "History & Art"}
)

doc4 = Document(
    page_content="Artificial Intelligence is transforming industries through machine learning, computer vision, and natural language processing. It enables automation, predictive analytics, and personalized digital experiences.",
    metadata={"domain": "Artificial Intelligence"}
)

doc5 = Document(
    page_content="Climate change refers to long-term shifts in global temperatures and weather patterns, primarily caused by human activities such as burning fossil fuels and deforestation.",
    metadata={"domain": "Environment & Sustainability"}
)


In [4]:
# Collect the sample documents, in definition order, for bulk insertion.
docs = [
    doc1,
    doc2,
    doc3,
    doc4,
    doc5,
]

## Creating the vector store object

In [None]:
# Load environment variables from a local .env file so the OpenAI API key is
# available to the embedding client created in the next cell.
# NOTE(review): presumably the key is stored as OPENAI_API_KEY — confirm.
from dotenv import load_dotenv
load_dotenv()

True

In [None]:
# Build the Chroma collection named 'sample'. Vectors are produced by OpenAI
# embeddings and the data is persisted locally under the "vector-db" directory.
vector_store = Chroma(
    collection_name='sample',
    persist_directory="vector-db",
    embedding_function=OpenAIEmbeddings(),
)

In [None]:
# Add the documents under fixed ids instead of letting the store generate
# random UUIDs on every run. Later cells update and delete by these exact
# ids, so random ids would break them on a fresh run. Re-running this cell
# against the persisted "vector-db" directory then overwrites the same five
# entries rather than appending duplicates (langchain_chroma writes via upsert).
DOC_IDS = [
    '26fe4706-fde6-49e3-9b9b-9e6c3543dcf1',
    '2d535050-a58b-4ed1-b79f-d486748abf78',
    '5b2d787c-dc4f-47ae-bbf9-cab3d110b7bb',
    '851afa85-724d-4183-808f-f59fcd9d2ca2',
    '60ffdcfa-7a66-4f0e-9bdb-7b5ffb5d0c37',
]
vector_store.add_documents(docs, ids=DOC_IDS)  # -> returns the ids stored for each document

['26fe4706-fde6-49e3-9b9b-9e6c3543dcf1',
 '2d535050-a58b-4ed1-b79f-d486748abf78',
 '5b2d787c-dc4f-47ae-bbf9-cab3d110b7bb',
 '851afa85-724d-4183-808f-f59fcd9d2ca2',
 '60ffdcfa-7a66-4f0e-9bdb-7b5ffb5d0c37']

In [18]:
# Inspect the collection right after insertion: ids plus the requested
# embeddings, document texts and metadata.
vector_store.get(
    include=['embeddings', 'documents', 'metadatas'],
)

{'ids': ['26fe4706-fde6-49e3-9b9b-9e6c3543dcf1',
  '2d535050-a58b-4ed1-b79f-d486748abf78',
  '5b2d787c-dc4f-47ae-bbf9-cab3d110b7bb',
  '851afa85-724d-4183-808f-f59fcd9d2ca2',
  '60ffdcfa-7a66-4f0e-9bdb-7b5ffb5d0c37'],
 'embeddings': array([[ 0.00786439, -0.01366578, -0.00072127, ..., -0.00775811,
          0.01137773, -0.00239901],
        [-0.0180136 ,  0.00706999, -0.00195896, ..., -0.00445276,
         -0.02716598,  0.00166148],
        [-0.00031726, -0.00952205,  0.01066395, ...,  0.01428932,
          0.00176277, -0.02673163],
        [-0.0128614 , -0.01338105,  0.00444628, ..., -0.01833074,
         -0.02380008,  0.00144691],
        [ 0.00357054, -0.02677006,  0.01098075, ...,  0.00529301,
         -0.01728451, -0.05325305]], shape=(5, 1536)),
 'documents': ['Elon Musk is the CEO of SpaceX and Tesla, known for revolutionizing the electric vehicle industry and advancing reusable rocket technology. His work has significantly influenced modern space exploration.',
  'Marie Curie wa

In [19]:
# Semantic lookup: return only the single closest document to the question.
vector_store.similarity_search('who is the ceo of tesla?', k=1)

[Document(id='26fe4706-fde6-49e3-9b9b-9e6c3543dcf1', metadata={'domain': 'Technology & Space'}, page_content='Elon Musk is the CEO of SpaceX and Tesla, known for revolutionizing the electric vehicle industry and advancing reusable rocket technology. His work has significantly influenced modern space exploration.')]

In [20]:
# Metadata filtering: only documents whose `domain` matches are candidates.
# The query is an empty string, so the returned score is presumably not a
# meaningful relevance signal here — the filter does the selection.
vector_store.similarity_search_with_score('', filter={'domain': 'Science & Research'})

[(Document(id='2d535050-a58b-4ed1-b79f-d486748abf78', metadata={'domain': 'Science & Research'}, page_content='Marie Curie was a pioneering physicist and chemist who discovered radium and polonium. She was the first woman to win a Nobel Prize and remains an icon for women in science.'),
  0.6445514559745789)]

In [23]:
# Update a document: build the replacement content, then overwrite the stored
# entry with the Elon Musk id (the first id returned when the documents were added).
update_doc1 = Document(
    metadata={'domain': 'business and finance'},
    page_content='Elon Musk is now worth of 500B dollars and is the richest person in humankind',
)

vector_store.update_document(
    document=update_doc1,
    document_id='26fe4706-fde6-49e3-9b9b-9e6c3543dcf1',
)

In [24]:
# Re-inspect the collection to confirm the first entry now carries the
# updated text and a different embedding.
vector_store.get(
    include=['embeddings', 'documents', 'metadatas'],
)

{'ids': ['26fe4706-fde6-49e3-9b9b-9e6c3543dcf1',
  '2d535050-a58b-4ed1-b79f-d486748abf78',
  '5b2d787c-dc4f-47ae-bbf9-cab3d110b7bb',
  '851afa85-724d-4183-808f-f59fcd9d2ca2',
  '60ffdcfa-7a66-4f0e-9bdb-7b5ffb5d0c37'],
 'embeddings': array([[-0.01114195, -0.01598163,  0.00952872, ..., -0.02052629,
          0.00896377,  0.00224722],
        [-0.0180136 ,  0.00706999, -0.00195896, ..., -0.00445276,
         -0.02716598,  0.00166148],
        [-0.00031726, -0.00952205,  0.01066395, ...,  0.01428932,
          0.00176277, -0.02673163],
        [-0.0128614 , -0.01338105,  0.00444628, ..., -0.01833074,
         -0.02380008,  0.00144691],
        [ 0.00357054, -0.02677006,  0.01098075, ...,  0.00529301,
         -0.01728451, -0.05325305]], shape=(5, 1536)),
 'documents': ['Elon Musk is now worth of 500B dollars and is the richest person in humankind',
  'Marie Curie was a pioneering physicist and chemist who discovered radium and polonium. She was the first woman to win a Nobel Prize and rema

In [25]:
# Remove a single entry from the collection by its id.
vector_store.delete(
    ids=['26fe4706-fde6-49e3-9b9b-9e6c3543dcf1'],
)

In [26]:
# Re-inspect the collection to confirm the deleted id is gone and four
# entries remain.
vector_store.get(
    include=['embeddings', 'documents', 'metadatas'],
)

{'ids': ['2d535050-a58b-4ed1-b79f-d486748abf78',
  '5b2d787c-dc4f-47ae-bbf9-cab3d110b7bb',
  '851afa85-724d-4183-808f-f59fcd9d2ca2',
  '60ffdcfa-7a66-4f0e-9bdb-7b5ffb5d0c37'],
 'embeddings': array([[-0.0180136 ,  0.00706999, -0.00195896, ..., -0.00445276,
         -0.02716598,  0.00166148],
        [-0.00031726, -0.00952205,  0.01066395, ...,  0.01428932,
          0.00176277, -0.02673163],
        [-0.0128614 , -0.01338105,  0.00444628, ..., -0.01833074,
         -0.02380008,  0.00144691],
        [ 0.00357054, -0.02677006,  0.01098075, ...,  0.00529301,
         -0.01728451, -0.05325305]], shape=(4, 1536)),
 'documents': ['Marie Curie was a pioneering physicist and chemist who discovered radium and polonium. She was the first woman to win a Nobel Prize and remains an icon for women in science.',
  'The Renaissance was a period of great cultural and artistic revival in Europe between the 14th and 17th centuries. It gave rise to legendary artists like Leonardo da Vinci and Michelangelo