## Install Libraries

In [None]:
# Uncomment to install the notebook's dependencies (quiet mode via -q).
# NOTE(review): the import cell also uses `dotenv` and `langchain_chroma`, which
# presumably come from the `python-dotenv` and `langchain-chroma` packages; neither
# is listed here — confirm and add them before relying on this command.
# !pip install langchain chromadb openai tiktoken pypdf langchain_openai langchain-community -q

## Import Libraries

In [1]:
from dotenv import load_dotenv
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma

# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI API key used by
# OpenAIEmbeddings comes from — confirm against your .env file.
load_dotenv()


True

## Docs

In [2]:
# Sample corpus: one short description per Pakistani city, paired with the
# value stored under the "food" metadata key.
city_profiles = [
    (
        "Karachi is the largest city in Pakistan, known for its vibrant culture, bustling markets, and beautiful beaches like Clifton. It serves as the country's economic hub and is famous for its diverse cuisine.",
        "Biryani",
    ),
    (
        "Lahore, the heart of Pakistan, is known for its rich history, Mughal architecture, and lively atmosphere. The city is a cultural capital and offers iconic landmarks like the Badshahi Mosque and Lahore Fort.",
        "Nihari",
    ),
    (
        "Islamabad, the capital city of Pakistan, is renowned for its scenic beauty, organized structure, and landmarks like Faisal Mosque and Daman-e-Koh. It offers a serene contrast to the hustle of major cities.",
        "Chapli Kebab",
    ),
    (
        "Faisalabad is a major industrial center in Pakistan, often referred to as the 'Manchester of Pakistan' for its textile production. It has a strong entrepreneurial spirit and growing urban life.",
        "Samosa",
    ),
    (
        "Peshawar, one of the oldest cities in South Asia, reflects a rich blend of culture and history. Its historic Qissa Khawani Bazaar and traditional hospitality are famous.",
        "Chappal Kebab",
    ),
]

# Wrap each (description, dish) pair in a LangChain Document.
docs = [
    Document(page_content=description, metadata={"food": dish})
    for description, dish in city_profiles
]

# Keep the individual document names bound as well as the list.
doc1, doc2, doc3, doc4, doc5 = docs

print(docs)

[Document(metadata={'food': 'Biryani'}, page_content="Karachi is the largest city in Pakistan, known for its vibrant culture, bustling markets, and beautiful beaches like Clifton. It serves as the country's economic hub and is famous for its diverse cuisine."), Document(metadata={'food': 'Nihari'}, page_content='Lahore, the heart of Pakistan, is known for its rich history, Mughal architecture, and lively atmosphere. The city is a cultural capital and offers iconic landmarks like the Badshahi Mosque and Lahore Fort.'), Document(metadata={'food': 'Chapli Kebab'}, page_content='Islamabad, the capital city of Pakistan, is renowned for its scenic beauty, organized structure, and landmarks like Faisal Mosque and Daman-e-Koh. It offers a serene contrast to the hustle of major cities.'), Document(metadata={'food': 'Samosa'}, page_content="Faisalabad is a major industrial center in Pakistan, often referred to as the 'Manchester of Pakistan' for its textile production. It has a strong entreprene

## Chroma Settings

In [3]:
# Embedding model used to vectorise documents and queries.
embedding_model = OpenAIEmbeddings()

# Chroma collection named 'sample', stored under the 'my_chroma_db' directory.
vector_store = Chroma(
    collection_name='sample',
    persist_directory='my_chroma_db',
    embedding_function=embedding_model,
)

## CRUD Operations on Chroma Vector Store

### **Create** (Add) Documents

In [4]:
# Insert the sample documents; the call returns the IDs assigned to them.
# NOTE(review): the store is persisted on disk, so re-running this cell
# presumably inserts the same documents again under new IDs — confirm.
inserted_ids = vector_store.add_documents(documents=docs)
inserted_ids

['70bd95fb-b10e-4e8c-bc49-594727f17755',
 '11c3b2f8-8caa-4e4b-b0f9-3fa92942954a',
 'eb4338be-4836-43ce-9d19-41cd4589ab65',
 '0df8cd73-dda4-46fe-bc15-6456f050e30a',
 '77b8f481-14d6-4930-89ab-620232316680']

### **Read** (Get) Documents
* View
* Search
* Filter

#### 1. View Documents

In [5]:
# Inspect what is stored in the collection: vectors, raw text and metadata.
stored_fields = ['embeddings', 'documents', 'metadatas']
vector_store.get(include=stored_fields)

{'ids': ['70bd95fb-b10e-4e8c-bc49-594727f17755',
  '11c3b2f8-8caa-4e4b-b0f9-3fa92942954a',
  'eb4338be-4836-43ce-9d19-41cd4589ab65',
  '0df8cd73-dda4-46fe-bc15-6456f050e30a',
  '77b8f481-14d6-4930-89ab-620232316680'],
 'embeddings': array([[ 9.08074249e-03,  3.83967743e-03,  2.22462486e-03, ...,
         -2.06594751e-03, -1.59997027e-02, -2.27489881e-02],
        [ 1.20303109e-02,  1.41005851e-02, -5.36109437e-04, ...,
         -1.14717160e-03,  1.01746386e-02, -2.51967479e-02],
        [-4.66658128e-03,  5.06548816e-03, -5.13197295e-03, ...,
          3.60916043e-03, -1.09161269e-02, -3.10007874e-02],
        [-7.76014477e-03, -2.32478278e-03, -9.19479318e-03, ...,
          2.36977860e-02,  7.09682354e-05, -2.62149423e-02],
        [ 1.69164743e-02,  1.46452803e-02,  1.06380600e-02, ...,
         -8.25489842e-05, -1.89918745e-03, -1.04031097e-02]],
       shape=(5, 1536)),
 'documents': ["Karachi is the largest city in Pakistan, known for its vibrant culture, bustling markets, and be

#### 2. Search Documents

In [8]:
# Semantic search: fetch the two stored documents closest to the question.
beach_query = 'Who among them has beach?'
vector_store.similarity_search(query=beach_query, k=2)

[Document(id='70bd95fb-b10e-4e8c-bc49-594727f17755', metadata={'food': 'Biryani'}, page_content="Karachi is the largest city in Pakistan, known for its vibrant culture, bustling markets, and beautiful beaches like Clifton. It serves as the country's economic hub and is famous for its diverse cuisine."),
 Document(id='77b8f481-14d6-4930-89ab-620232316680', metadata={'food': 'Chappal Kebab'}, page_content='Peshawar, one of the oldest cities in South Asia, reflects a rich blend of culture and history. Its historic Qissa Khawani Bazaar and traditional hospitality are famous.')]

In [None]:
# Same search, but each hit is returned as a (Document, score) pair.
# NOTE: the score is a distance, so the lower the score, the more similar
# the document is to the query.
scored_query = 'Who among them has beach?'
vector_store.similarity_search_with_score(query=scored_query, k=2)

[(Document(id='70bd95fb-b10e-4e8c-bc49-594727f17755', metadata={'food': 'Biryani'}, page_content="Karachi is the largest city in Pakistan, known for its vibrant culture, bustling markets, and beautiful beaches like Clifton. It serves as the country's economic hub and is famous for its diverse cuisine."),
  0.518361083715898),
 (Document(id='77b8f481-14d6-4930-89ab-620232316680', metadata={'food': 'Chappal Kebab'}, page_content='Peshawar, one of the oldest cities in South Asia, reflects a rich blend of culture and history. Its historic Qissa Khawani Bazaar and traditional hospitality are famous.'),
  0.5638381173985383)]

#### 3. Filtering using Metadata

In [10]:
# Metadata filtering: restrict the search to documents whose "food"
# metadata equals "Samosa". The query text is intentionally left empty.
food_filter = {"food": "Samosa"}
vector_store.similarity_search_with_score(query="", filter=food_filter)

[(Document(id='0df8cd73-dda4-46fe-bc15-6456f050e30a', metadata={'food': 'Samosa'}, page_content="Faisalabad is a major industrial center in Pakistan, often referred to as the 'Manchester of Pakistan' for its textile production. It has a strong entrepreneurial spirit and growing urban life."),
  0.6689453270979357)]

### **Update** Documents

In [13]:
# update documents
# Replacement content: the Peshawar entry is overwritten with a Samundri entry.
updated_doc_samundri = Document(
    page_content=(
        "Samundri is a city in the Faisalabad District of Punjab, Pakistan, "
        "known for its vibrant agricultural markets, historic mosques, and "
        "friendly, close-knit community. The city’s weekly bazaars feature "
        "local farmers trading wheat, sugarcane, and dairy, while its tree-lined "
        "streets and colonial-era architecture reflect a rich heritage."
    ),
    metadata={"food": "Sohan Halwa"}
)

# Document IDs are generated when the documents are added, so a UUID copied
# from an earlier run is stale after a kernel restart. Look the target up by
# its metadata instead of hardcoding the ID.
peshawar_ids = vector_store.get(where={"food": "Chappal Kebab"})["ids"]

if peshawar_ids:
    # Replace the first match in place; its ID is kept, its content,
    # metadata and embedding are replaced.
    vector_store.update_document(document_id=peshawar_ids[0], document=updated_doc_samundri)
else:
    # Nothing to update (e.g. this cell was already run once in this session).
    print("No document with food='Chappal Kebab' found; nothing was updated.")


In [14]:
# Re-inspect the collection to see the effect of the update.
vector_store.get(
    include=[
        'embeddings',
        'documents',
        'metadatas',
    ]
)

{'ids': ['70bd95fb-b10e-4e8c-bc49-594727f17755',
  '11c3b2f8-8caa-4e4b-b0f9-3fa92942954a',
  'eb4338be-4836-43ce-9d19-41cd4589ab65',
  '0df8cd73-dda4-46fe-bc15-6456f050e30a',
  '77b8f481-14d6-4930-89ab-620232316680'],
 'embeddings': array([[ 9.08074249e-03,  3.83967743e-03,  2.22462486e-03, ...,
         -2.06594751e-03, -1.59997027e-02, -2.27489881e-02],
        [ 1.20303109e-02,  1.41005851e-02, -5.36109437e-04, ...,
         -1.14717160e-03,  1.01746386e-02, -2.51967479e-02],
        [-4.66658128e-03,  5.06548816e-03, -5.13197295e-03, ...,
          3.60916043e-03, -1.09161269e-02, -3.10007874e-02],
        [-7.76014477e-03, -2.32478278e-03, -9.19479318e-03, ...,
          2.36977860e-02,  7.09682354e-05, -2.62149423e-02],
        [ 9.61938780e-03, -6.58911653e-03, -1.30272117e-02, ...,
         -1.45636871e-02, -2.09591035e-02, -2.02105641e-02]],
       shape=(5, 1536)),
 'documents': ["Karachi is the largest city in Pakistan, known for its vibrant culture, bustling markets, and be

### **Delete** Documents

In [15]:
# delete document
# Document IDs are generated when the documents are added, so a UUID copied
# from an earlier run is stale after a kernel restart. Find the updated
# (Samundri) entry by its metadata and delete whatever matches.
samundri_ids = vector_store.get(where={"food": "Sohan Halwa"})["ids"]

if samundri_ids:
    vector_store.delete(ids=samundri_ids)
else:
    # Avoid calling delete with an empty ID list when nothing matches.
    print("No document with food='Sohan Halwa' found; nothing was deleted.")

In [16]:
# Final look at the collection to confirm the entry is gone.
remaining = vector_store.get(include=['embeddings', 'documents', 'metadatas'])
remaining

{'ids': ['70bd95fb-b10e-4e8c-bc49-594727f17755',
  '11c3b2f8-8caa-4e4b-b0f9-3fa92942954a',
  'eb4338be-4836-43ce-9d19-41cd4589ab65',
  '0df8cd73-dda4-46fe-bc15-6456f050e30a'],
 'embeddings': array([[ 9.08074249e-03,  3.83967743e-03,  2.22462486e-03, ...,
         -2.06594751e-03, -1.59997027e-02, -2.27489881e-02],
        [ 1.20303109e-02,  1.41005851e-02, -5.36109437e-04, ...,
         -1.14717160e-03,  1.01746386e-02, -2.51967479e-02],
        [-4.66658128e-03,  5.06548816e-03, -5.13197295e-03, ...,
          3.60916043e-03, -1.09161269e-02, -3.10007874e-02],
        [-7.76014477e-03, -2.32478278e-03, -9.19479318e-03, ...,
          2.36977860e-02,  7.09682354e-05, -2.62149423e-02]],
       shape=(4, 1536)),
 'documents': ["Karachi is the largest city in Pakistan, known for its vibrant culture, bustling markets, and beautiful beaches like Clifton. It serves as the country's economic hub and is famous for its diverse cuisine.",
  'Lahore, the heart of Pakistan, is known for its rich h