### Test if chromaDB works

In [25]:
import chromadb

# initialize a client to interact with the Chroma database. 
client = chromadb.Client()

# A collection in ChromaDB is a structured grouping that stores and organizes embeddings or data, 
# making it easier to perform searches, updates, or deletions on related data items.
collection = client.create_collection(name="new_collection")

In [27]:
# Adding Documents
collection.add(
    documents=[
        "This document is about Beijing",
        "This document is about Vancouver"
    ],
    ids = ['id1', 'id2']
)

In [29]:
# Retrieving All Documents
all_docs = collection.get()
all_docs

{'ids': ['id1', 'id2'],
 'embeddings': None,
 'documents': ['This document is about Beijing',
  'This document is about Vancouver'],
 'uris': None,
 'data': None,
 'metadatas': [None, None],
 'included': [<IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [64]:
# test if the query works properly.
# the document which is more relevant to the query will be returned first
# relevancy is measured by distance, the lower distance, the more relevant

results = collection.query(
    query_texts=['Query is about UBC'],
    n_results=2
)
results

{'ids': [['id3', 'id4']],
 'embeddings': None,
 'documents': [['This document is about Vancouver',
   'This document is about Beijing']],
 'uris': None,
 'data': None,
 'metadatas': [[{'url': 'https://en.wikipedia.org/wiki/Vancouver'},
   {'url': 'https://en.wikipedia.org/wiki/Beijing'}]],
 'distances': [[1.5409464836120605, 1.7852301597595215]],
 'included': [<IncludeEnum.distances: 'distances'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [66]:
collection.delete(ids=all_docs['ids'])
collection.get()

Delete of nonexisting embedding ID: id1
Delete of nonexisting embedding ID: id2
Delete of nonexisting embedding ID: id1
Delete of nonexisting embedding ID: id2


{'ids': ['id3', 'id4'],
 'embeddings': None,
 'documents': ['This document is about Vancouver',
  'This document is about Beijing'],
 'uris': None,
 'data': None,
 'metadatas': [{'url': 'https://en.wikipedia.org/wiki/Vancouver'},
  {'url': 'https://en.wikipedia.org/wiki/Beijing'}],
 'included': [<IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [72]:
# test if adding metadata works -> if it works then we can later add portfolio links

collection.add(
    documents=[
        "This document is about Vancouver",
        "This document is about Beijing"
    ],
    ids=["id3", "id4"],
    metadatas=[
        {"url": "https://en.wikipedia.org/wiki/Vancouver"},
        {"url": "https://en.wikipedia.org/wiki/Beijing"}
    ]
)

Insert of existing embedding ID: id3
Insert of existing embedding ID: id4
Add of existing embedding ID: id3
Add of existing embedding ID: id4


In [82]:
results = collection.query(
    query_texts=["Query is about peking"],
    n_results=2
)
results

{'ids': [['id4', 'id3']],
 'embeddings': None,
 'documents': [['This document is about Beijing',
   'This document is about Vancouver']],
 'uris': None,
 'data': None,
 'metadatas': [[{'url': 'https://en.wikipedia.org/wiki/Beijing'},
   {'url': 'https://en.wikipedia.org/wiki/Vancouver'}]],
 'distances': [[0.7394739985466003, 1.3138021230697632]],
 'included': [<IncludeEnum.distances: 'distances'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}