In [None]:
!pip install chromadb -q
!pip install sentence-transformers -q

YouTube video: https://youtu.be/eCCHDxMaFIk

In [None]:
import chromadb

# Default client for the quick demo below (no persistence directory configured).
client = chromadb.Client()

# get_or_create_collection keeps this cell re-runnable: create_collection
# raises if a collection named "yt_demo" already exists on the client.
collection = client.get_or_create_collection("yt_demo")

By default, Chroma uses the Sentence Transformers all-MiniLM-L6-v2 model to create embeddings.

In [None]:
# Index two toy documents. The three lists are parallel: position i of each
# list describes the same record. No embeddings are passed, so Chroma
# computes them itself.
collection.add(
    ids=["id1", "id2"],
    documents=[
        "This is a document about cat",
        "This is a document about car",
    ],
    metadatas=[
        {"category": "animal"},
        {"category": "vehicle"},
    ],
)

/root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx.tar.gz: 100%|██████████| 79.3M/79.3M [00:01<00:00, 71.4MiB/s]


In [None]:
# Ask the demo collection for the single closest document to the text "vehicle".
results = collection.query(n_results=1, query_texts=["vehicle"])

# Bare last expression so the notebook displays the query result.
results

{'ids': [['id2']],
 'embeddings': None,
 'documents': [['This is a document about car']],
 'metadatas': [[{'category': 'vehicle'}]],
 'distances': [[0.8069301247596741]]}

In [None]:
import os

def read_files_from_folder(folder_path):
    """Read every ``.txt`` file located directly inside ``folder_path``.

    Args:
        folder_path: Path of the directory to scan. Sub-directories are not
            traversed.

    Returns:
        A list of ``{"file_name": <name>, "content": <text>}`` dicts, one per
        text file, ordered by file name so that the result (and any ids
        derived from list position) is the same on every run.

    Raises:
        OSError: If the folder does not exist or a file cannot be opened.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    file_data = []

    # os.listdir() returns entries in arbitrary order, so sort them to keep
    # the output deterministic across machines and runs.
    for file_name in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file_name)

        # Skip anything that is not a text file, including directories whose
        # name merely ends in ".txt" (open() would fail on those).
        if not file_name.endswith(".txt") or not os.path.isfile(file_path):
            continue

        # Explicit encoding so the result does not depend on the platform's
        # default locale.
        with open(file_path, 'r', encoding='utf-8') as file:
            file_data.append({"file_name": file_name, "content": file.read()})

    return file_data

# Folder holding the .txt documents to load.
folder_path = "pets"  # your folder path
file_data = read_files_from_folder(folder_path)

# Echo every loaded file so the corpus can be checked by eye.
for entry in file_data:
    name, text = entry['file_name'], entry['content']
    print(f"File Name: {name}")
    print(f"Content: {text}\n")

File Name: Different Types of Pet Animals.txt
Content: Pet animals come in all shapes and sizes, each suited to different lifestyles and home environments. Dogs and cats are the most common, known for their companionship and unique personalities. Small mammals like hamsters, guinea pigs, and rabbits are often chosen for their low maintenance needs. Birds offer beauty and song, and reptiles like turtles and lizards can make intriguing pets. Even fish, with their calming presence, can be wonderful pets.

File Name: Nutrition Needs of Pet Animals.txt
Content: Proper nutrition is vital for the health and wellbeing of pets. Dogs and cats require a balanced diet that includes proteins, carbohydrates, and fats. Some may even have specific dietary needs based on their breed or age. Birds typically thrive on a diet of seeds, fruits, and vegetables, while reptiles have diverse diets ranging from live insects to fresh produce. Fish diets depend greatly on the species, with some needing live food 

In [None]:
# Turn the loaded files into the three parallel lists expected by
# Collection.add(): raw text, per-document metadata, and string ids.
documents = [entry['content'] for entry in file_data]
metadatas = [{'source': entry['file_name']} for entry in file_data]
# Ids are the 1-based position of each file in file_data, as strings.
ids = [str(position) for position in range(1, len(file_data) + 1)]

In [None]:
# Display the metadata list to check that every entry carries its source file name.
metadatas

[{'source': 'Different Types of Pet Animals.txt'},
 {'source': 'Nutrition Needs of Pet Animals.txt'},
 {'source': 'Health Care for Pets.txt'},
 {'source': 'Training and Behaviour of Pets.txt'},
 {'source': 'The Emotional Bond Between Humans and Pets.txt'}]

In [None]:
from chromadb.config import Settings

# Directory in which the on-disk database is stored.
PERSIST_DIR = "pet_db"

# The dependencies are installed unpinned, so support both client APIs:
# chromadb 0.4 and later expose PersistentClient and reject the legacy
# `chroma_db_impl` setting, while older releases only understand the
# Settings-based configuration.
if hasattr(chromadb, "PersistentClient"):
    client = chromadb.PersistentClient(path=PERSIST_DIR)
else:
    client = chromadb.Client(Settings(
        chroma_db_impl="duckdb+parquet",
        persist_directory=PERSIST_DIR,
    ))

In [None]:
# get_or_create_collection keeps this cell re-runnable: create_collection
# raises when "pet_collection" already exists, which happens on any second
# run against a store that has been persisted to disk.
pet_collection = client.get_or_create_collection("pet_collection")

# No embeddings are supplied, so Chroma embeds the documents itself.
# NOTE(review): how add() treats ids that are already stored depends on the
# chromadb version — confirm before relying on re-runs to refresh content.
pet_collection.add(
    documents=documents,
    metadatas=metadatas,
    ids=ids
)

In [None]:
# Plain semantic search: fetch the single closest document for the question.
results = pet_collection.query(
    n_results=1,
    query_texts=["What are the different kinds of pets people commonly own?"],
)

# Bare last expression so the notebook displays the query result.
results

{'ids': [['1']],
 'embeddings': None,
 'documents': [['Pet animals come in all shapes and sizes, each suited to different lifestyles and home environments. Dogs and cats are the most common, known for their companionship and unique personalities. Small mammals like hamsters, guinea pigs, and rabbits are often chosen for their low maintenance needs. Birds offer beauty and song, and reptiles like turtles and lizards can make intriguing pets. Even fish, with their calming presence, can be wonderful pets.']],
 'metadatas': [[{'source': 'Different Types of Pet Animals.txt'}]],
 'distances': [[0.7325009703636169]]}

In [None]:
# Semantic search combined with a document-content filter: only documents
# whose text contains "reptiles" are eligible.
# NOTE(review): the output recorded for this cell shows a document that does
# not contain the word "reptiles", so it looks stale — re-run to confirm.
results = pet_collection.query(
    n_results=1,
    where_document={"$contains": "reptiles"},
    query_texts=["What are the emotional benefits of owning a pet?"],
)

# Bare last expression so the notebook displays the query result.
results

{'ids': [['5']],
 'embeddings': None,
 'documents': [['Pets offer more than just companionship; they provide emotional support, reduce stress, and can even help their owners lead healthier lives. The bond between pets and their owners is strong, and many people consider their pets as part of the family. This bond can be especially important in times of personal or societal stress, providing comfort and consistency.']],
 'metadatas': [[{'source': 'The Emotional Bond Between Humans and Pets.txt'}]],
 'distances': [[0.6395788192749023]]}

In [None]:
# Semantic search combined with a metadata filter: only the document whose
# "source" metadata equals the given file name is eligible.
results = pet_collection.query(
    n_results=1,
    where={"source": "Training and Behaviour of Pets.txt"},
    query_texts=["What are the emotional benefits of owning a pet?"],
)

# Bare last expression so the notebook displays the query result.
results

{'ids': [['4']],
 'embeddings': None,
 'documents': [['Training is essential for a harmonious life with pets, particularly for dogs. It helps pets understand their boundaries and makes cohabitation easier for both pets and owners. Training should be based on positive reinforcement. Understanding pet behavior is also important, as changes in behavior can often be a sign of underlying health issues.']],
 'metadatas': [[{'source': 'Training and Behaviour of Pets.txt'}]],
 'distances': [[0.8881876468658447]]}

In [None]:
from sentence_transformers import SentenceTransformer
# Load the 'paraphrase-MiniLM-L3-v2' SentenceTransformer; later cells call
# model.encode() on the documents and on the query text.
model = SentenceTransformer('paraphrase-MiniLM-L3-v2')

In [None]:
# Rebuild the parallel lists, this time computing each document's embedding
# explicitly with `model` instead of leaving that to Chroma.
documents = [entry['content'] for entry in file_data]
# One encode() call per document; .tolist() converts the result to a plain list.
embeddings = [model.encode(text).tolist() for text in documents]
metadatas = [{'source': entry['file_name']} for entry in file_data]
# Ids are the 1-based position of each file in file_data, as strings.
ids = [str(position) for position in range(1, len(file_data) + 1)]

In [None]:
# get_or_create_collection keeps this cell re-runnable: create_collection
# raises when "pet_collection_emb" already exists, which happens on any
# second run against a store that has been persisted to disk.
pet_collection_emb = client.get_or_create_collection("pet_collection_emb")

# Embeddings are supplied explicitly here, alongside the raw documents.
# NOTE(review): how add() treats ids that are already stored depends on the
# chromadb version — confirm before relying on re-runs to refresh content.
pet_collection_emb.add(
    documents=documents,
    embeddings=embeddings,
    metadatas=metadatas,
    ids=ids
)

In [None]:
# Embed the question with the same model that embedded the documents, then
# search by vector instead of by raw text.
query = "What are the different kinds of pets people commonly own?"
input_em = model.encode(query).tolist()

results = pet_collection_emb.query(n_results=1, query_embeddings=[input_em])

# Bare last expression so the notebook displays the query result.
results

{'ids': [['1']],
 'embeddings': None,
 'documents': [['Pet animals come in all shapes and sizes, each suited to different lifestyles and home environments. Dogs and cats are the most common, known for their companionship and unique personalities. Small mammals like hamsters, guinea pigs, and rabbits are often chosen for their low maintenance needs. Birds offer beauty and song, and reptiles like turtles and lizards can make intriguing pets. Even fish, with their calming presence, can be wonderful pets.']],
 'metadatas': [[{'source': 'Different Types of Pet Animals.txt'}]],
 'distances': [[12.040446281433105]]}

In [None]:
# Second vector search: embed the question with `model`, then look up the
# single nearest stored document.
query = "foods that are recommended for  dogs?"
input_em = model.encode(query).tolist()

results = pet_collection_emb.query(n_results=1, query_embeddings=[input_em])

# Bare last expression so the notebook displays the query result.
results

{'ids': [['2']],
 'embeddings': None,
 'documents': [['Proper nutrition is vital for the health and wellbeing of pets. Dogs and cats require a balanced diet that includes proteins, carbohydrates, and fats. Some may even have specific dietary needs based on their breed or age. Birds typically thrive on a diet of seeds, fruits, and vegetables, while reptiles have diverse diets ranging from live insects to fresh produce. Fish diets depend greatly on the species, with some needing live food and others subsisting on flakes or pellets.']],
 'metadatas': [[{'source': 'Nutrition Needs of Pet Animals.txt'}]],
 'distances': [[17.143936157226562]]}

In [None]:
# pet_collection_emb.peek()

In [None]:
# !rm -rf  '/content/pet_db'

In [None]:
!zip -r 'pet_db.zip' 'pet_db'

  adding: pet_db/ (stored 0%)
  adding: pet_db/index/ (stored 0%)
  adding: pet_db/index/index_metadata_b0dc65b7-7a78-4b96-82ee-6ecadf899eec.pkl (deflated 14%)
  adding: pet_db/index/index_b0dc65b7-7a78-4b96-82ee-6ecadf899eec.bin (deflated 15%)
  adding: pet_db/index/index_metadata_fb7aeba0-098b-43ac-8392-5c1c4728593b.pkl (deflated 14%)
  adding: pet_db/index/uuid_to_id_b0dc65b7-7a78-4b96-82ee-6ecadf899eec.pkl (deflated 24%)
  adding: pet_db/index/id_to_uuid_fb7aeba0-098b-43ac-8392-5c1c4728593b.pkl (deflated 16%)
  adding: pet_db/index/uuid_to_id_fb7aeba0-098b-43ac-8392-5c1c4728593b.pkl (deflated 22%)
  adding: pet_db/index/index_fb7aeba0-098b-43ac-8392-5c1c4728593b.bin (deflated 14%)
  adding: pet_db/index/id_to_uuid_b0dc65b7-7a78-4b96-82ee-6ecadf899eec.pkl (deflated 16%)
