In [1]:
import os
import warnings

from langchain.document_loaders import DirectoryLoader
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
# NOTE(review): this silences every warning for the rest of the session, which
# also hides deprecation notices emitted by the libraries imported above.
warnings.filterwarnings("ignore") 

In [2]:
# Folder holding the source files to index (raw string keeps the backslashes literal).
# NOTE(review): machine-specific absolute Windows path; adjust it to run elsewhere.
data_path = r"D:\data\text\pets"

def load_docs(data_path):
    """Load the documents found in a directory.

    Args:
        data_path: Directory path handed straight to ``DirectoryLoader``.

    Returns:
        The result of ``DirectoryLoader(data_path).load()``, i.e. the loaded
        documents.
    """
    return DirectoryLoader(data_path).load()

# Load the corpus once; the trailing expression displays how many documents were read.
documents = load_docs(data_path)
len(documents)

5

In [3]:
def split_docs(documents, chunk_size=1000, chunk_overlap=20):
    """Split documents into chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        documents: Documents to split (e.g. the output of ``load_docs``).
        chunk_size: Forwarded as the splitter's ``chunk_size``.
        chunk_overlap: Forwarded as the splitter's ``chunk_overlap``.

    Returns:
        The chunks produced by the splitter's ``split_documents`` call.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(documents)

# Chunk the loaded documents with the default size/overlap and display the chunk count.
docs = split_docs(documents)
len(docs)

5

In [4]:
# Embedding model, selected by name. The same object is handed to every Chroma
# store in this notebook, so chunks and queries go through the same model.
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

In [5]:
# Embed the chunks and index them in a Chroma store. No persist_directory is
# passed here (presumably the index is kept in memory only; confirm).
db = Chroma.from_documents(docs, embeddings)

In [6]:
# Natural-language question reused by every search in this notebook.
query = "What are the different kinds of pets people commonly own?"
# No k is passed, so the library default applies (this run returned 4 matches).
matching_docs = db.similarity_search(query)

In [7]:
# Number of hits returned by the search above.
len(matching_docs)

4

In [8]:
# Inspect the top hit: a Document carrying page_content plus source metadata.
matching_docs[0]

Document(page_content='Pet animals come in all shapes and sizes, each suited to different lifestyles and home environments. Dogs and cats are the most common, known for their companionship and unique personalities. Small mammals like hamsters, guinea pigs, and rabbits are often chosen for their low maintenance needs. Birds offer beauty and song, and reptiles like turtles and lizards can make intriguing pets. Even fish, with their calming presence, can be wonderful pets.', metadata={'source': 'D:\\data\\text\\pets\\Different Types of Pet Animals.txt'})

In [9]:
# Only the text of the top hit.
matching_docs[0].page_content

'Pet animals come in all shapes and sizes, each suited to different lifestyles and home environments. Dogs and cats are the most common, known for their companionship and unique personalities. Small mammals like hamsters, guinea pigs, and rabbits are often chosen for their low maintenance needs. Birds offer beauty and song, and reptiles like turtles and lizards can make intriguing pets. Even fish, with their calming presence, can be wonderful pets.'

In [10]:
# Same query, limited to the two closest chunks, this time with scores.
# Bound to its own name instead of rebinding `matching_docs`: that variable
# holds plain Documents, whereas this call returns (Document, score) tuples, so
# reusing it would break the `matching_docs[0].page_content` cell on a re-run.
# Per the LangChain Chroma docs the score is a distance: lower means closer.
scored_matches = db.similarity_search_with_score(query, k=2)
scored_matches

[(Document(page_content='Pet animals come in all shapes and sizes, each suited to different lifestyles and home environments. Dogs and cats are the most common, known for their companionship and unique personalities. Small mammals like hamsters, guinea pigs, and rabbits are often chosen for their low maintenance needs. Birds offer beauty and song, and reptiles like turtles and lizards can make intriguing pets. Even fish, with their calming presence, can be wonderful pets.', metadata={'source': 'D:\\data\\text\\pets\\Different Types of Pet Animals.txt'}),
  0.7325010299682617),
 (Document(page_content='Pets offer more than just companionship; they provide emotional support, reduce stress, and can even help their owners lead healthier lives. The bond between pets and their owners is strong, and many people consider their pets as part of the family. This bond can be especially important in times of personal or societal stress, providing comfort and consistency.', metadata={'source': 'D:\\

In [11]:
persist_directory = "chroma_db"

# Chroma.from_documents adds to whatever collection already lives in
# persist_directory, so rebuilding on every run would store the same chunks
# again and the persisted index would start returning duplicate hits.
# Build and persist only when no store exists yet; otherwise reopen it.
# NOTE: delete the `chroma_db` folder to force a rebuild after the source
# documents or the embedding model change.
if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    vectordb = Chroma(
        persist_directory=persist_directory, embedding_function=embeddings
    )
else:
    vectordb = Chroma.from_documents(
        documents=docs, embedding=embeddings, persist_directory=persist_directory
    )
    # Flush the freshly built index to disk so it can be reopened later.
    vectordb.persist()

In [12]:
# Reopen the store from persist_directory, using the same embedding function
# it was built with.
new_db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)

In [13]:
# Repeat the query against the reloaded store; each hit is a (Document, score) tuple.
# NOTE(review): this rebinds `matching_docs`, which earlier cells use for plain Documents.
matching_docs = new_db.similarity_search_with_score(query)
matching_docs[0]

(Document(page_content='Pet animals come in all shapes and sizes, each suited to different lifestyles and home environments. Dogs and cats are the most common, known for their companionship and unique personalities. Small mammals like hamsters, guinea pigs, and rabbits are often chosen for their low maintenance needs. Birds offer beauty and song, and reptiles like turtles and lizards can make intriguing pets. Even fish, with their calming presence, can be wonderful pets.', metadata={'source': 'D:\\data\\text\\pets\\Different Types of Pet Animals.txt'}),
 0.7325009836931581)