In [10]:
from langchain_chroma import Chroma
from langchain_core.documents import Document
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEmbeddings

In [11]:
load_dotenv()

embeddings=HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
doc1 = Document(
    page_content="""
LangChain is a framework designed to help developers build applications
powered by large language models (LLMs). It provides abstractions for
prompt management, chains, agents, memory, and integrations with
vector databases.
""",
    metadata={
        "source": "langchain_intro.txt",
        "topic": "LLM frameworks",
        "id": 1
    }
)

doc2 = Document(
    page_content="""
Retrieval-Augmented Generation (RAG) combines information retrieval
with text generation. Relevant documents are retrieved from a vector
database and passed as context to an LLM to produce grounded answers.
""",
    metadata={
        "source": "rag_overview.txt",
        "topic": "RAG",
        "id": 2
    }
)

doc3 = Document(
    page_content="""
Chroma is an open-source vector database optimized for storing and
querying embeddings. It supports persistence, metadata filtering,
and fast similarity search for AI applications.
""",
    metadata={
        "source": "chroma_db.txt",
        "topic": "Vector databases",
        "id": 3
    }
)

doc4 = Document(
    page_content="""
Embeddings convert text into dense numerical vectors that capture
semantic meaning. These vectors enable similarity search, clustering,
and recommendation systems.
""",
    metadata={
        "source": "embeddings.txt",
        "topic": "Embeddings",
        "id": 4
    }
)

doc5 = Document(
    page_content="""
Python is widely used in AI and data science due to its simplicity,
rich ecosystem of libraries, and strong community support.
""",
    metadata={
        "source": "python_ai.txt",
        "topic": "Programming language",
        "id": 5
    }
)


docs=[doc1,doc2,doc3,doc4,doc5]


vector_store=Chroma(
    embedding_function=embeddings,
    persist_directory="chroma_db",
    collection_name="sample"
)

vector_store.add_documents(docs)
vector_store.get(include=["documents","metadatas","embeddings"])


query="Who among there are the best players in the world?"
result=vector_store.similarity_search(
    query=query,
    k=3
)


print(result)


result=vector_store.similarity_search_with_score(
    query=query,
    k=3
)




[Document(id='c70093c0-3a8e-4775-a05a-7af88cd49773', metadata={'source': 'chroma_db.txt', 'id': 3, 'topic': 'Vector databases'}, page_content='\nChroma is an open-source vector database optimized for storing and\nquerying embeddings. It supports persistence, metadata filtering,\nand fast similarity search for AI applications.\n'), Document(id='0756ccdd-81ce-40df-ad6c-4a8cc49687ff', metadata={'topic': 'LLM frameworks', 'id': 1, 'source': 'langchain_intro.txt'}, page_content='\nLangChain is a framework designed to help developers build applications\npowered by large language models (LLMs). It provides abstractions for\nprompt management, chains, agents, memory, and integrations with\nvector databases.\n'), Document(id='9d55ade0-80c9-471e-bc2f-0f59626cd306', metadata={'topic': 'RAG', 'id': 2, 'source': 'rag_overview.txt'}, page_content='\nRetrieval-Augmented Generation (RAG) combines information retrieval\nwith text generation. Relevant documents are retrieved from a vector\ndatabase and 

In [None]:
#create the hugging face embeddings
#create a list of the docs
#Define the vector store embedding_function, the persist_directory and collection_name
#create the vector store
