### **Creating Vector Store using FAISS**

In [3]:
%pip install faiss-cpu
%pip install langchain_community

Collecting langchain_community
  Downloading langchain_community-0.4.1-py3-none-any.whl.metadata (3.0 kB)
Collecting langchain-core<2.0.0,>=1.0.1 (from langchain_community)
  Downloading langchain_core-1.0.5-py3-none-any.whl.metadata (3.6 kB)
Collecting langchain-classic<2.0.0,>=1.0.0 (from langchain_community)
  Downloading langchain_classic-1.0.0-py3-none-any.whl.metadata (3.9 kB)
Collecting requests<3.0.0,>=2.32.5 (from langchain_community)
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting dataclasses-json<0.7.0,>=0.6.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7.0,>=0.6.7->langchain_community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7.0,>=0.6.7->langchain_community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting langch

In [1]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

text = ["Welcome to Vectpr Store DB",
        "I will store meningful vectors(numbers)",
        "you can retrieve whenever you want"]

In [3]:
embedded_text = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# embedded = embedded_text.embed_documents(text)

In [4]:
# print(embedded)

[[-0.032106343656778336, 0.002132844179868698, -0.1057271808385849, -0.0027789403684437275, 0.022708935663104057, 0.007090796250849962, -0.020647825673222542, 0.061018090695142746, -0.02898160181939602, 0.01063928846269846, 0.05275096371769905, 0.03540520742535591, 0.0533064529299736, -0.03572540730237961, -0.011123412288725376, 0.05160929262638092, 0.0011924119899049401, -0.03974062576889992, 0.038946717977523804, 0.020371418446302414, -0.10656916350126266, -0.039311591535806656, 0.022519076243042946, 0.04433654621243477, 0.0334261991083622, 0.002774439286440611, -0.045696891844272614, 0.062457066029310226, -0.00338380946777761, -0.05626942217350006, -0.014224983751773834, 0.03996429592370987, 0.03103213757276535, 0.04175694286823273, 0.025188924744725227, -0.031363219022750854, 0.04212960600852966, 0.0264857467263937, -0.06541061401367188, -0.06682910025119781, 0.0574401430785656, -0.07930652052164078, -0.12291982024908066, 0.045759834349155426, 0.014700529165565968, 0.01728339120745

In [5]:
vector_store = FAISS.from_texts(text,embedded_text)

In [6]:
vector_store.save_local("My_First_Vector_DB")

### **Creating Vector Store using Chroma**

In [7]:
%pip install chromadb

Collecting chromadb
  Downloading chromadb-1.3.4-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.2 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.2-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl.metadata (8.7 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Downloading posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.23.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_exporter_otlp_proto_grpc-1.38.0-py3-none-any.whl.metadata (2.4 kB)
Collecting pypika>=0.48.9 (from chromadb)
  Downloading PyPika-0.48.9.tar.gz (67 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?

In [1]:
# This script requires the 'chromadb' library.
# Install it using: pip install chromadb

import chromadb

def main():
    """
    Main function to demonstrate ChromaDB in-memory usage.
    This script will:
    1. Initialize an in-memory ChromaDB client.
    2. Create a new collection.
    3. Add documents, metadata, and IDs to the collection.
    4. Perform a similarity search query.
    5. Print the query results.
    """

    # --- 1. Setup & Client Initialization ---
    # Initialize an ephemeral (in-memory) ChromaDB client.
    # This client does not save data to disk and is ideal for
    # testing, development, or temporary tasks.
    print("Initializing in-memory ChromaDB client...")
    client = chromadb.Client()
    print("Client initialized successfully.")

    # --- 2. Collection Creation ---
    # Create a new collection named 'demo_collection'.
    # Collections store your vectors, documents, and metadata.
    # If the collection already exists, this will retrieve it.
    try:
        collection_name = "demo_collection"
        collection = client.create_collection(name=collection_name)
        print(f"Collection '{collection_name}' created successfully.")
    except chromadb.errors.UniqueConstraintError:
        print(f"Collection '{collection_name}' already exists. Retrieving it.")
        collection = client.get_collection(name=collection_name)
    except Exception as e:
        print(f"An error occurred during collection creation: {e}")
        return

    # --- 3. Data Population (Adding Documents) ---
    # Add documents to the collection.
    # Each document requires a unique ID.
    # You can also provide corresponding metadata (as dictionaries).
    # ChromaDB will automatically handle embedding generation for the text.
    print("Adding documents to the collection...")

    documents_to_add = [
        "This is a document about Python programming.",
        "ChromaDB is a vector database.",
        "Artificial intelligence is transforming technology.",
        "The quick brown fox jumps over the lazy dog."
    ]

    metadata_to_add = [
        {'source': 'doc-1', 'topic': 'programming'},
        {'source': 'doc-2', 'topic': 'database'},
        {'source': 'doc-3', 'topic': 'ai'},
        {'source': 'doc-4', 'topic': 'general'}
    ]

    ids_to_add = [
        "py_doc_1",
        "db_doc_1",
        "ai_doc_1",
        "fox_doc_1"
    ]

    try:
        collection.add(
            documents=documents_to_add,
            metadatas=metadata_to_add,
            ids=ids_to_add
        )
        print("Documents added successfully.")

        # Verify the count
        count = collection.count()
        print(f"Total documents in collection: {count}")

    except Exception as e:
        print(f"An error occurred while adding documents: {e}")
        return

    # --- 4. Querying the Collection ---
    # Perform a similarity search query.
    # We query for texts similar to "What is AI?"
    # 'n_results' specifies the number of closest results to return.
    query_text = "What is AI?"
    print(f"\nPerforming query for: '{query_text}'")

    try:
        results = collection.query(
            query_texts=[query_text],
            n_results=2  # Request the top 2 most similar documents
        )

        # --- 5. Printing Results ---
        # The 'results' object contains the retrieved documents, distances,
        # metadata, and IDs, nested within lists.
        print("\n--- Query Results ---")
        if results:
            print(results)
        else:
            print("No results found.")

    except Exception as e:
        print(f"An error occurred during the query: {e}")


if __name__ == "__main__":
    """
    Standard Python entry point to run the main function.
    """
    main()

Initializing in-memory ChromaDB client...
Client initialized successfully.
Collection 'demo_collection' created successfully.
Adding documents to the collection...


/root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx.tar.gz: 100%|██████████| 79.3M/79.3M [00:00<00:00, 105MiB/s]


Documents added successfully.
Total documents in collection: 4

Performing query for: 'What is AI?'

--- Query Results ---
{'ids': [['ai_doc_1', 'py_doc_1']], 'embeddings': None, 'documents': [['Artificial intelligence is transforming technology.', 'This is a document about Python programming.']], 'uris': None, 'included': ['metadatas', 'documents', 'distances'], 'data': None, 'metadatas': [[{'topic': 'ai', 'source': 'doc-3'}, {'topic': 'programming', 'source': 'doc-1'}]], 'distances': [[0.8835755586624146, 1.4313362836837769]]}


### **Creating a vector store using chroma and Saving it Locally**

In [2]:
# pip install chromadb
"""
This script demonstrates how to create, populate, and query a
persistent, local vector store using the 'chromadb' library.

The database will be saved to a local directory specified by PERSIST_PATH.
"""

import chromadb
import os  # Imported to show the path, not for destructive operations

def main():
    """
    Main function to run the ChromaDB persistence demonstration.
    """

    # =========================================================================
    # 1. Setup
    # =========================================================================
    print("--- ChromaDB Persistent Client Demonstration ---")

    # Define the path where the persistent database will be stored.
    # This directory will be created in the same location as the script.
    PERSIST_PATH = "./my_local_chroma_db"
    COLLECTION_NAME = "demo_collection"

    print(f"Database will be saved to: {os.path.abspath(PERSIST_PATH)}\n")

    # NOTE ON CLEANUP:
    # ChromaDB will create and manage files within the PERSIST_PATH.
    # To completely remove the database, you must manually delete
    # this directory using file system commands (e.g., 'rm -r' in Linux/macOS
    # or 'rmdir /s' in Windows). This script will not delete it.

    # =========================================================================
    # 2. Persistent Client Initialization
    # =========================================================================
    # Initialize the persistent client.
    # This client saves data to disk at the specified 'path'.
    # If the directory doesn't exist, it will be created.
    try:
        client = chromadb.PersistentClient(path=PERSIST_PATH)
        print("Successfully initialized PersistentClient.")
    except Exception as e:
        print(f"Error initializing persistent client: {e}")
        return

    # =========================================================================
    # 3. Collection Creation
    # =========================================================================
    # Get or create a collection. This operation is idempotent, meaning
    # if the collection already exists in the persistent storage,
    # it will be loaded. Otherwise, a new one will be created.
    try:
        collection = client.get_or_create_collection(name=COLLECTION_NAME)
        print(f"Successfully got or created collection: '{COLLECTION_NAME}'")
    except Exception as e:
        print(f"Error getting or creating collection: {e}")
        return

    # =========================================================================
    # 4. Data Population
    # =========================================================================
    # We will add some sample documents.
    # If this script is run multiple times, ChromaDB's 'add' method
    # (by default) will update existing documents with the same IDs
    # (an "upsert" operation).

    print("\nPopulating collection with documents...")

    try:
        collection.add(
            documents=[
                "This is a sample document about web development.",
                "Today, we are exploring vector databases.",
                "The quick brown fox jumps over the lazy dog.",
                "Python is a popular programming language.",
                "ChromaDB makes similarity search easy."
            ],
            metadatas=[
                {"source": "web_dev_doc", "category": "tech"},
                {"source": "db_doc", "category": "tech"},
                {"source": "proverb_doc", "category": "general"},
                {"source": "python_doc", "category": "tech"},
                {"source": "chroma_doc", "category": "tech"}
            ],
            ids=[
                "doc_id_1",
                "doc_id_2",
                "doc_id_3",
                "doc_id_4",
                "doc_id_5"
            ]
        )

        # Verify the count
        count = collection.count()
        print(f"Collection now contains {count} documents.")

    except Exception as e:
        print(f"Error adding documents: {e}")
        return

    # =========================================================================
    # 5. Query
    # =========================================================================
    # Perform a similarity search query.
    # We are looking for the top 2 documents most similar to the query text.

    query_text = "What database is good for search?"
    print(f"\nPerforming query for: '{query_text}'")

    try:
        results = collection.query(
            query_texts=[query_text],
            n_results=2  # Ask for the top 2 most similar results
        )

        print("\nQuery results:")
        print(results)

    except Exception as e:
        print(f"Error querying collection: {e}")

    print("\n--- Demonstration Complete ---")
    print(f"You can inspect the generated database files in the '{PERSIST_PATH}' directory.")


if __name__ == "__main__":
    main()

--- ChromaDB Persistent Client Demonstration ---
Database will be saved to: /content/my_local_chroma_db

Successfully initialized PersistentClient.
Successfully got or created collection: 'demo_collection'

Populating collection with documents...
Collection now contains 5 documents.

Performing query for: 'What database is good for search?'

Query results:
{'ids': [['doc_id_2', 'doc_id_5']], 'embeddings': None, 'documents': [['Today, we are exploring vector databases.', 'ChromaDB makes similarity search easy.']], 'uris': None, 'included': ['metadatas', 'documents', 'distances'], 'data': None, 'metadatas': [[{'source': 'db_doc', 'category': 'tech'}, {'source': 'chroma_doc', 'category': 'tech'}]], 'distances': [[1.1441351175308228, 1.190717101097107]]}

--- Demonstration Complete ---
You can inspect the generated database files in the './my_local_chroma_db' directory.
