### **Creating Vector Store using FAISS**

In [None]:
%pip install faiss-cpu langchain-huggingface langchain_community


In [4]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Sample sentences that will be embedded and stored in the vector store.
text = [
    "Welcome to Vectpr Store DB",
    "I will store meningful vectors(numbers)",
    "you can retrieve whenever you want",
]

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Embedding model used for every text -> vector conversion in the FAISS cells below.
embedded_text = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

In [7]:
# Embed every string in `text` with the model configured above.
# The printed value is a nested list of floats (see the cell output), which is
# very long; it is shown here only to illustrate what an embedding looks like.
embedded = embedded_text.embed_documents(text)
print(embedded)

[[-0.032106392085552216, 0.002132811350747943, -0.10572713613510132, -0.0027789126615971327, 0.022708896547555923, 0.007090781349688768, -0.020647872239351273, 0.06101810559630394, -0.028981618583202362, 0.010639285668730736, 0.05275093391537666, 0.03540519252419472, 0.05330644175410271, -0.03572540357708931, -0.011123436503112316, 0.05160929262638092, 0.001192403957247734, -0.0397406630218029, 0.03894675523042679, 0.02037140540778637, -0.10656916350126266, -0.03931156545877457, 0.022519083693623543, 0.04433653503656387, 0.03342617303133011, 0.00277444114908576, -0.045696888118982315, 0.06245703995227814, -0.003383795265108347, -0.056269388645887375, -0.014225001446902752, 0.039964303374290466, 0.031032124534249306, 0.04175696149468422, 0.025188887491822243, -0.031363144516944885, 0.04212961345911026, 0.026485763490200043, -0.06541060656309128, -0.06682911515235901, 0.0574401319026947, -0.07930654287338257, -0.12291976064443588, 0.045759834349155426, 0.014700564555823803, 0.01728335767

In [8]:
# Build a FAISS index from the sample sentences, embedding them with `embedded_text`.
vector_store = FAISS.from_texts(texts=text, embedding=embedded_text)

In [9]:
# Write the FAISS index to the "My_First_Vector_DB" folder so it can be reloaded later.
vector_store.save_local(folder_path="My_First_Vector_DB")

### **Creating Vector Store using Chroma**

In [None]:
%pip install chromadb

In [11]:
# This script requires the 'chromadb' library.
# Install it using: pip install chromadb

import chromadb

def main():
    """
    Demonstrate basic in-memory ChromaDB usage end to end.

    Steps:
    1. Initialize an in-memory ChromaDB client.
    2. Create the 'demo_collection' collection (or fetch it if it already exists).
    3. Add documents, metadata, and IDs to the collection.
    4. Perform a similarity search query.
    5. Print the query results.

    Returns:
        None. Progress and results are printed to stdout. Errors raised while
        creating the collection, adding documents, or querying are caught and
        printed rather than re-raised; the first two also end the function early.
    """

    # --- 1. Setup & Client Initialization ---
    # Initialize an ephemeral (in-memory) ChromaDB client.
    # This client does not save data to disk and is ideal for
    # testing, development, or temporary tasks.
    print("Initializing in-memory ChromaDB client...")
    client = chromadb.Client()
    print("Client initialized successfully.")

    # --- 2. Collection Creation ---
    # Create a new collection named 'demo_collection'.
    # Collections store your vectors, documents, and metadata.
    # create_collection raises if the name is already taken (e.g. when this
    # cell is re-run in the same kernel), so that case falls back to
    # get_collection below.
    # The name is bound before the try so every handler can safely use it.
    collection_name = "demo_collection"
    try:
        collection = client.create_collection(name=collection_name)
        print(f"Collection '{collection_name}' created successfully.")
    except chromadb.errors.UniqueConstraintError:
        print(f"Collection '{collection_name}' already exists. Retrieving it.")
        collection = client.get_collection(name=collection_name)
    except Exception as e:
        print(f"An error occurred during collection creation: {e}")
        return

    # --- 3. Data Population (Adding Documents) ---
    # Add documents to the collection.
    # Each document requires a unique ID.
    # You can also provide corresponding metadata (as dictionaries).
    # ChromaDB will automatically handle embedding generation for the text.
    print("Adding documents to the collection...")

    documents_to_add = [
        "This is a document about Python programming.",
        "ChromaDB is a vector database.",
        "Artificial intelligence is transforming technology.",
        "The quick brown fox jumps over the lazy dog."
    ]

    metadata_to_add = [
        {'source': 'doc-1', 'topic': 'programming'},
        {'source': 'doc-2', 'topic': 'database'},
        {'source': 'doc-3', 'topic': 'ai'},
        {'source': 'doc-4', 'topic': 'general'}
    ]

    ids_to_add = [
        "py_doc_1",
        "db_doc_1",
        "ai_doc_1",
        "fox_doc_1"
    ]

    try:
        collection.add(
            documents=documents_to_add,
            metadatas=metadata_to_add,
            ids=ids_to_add
        )
        print("Documents added successfully.")

        # Verify the count
        count = collection.count()
        print(f"Total documents in collection: {count}")

    except Exception as e:
        print(f"An error occurred while adding documents: {e}")
        return

    # --- 4. Querying the Collection ---
    # Perform a similarity search query.
    # We query for texts similar to "What is AI?"
    # 'n_results' specifies the number of closest results to return.
    query_text = "What is AI?"
    print(f"\nPerforming query for: '{query_text}'")

    try:
        results = collection.query(
            query_texts=[query_text],
            n_results=2  # Request the top 2 most similar documents
        )

        # --- 5. Printing Results ---
        # The 'results' object contains the retrieved documents, distances,
        # metadata, and IDs, nested within lists (one inner list per query text).
        print("\n--- Query Results ---")
        # The result mapping itself is never empty (it always carries its keys),
        # so test whether any query actually returned IDs instead of testing
        # the mapping's truthiness.
        ids_per_query = (results or {}).get("ids") or []
        if any(ids_per_query):
            print(results)
        else:
            print("No results found.")

    except Exception as e:
        print(f"An error occurred during the query: {e}")

# Standard Python entry point: run the demo when this code is executed directly.
if __name__ == "__main__":
    main()

Initializing in-memory ChromaDB client...
Client initialized successfully.
Collection 'demo_collection' created successfully.
Adding documents to the collection...


C:\Users\sathi\.cache\chroma\onnx_models\all-MiniLM-L6-v2\onnx.tar.gz: 100%|██████████| 79.3M/79.3M [00:12<00:00, 6.78MiB/s]


Documents added successfully.
Total documents in collection: 4

Performing query for: 'What is AI?'

--- Query Results ---
{'ids': [['ai_doc_1', 'py_doc_1']], 'embeddings': None, 'documents': [['Artificial intelligence is transforming technology.', 'This is a document about Python programming.']], 'uris': None, 'included': ['metadatas', 'documents', 'distances'], 'data': None, 'metadatas': [[{'source': 'doc-3', 'topic': 'ai'}, {'source': 'doc-1', 'topic': 'programming'}]], 'distances': [[0.8835755586624146, 1.4313362836837769]]}


### **Creating a Vector Store using Chroma and Saving It Locally**

In [12]:
# pip install chromadb
"""
This script demonstrates how to create, populate, and query a
persistent, local vector store using the 'chromadb' library.

The database will be saved to a local directory specified by PERSIST_PATH.
"""

import chromadb
import os  # Imported to show the path, not for destructive operations

def main():
    """
    Run the ChromaDB persistence demonstration.

    Opens (or creates) an on-disk ChromaDB database at PERSIST_PATH, gets or
    creates the demo collection, writes five sample documents, and runs one
    similarity query, printing progress and results along the way.

    Returns:
        None. Errors from client initialization, collection lookup, document
        insertion, or querying are caught and printed rather than re-raised;
        all but the query error end the function early.
    """

    # =========================================================================
    # 1. Setup
    # =========================================================================
    print("--- ChromaDB Persistent Client Demonstration ---")

    # Define the path where the persistent database will be stored.
    # The path is relative, so it resolves against the current working directory
    # (the absolute location is printed below).
    PERSIST_PATH = "./my_local_chroma_db"
    COLLECTION_NAME = "demo_collection"

    print(f"Database will be saved to: {os.path.abspath(PERSIST_PATH)}\n")

    # NOTE ON CLEANUP:
    # ChromaDB will create and manage files within the PERSIST_PATH.
    # To completely remove the database, you must manually delete
    # this directory using file system commands (e.g., 'rm -r' in Linux/macOS
    # or 'rmdir /s' in Windows). This script will not delete it.

    # =========================================================================
    # 2. Persistent Client Initialization
    # =========================================================================
    # Initialize the persistent client.
    # This client saves data to disk at the specified 'path'.
    # If the directory doesn't exist, it will be created.
    try:
        client = chromadb.PersistentClient(path=PERSIST_PATH)
        print("Successfully initialized PersistentClient.")
    except Exception as e:
        print(f"Error initializing persistent client: {e}")
        return

    # =========================================================================
    # 3. Collection Creation
    # =========================================================================
    # Get or create a collection. This operation is idempotent, meaning
    # if the collection already exists in the persistent storage,
    # it will be loaded. Otherwise, a new one will be created.
    try:
        collection = client.get_or_create_collection(name=COLLECTION_NAME)
        print(f"Successfully got or created collection: '{COLLECTION_NAME}'")
    except Exception as e:
        print(f"Error getting or creating collection: {e}")
        return

    # =========================================================================
    # 4. Data Population
    # =========================================================================
    # We will add some sample documents.
    # Because the database persists on disk, these IDs will already exist when
    # the script is run a second time. 'add' does NOT overwrite existing IDs,
    # so 'upsert' is used instead: it inserts new IDs and updates the documents
    # and metadata of IDs that are already present, keeping re-runs idempotent.

    print("\nPopulating collection with documents...")

    try:
        collection.upsert(
            documents=[
                "This is a sample document about web development.",
                "Today, we are exploring vector databases.",
                "The quick brown fox jumps over the lazy dog.",
                "Python is a popular programming language.",
                "ChromaDB makes similarity search easy."
            ],
            metadatas=[
                {"source": "web_dev_doc", "category": "tech"},
                {"source": "db_doc", "category": "tech"},
                {"source": "proverb_doc", "category": "general"},
                {"source": "python_doc", "category": "tech"},
                {"source": "chroma_doc", "category": "tech"}
            ],
            ids=[
                "doc_id_1",
                "doc_id_2",
                "doc_id_3",
                "doc_id_4",
                "doc_id_5"
            ]
        )

        # Verify the count
        count = collection.count()
        print(f"Collection now contains {count} documents.")

    except Exception as e:
        print(f"Error adding documents: {e}")
        return

    # =========================================================================
    # 5. Query
    # =========================================================================
    # Perform a similarity search query.
    # We are looking for the top 2 documents most similar to the query text.

    query_text = "What database is good for search?"
    print(f"\nPerforming query for: '{query_text}'")

    try:
        results = collection.query(
            query_texts=[query_text],
            n_results=2  # Ask for the top 2 most similar results
        )

        print("\nQuery results:")
        print(results)

    except Exception as e:
        print(f"Error querying collection: {e}")

    print("\n--- Demonstration Complete ---")
    print(f"You can inspect the generated database files in the '{PERSIST_PATH}' directory.")


# Entry point: run the persistence demonstration when this code is executed directly.
if __name__ == "__main__":
    main()

--- ChromaDB Persistent Client Demonstration ---
Database will be saved to: d:\Gen AI Project\Social Eagle\LangChain\Script\my_local_chroma_db

Successfully initialized PersistentClient.
Successfully got or created collection: 'demo_collection'

Populating collection with documents...
Collection now contains 5 documents.

Performing query for: 'What database is good for search?'

Query results:
{'ids': [['doc_id_2', 'doc_id_5']], 'embeddings': None, 'documents': [['Today, we are exploring vector databases.', 'ChromaDB makes similarity search easy.']], 'uris': None, 'included': ['metadatas', 'documents', 'distances'], 'data': None, 'metadatas': [[{'category': 'tech', 'source': 'db_doc'}, {'source': 'chroma_doc', 'category': 'tech'}]], 'distances': [[1.1441351175308228, 1.190717101097107]]}

--- Demonstration Complete ---
You can inspect the generated database files in the './my_local_chroma_db' directory.
