In [2]:
import os
import chromadb
import numpy as np
from langchain.vectorstores import FAISS,Chroma
from langchain_huggingface import HuggingFaceEmbeddings

In [None]:
# Module-level embedding model, created once and shared: it is the object
# handed to the LangChain FAISS and Chroma vector stores in the loaders below.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [None]:
def load_vectors(file_path):
    """
    Load vectors from FAISS, ChromaDB, or file (.npy, .csv, .json).

    The format is picked from the file extension, compared
    case-insensitively. If the extension is not one of the known ones, any
    path containing "chroma" (in any case) is treated as a ChromaDB store.

    Args:
        file_path: Path (str or os.PathLike) to a .npy/.csv/.json file, a
            .faiss index file, or a ChromaDB location.

    Returns:
        A NumPy array for .npy, .csv and .json inputs; otherwise whatever
        load_faiss_vectors / load_chroma_vectors returns for the path.

    Raises:
        ValueError: If the path matches none of the supported formats.
    """
    # Normalise to a plain string so both splitext() and lower() work for
    # pathlib.Path inputs as well.
    file_path = os.fspath(file_path)

    # Lower-case the extension so "VECTORS.NPY" is handled like "vectors.npy",
    # matching the case-insensitive "chroma" check further down.
    ext = os.path.splitext(file_path)[1].lower()

    if ext == ".npy":
        return np.load(file_path)

    if ext == ".csv":
        return np.loadtxt(file_path, delimiter=",")

    if ext == ".json":
        import json

        # Explicit encoding: do not depend on the platform's default locale.
        with open(file_path, "r", encoding="utf-8") as f:
            return np.array(json.load(f))

    if ext == ".faiss":
        return load_faiss_vectors(file_path)

    if "chroma" in file_path.lower():
        return load_chroma_vectors(file_path)

    raise ValueError("Unsupported format. Use .npy, .csv, .json, FAISS, or ChromaDB.")

def load_faiss_vectors(index_file):
    """
    Load vectors from a FAISS index file.

    Args:
        index_file: Path to an index saved by LangChain's FAISS store:
            either the ``<name>.faiss`` file itself (its ``<name>.pkl``
            companion is expected in the same folder) or the folder that
            contains ``index.faiss``.

    Returns:
        The stored vectors as returned by the index's ``reconstruct_n``
        (one row per vector).
    """
    index_file = os.fspath(index_file)

    # FAISS.load_local takes a folder plus an index name rather than a file
    # path, so split the caller's path accordingly.
    if os.path.isdir(index_file):
        folder, index_name = index_file, "index"
    else:
        folder, filename = os.path.split(index_file)
        index_name = os.path.splitext(filename)[0]

    # SECURITY: load_local unpickles the docstore saved next to the index, and
    # LangChain refuses to do so unless this flag is set. Only point this
    # function at index folders you created yourself or otherwise trust.
    vector_store = FAISS.load_local(
        folder or ".",  # a bare file name means the current directory
        embeddings,
        index_name=index_name,
        allow_dangerous_deserialization=True,
    )

    # Pull every stored vector back out of the raw FAISS index.
    # NOTE(review): reconstruct_n is supported by flat indexes (LangChain's
    # default); other index types may need a direct map first - confirm.
    index = vector_store.index
    return index.reconstruct_n(0, index.ntotal)

def load_chroma_vectors(db_path, collection_name="rag_collection"):
    """
    Load vectors from a ChromaDB persistent store.

    Args:
        db_path: Directory of the persisted Chroma database.
        collection_name: Name of the collection to read from.

    Returns:
        A NumPy array built from the embeddings stored in the collection;
        an empty array if the collection holds none.
    """
    # Open the store the caller asked for instead of a hard-coded location.
    vector_store = Chroma(
        collection_name=collection_name,
        persist_directory=os.fspath(db_path),
        embedding_function=embeddings,
    )

    # Embeddings are not part of the default get() payload; request them
    # explicitly.
    records = vector_store.get(include=["embeddings"])
    stored = records.get("embeddings")

    # Compare against None rather than relying on truthiness: the payload may
    # be an array, for which bool() is ambiguous.
    return np.asarray([] if stored is None else stored)

def store_vectors_in_chroma(vectors, collection_name="rag_collection"):
    """
    Store extracted vectors in ChromaDB for future RAG.

    Vectors are written to the persistent store at ``./chroma_db`` under the
    ids "0", "1", ... in input order.

    Args:
        vectors: Array-like of vectors, one per row. A single 1-D vector is
            treated as one row.
        collection_name: Collection to create or reuse.

    Returns:
        The ChromaDB collection the vectors were written to.
    """
    chroma_client = chromadb.PersistentClient(path="./chroma_db")
    collection = chroma_client.get_or_create_collection(collection_name)

    # Accept plain lists as well as ndarrays.
    matrix = np.asarray(vectors)

    if matrix.size:
        # np.loadtxt yields a 1-D array for a one-row CSV; promote it so it is
        # stored as one vector rather than misread as a batch of scalars.
        matrix = np.atleast_2d(matrix)
        ids = [str(i) for i in range(len(matrix))]

        # upsert, not add: the ids are positional, so a second run against the
        # same persistent store reuses them. add() would collide with the
        # existing entries; upsert overwrites them with the new vectors.
        # (Entries with higher ids from a longer earlier run are left as-is.)
        collection.upsert(embeddings=matrix.tolist(), ids=ids)
        stored = len(matrix)
    else:
        # Nothing to write; avoid sending an empty batch to ChromaDB.
        stored = 0

    print(f"Stored {stored} vectors in ChromaDB.")
    return collection

def query_chroma(collection, query_vector, top_k=5):
    """
    Run a nearest-neighbour lookup against a ChromaDB collection.

    Args:
        collection: Object exposing ``query(query_embeddings=..., n_results=...)``.
        query_vector: Vector to search with; must provide ``tolist()``.
        top_k: Number of matches to request.

    Returns:
        Whatever ``collection.query`` returns.
    """
    # The query API takes a batch of embeddings, so wrap the single vector.
    query_batch = [query_vector.tolist()]
    return collection.query(query_embeddings=query_batch, n_results=top_k)

if __name__ == "__main__":
    # Interactive demo: load vectors from a user-supplied path, copy them into
    # ChromaDB, then query the collection with the first loaded vector.
    file_path = input(
        "Enter the path of the stored vector database (FAISS, Chroma, .npy, .csv, .json): "
    )

    try:
        vectors = load_vectors(file_path)
        collection = store_vectors_in_chroma(vectors)

        # Use the first vector as the probe for the similarity search.
        results = query_chroma(collection, vectors[0])
        print("Query Results:", results)
    except Exception as exc:
        # Script boundary: report any failure as a message instead of a traceback.
        print("Error:", exc)