In [None]:
import pickle
import os
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.llms import Ollama

# Path to your vector database
# Use absolute path by going up to parent directories
current_dir = os.path.dirname(os.path.abspath("__file__"))
project_root = os.path.dirname(os.path.dirname(current_dir))
db_path = os.path.join(project_root, "railway_deployment", "data", "vectordb")

# Alternative: Use relative path from current file location
# db_path = os.path.join(os.path.dirname(__file__), "..", "data", "vectordb")

print(f"Database path: {db_path}")
print(f"Path exists: {os.path.exists(db_path)}")

import warnings
warnings.filterwarnings("ignore", category=UserWarning)

Database path: /Users/miftahhadiyannoor/Documents/logistics-rag/railway_deployment/data/vectordb
Path exists: True


In [26]:
# === Verify files exist ===
print("=== Checking Vector Database Files ===")
pkl_file = os.path.join(db_path, "index.pkl")
faiss_file = os.path.join(db_path, "index.faiss")

print(f"PKL file path: {pkl_file}")
print(f"PKL exists: {os.path.exists(pkl_file)}")
print(f"FAISS file path: {faiss_file}")
print(f"FAISS exists: {os.path.exists(faiss_file)}")

if not os.path.exists(pkl_file):
    print(f"\n⚠️  PKL file not found! Available files in {db_path}:")
    if os.path.exists(db_path):
        print(os.listdir(db_path))
    else:
        print(f"Directory doesn't exist: {db_path}")
else:
    print("\n✓ All files found! Proceeding with loading...")

print("\n" + "="*50)
pkl_file = os.path.join(db_path, "index.pkl")
if os.path.exists(pkl_file):
    with open(pkl_file, 'rb') as f:
        pkl_data = pickle.load(f)
    
    print("=== PKL File Contents ===")
    print(f"Type: {type(pkl_data)}")
    print(f"Keys: {pkl_data.keys() if isinstance(pkl_data, dict) else 'Not a dict'}")
    
    # If it's a docstore
    if hasattr(pkl_data, '__dict__'):
        print(f"Attributes: {pkl_data.__dict__.keys()}")
    
    # Try to see documents if it's a docstore
    if isinstance(pkl_data, dict) and 'docstore' in pkl_data:
        docstore = pkl_data['docstore']
        print(f"\nDocstore type: {type(docstore)}")
        if hasattr(docstore, '_dict'):
            print(f"Number of documents: {len(docstore._dict)}")
            # Print first few documents
            for i, (key, doc) in enumerate(list(docstore._dict.items())[:3]):
                print(f"\nDocument {i+1}:")
                print(f"  Key: {key}")
                print(f"  Content preview: {doc.page_content[:200]}...")
else:
    print(f"❌ PKL file not found at {pkl_file}")

=== Checking Vector Database Files ===
PKL file path: /Users/miftahhadiyannoor/Documents/logistics-rag/railway_deployment/data/vectordb/index.pkl
PKL exists: True
FAISS file path: /Users/miftahhadiyannoor/Documents/logistics-rag/railway_deployment/data/vectordb/index.faiss
FAISS exists: True

✓ All files found! Proceeding with loading...

=== PKL File Contents ===
Type: <class 'tuple'>
Keys: Not a dict


In [23]:
# === 2. Inspect .faiss file (index info) ===
faiss_file = os.path.join(db_path, "index.faiss")
if os.path.exists(faiss_file):
    print("\n=== FAISS File Info ===")
    print(f"File size: {os.path.getsize(faiss_file)} bytes")
    
    # You need to load it with embeddings to see more details
    try:
        import faiss
        index = faiss.read_index(faiss_file)
        print(f"Index type: {type(index)}")
        print(f"Number of vectors: {index.ntotal}")
        print(f"Vector dimension: {index.d}")
        print(f"Is trained: {index.is_trained}")
    except ImportError:
        print("Install faiss-cpu to read index details: pip install faiss-cpu")
else:
    print(f"FAISS file not found at {faiss_file}")

FAISS file not found at railway_deployment/data/vectordb/index.faiss


In [24]:
# === 3. Load complete FAISS database with LangChain ===
print("\n=== Loading FAISS Database ===")
embeddings = OllamaEmbeddings(model="llama3")

try:
    db = FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)
    
    print(f"Database loaded successfully")
    print(f"Number of documents in index: {db.index.ntotal}")
    
    # Get the docstore
    if hasattr(db, 'docstore'):
        docstore = db.docstore
        if hasattr(docstore, '_dict'):
            docs = list(docstore._dict.values())
            print(f"Total documents in docstore: {len(docs)}")
            
            # Show sample documents
            print("\n=== Sample Documents ===")
            for i, doc in enumerate(docs[:3]):
                print(f"\nDocument {i+1}:")
                print(f"  Content: {doc.page_content[:300]}...")
                print(f"  Metadata: {doc.metadata}")

# Test similarity search
    print("\n=== Testing Similarity Search ===")
    test_query = "test query"
    results = db.similarity_search(test_query, k=2)
    print(f"Found {len(results)} similar documents for query: '{test_query}'")
    for i, doc in enumerate(results):
        print(f"\nResult {i+1}:")
        print(f"  Content: {doc.page_content[:200]}...")
        print(f"  Metadata: {doc.metadata}")
        
except Exception as e:
    print(f"Error loading database: {e}")


=== Loading FAISS Database ===
Error loading database: Error in faiss::FileIOReader::FileIOReader(const char *) at /Users/runner/work/faiss-wheels/faiss-wheels/third-party/faiss/faiss/impl/io.cpp:70: Error: 'f' failed: could not open railway_deployment/data/vectordb/index.faiss for reading: No such file or directory
