In [3]:
import faiss
import numpy as np
import json
from pathlib import Path

# Absolute path to the FAISS index file inspected below.
# NOTE: an earlier relative alternative ("./AIC25_BACKEND/Faissindex/faiss_index_vitL.bin")
# was assigned first and then immediately overwritten, so it never took effect;
# its directory casing ("Faissindex") also differs from the path used here.
faiss_index_path = "/home/phucuy2025/Documents/AIC_2025/VBS_system/AIC25_BACKEND/FaissIndex/faiss_index_vitL.bin"
def inspect_faiss_index(index_path):
    """
    Load a FAISS index file and print a summary of its properties.

    Args:
        index_path: Location of the serialized index, given either as a
            ``str`` or as a ``pathlib.Path``. ``Path`` objects are converted
            to ``str`` before being handed to ``faiss.read_index``.

    Returns:
        The loaded index object, or ``None`` when loading or inspecting it
        raised an exception (the error is printed instead of propagated, so
        the surrounding notebook cell keeps running).
    """
    try:
        # faiss.read_index is a native binding that expects a plain string
        # file name; a pathlib.Path would be rejected with a TypeError.
        if isinstance(index_path, Path):
            index_path = str(index_path)

        # Load the FAISS index
        print(f"Loading FAISS index from: {index_path}")
        index = faiss.read_index(index_path)

        print("=== FAISS INDEX INFORMATION ===")
        print(f"Index type: {type(index).__name__}")
        print(f"Number of vectors: {index.ntotal}")
        print(f"Vector dimension: {index.d}")
        print(f"Is trained: {index.is_trained}")

        # The attributes below are optional: each one is printed only when
        # the loaded index object actually exposes it.
        if hasattr(index, 'index_factory_string'):
            print(f"Index factory string: {index.index_factory_string}")

        if hasattr(index, 'nlist'):
            print(f"Number of clusters (nlist): {index.nlist}")

        if hasattr(index, 'nprobe'):
            print(f"Number of probes: {index.nprobe}")

        print("=" * 40)

        return index

    except Exception as e:
        # Deliberately best-effort: report the failure and signal it with None.
        print(f"Error loading FAISS index: {e}")
        return None

def search_similar_vectors(index, query_vector=None, k=5):
    """
    Search the index for the nearest neighbours of a query and print them.

    Args:
        index: Index object exposing ``d`` (vector dimension) and
            ``search(queries, k)``. When ``None`` the function returns
            immediately without printing anything.
        query_vector: Query embedding. May be a 1-D sequence/array holding a
            single vector or a 2-D array of shape ``(n, d)``; it is converted
            to a contiguous 2-D ``float32`` array before searching. Only the
            results for the first row are printed. When ``None``, a random
            vector of dimension ``index.d`` is used for demonstration.
        k: Number of neighbours to request.

    Returns:
        None. Results (or the error message, if the search fails) are
        printed to stdout.
    """
    if index is None:
        return

    if query_vector is None:
        # Create a random query vector for demonstration
        query_vector = np.random.random((1, index.d)).astype('float32')
        print(f"Using random query vector of dimension {index.d}")

    try:
        # Normalize the query so lists, float64 arrays and single 1-D vectors
        # are accepted: the search call needs a contiguous 2-D float32 array.
        # An array that already has that form is passed through without a copy.
        queries = np.ascontiguousarray(query_vector, dtype='float32')
        if queries.ndim == 1:
            queries = queries.reshape(1, -1)

        # Search for k nearest neighbors
        distances, indices = index.search(queries, k)

        # NOTE: the value labelled "Distance" is whatever score the index
        # returns; for an inner-product index it is a similarity, so larger
        # values mean closer matches.
        print(f"\n=== TOP {k} SIMILAR VECTORS ===")
        for rank, (dist, idx) in enumerate(zip(distances[0], indices[0]), start=1):
            print(f"Rank {rank}: Index {idx}, Distance: {dist:.4f}")

    except Exception as e:
        # Best-effort demo helper: report the failure instead of raising.
        print(f"Error during search: {e}")
def get_vector_by_id(index, vector_id):
    """
    Retrieve a stored vector by its ID and print a short summary of it.

    Args:
        index: Index object exposing ``ntotal`` and, optionally,
            ``reconstruct(vector_id)``. May be ``None``.
        vector_id: Position of the vector; valid values are
            ``0 <= vector_id < index.ntotal``.

    Returns:
        The reconstructed vector, or ``None`` when the index is ``None``,
        the ID is out of range, the index has no ``reconstruct`` method, or
        reconstruction raised an exception (the error is printed).
    """
    # Reject negative IDs as well as IDs past the end, so an invalid ID is
    # reported here and never reaches reconstruct().
    if index is None or not 0 <= vector_id < index.ntotal:
        print(f"Invalid vector ID: {vector_id}")
        return None

    try:
        # Reconstruct vector by ID (if index supports it)
        if not hasattr(index, 'reconstruct'):
            print("This index type doesn't support vector reconstruction")
            return None

        vector = index.reconstruct(vector_id)
        print(f"\n=== VECTOR {vector_id} ===")
        print(f"Dimension: {len(vector)}")
        print(f"First 10 values: {vector[:10]}")
        print(f"Vector norm: {np.linalg.norm(vector):.4f}")
        return vector

    except Exception as e:
        # Best-effort: report the failure and signal it with None.
        print(f"Error retrieving vector {vector_id}: {e}")
        return None

# Main execution: load the index, run a demo search, show the first vector,
# and preview the ID-to-name metadata file if one sits next to the index.
if __name__ == "__main__":
    # Load and inspect the index
    index = inspect_faiss_index(faiss_index_path)

    if index is not None:
        # Perform a sample search
        search_similar_vectors(index, k=5)

        # Try to get a specific vector (if possible)
        if index.ntotal > 0:
            get_vector_by_id(index, 0)  # Get first vector

        # Additional analysis
        print("\n=== ADDITIONAL INFO ===")
        # Rough estimate only: counts 4 bytes per vector component and
        # ignores any per-index overhead or compression.
        print(f"Index size in memory: ~{index.ntotal * index.d * 4 / (1024**2):.2f} MB")

        # If you have metadata file, try to load it
        metadata_path = Path(faiss_index_path).parent / "id_to_name_vitL.json"
        if metadata_path.exists():
            print(f"Found metadata file: {metadata_path}")
            try:
                # JSON text is UTF-8; be explicit rather than rely on the
                # platform's default encoding.
                with open(metadata_path, 'r', encoding='utf-8') as f:
                    metadata = json.load(f)
                print(f"Metadata entries: {len(metadata)}")

                # Show first few entries without copying the whole mapping
                # into a temporary list.
                preview_limit = 5
                for shown, (key, value) in enumerate(metadata.items()):
                    if shown >= preview_limit:
                        break
                    print(f"  {key}: {value}")
                if len(metadata) > preview_limit:
                    print(f"  ... and {len(metadata) - preview_limit} more entries")
            except Exception as e:
                # Metadata is optional; report problems and carry on.
                print(f"Error loading metadata: {e}")

Loading FAISS index from: /home/phucuy2025/Documents/AIC_2025/VBS_system/AIC25_BACKEND/FaissIndex/faiss_index_vitL.bin
=== FAISS INDEX INFORMATION ===
Index type: IndexFlatIP
Number of vectors: 148982
Vector dimension: 768
Is trained: True
Using random query vector of dimension 768

=== TOP 5 SIMILAR VECTORS ===
Rank 1: Index 62808, Distance: 1.9431
Rank 2: Index 61241, Distance: 1.9100
Rank 3: Index 88775, Distance: 1.8864
Rank 4: Index 97581, Distance: 1.8594
Rank 5: Index 93173, Distance: 1.8409

=== VECTOR 0 ===
Dimension: 768
First 10 values: [-0.0473288   0.0003561   0.03553508 -0.00189429  0.00038645  0.03316383
 -0.01554898  0.00817743 -0.01252604 -0.02030501]
Vector norm: 1.0000

=== ADDITIONAL INFO ===
Index size in memory: ~436.47 MB
Found metadata file: /home/phucuy2025/Documents/AIC_2025/VBS_system/AIC25_BACKEND/FaissIndex/id_to_name_vitL.json
Metadata entries: 148982
  0: Videos_L21_a/L21_V001/f000000.webp
  1: Videos_L21_a/L21_V001/f000048.webp
  2: Videos_L21_a/L21_V001