In [1]:
pip install faiss-cpu


Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0-cp312-cp312-win_amd64.whl.metadata (4.5 kB)
Downloading faiss_cpu-1.9.0-cp312-cp312-win_amd64.whl (14.9 MB)
   ---------------------------------------- 0.0/14.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/14.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/14.9 MB 435.7 kB/s eta 0:00:35
   ---------------------------------------- 0.1/14.9 MB 656.4 kB/s eta 0:00:23
    --------------------------------------- 0.3/14.9 MB 1.9 MB/s eta 0:00:08
   - -------------------------------------- 0.7/14.9 MB 4.0 MB/s eta 0:00:04
   ---- ----------------------------------- 1.6/14.9 MB 6.8 MB/s eta 0:00:02
   ---- ----------------------------------- 1.8/14.9 MB 7.5 MB/s eta 0:00:02
   ----- ---------------------------------- 2.2/14.9 MB 6.6 MB/s eta 0:00:02
   ------ --------------------------------- 2.5/14.9 MB 6.6 MB/s eta 0:00:02
   -------- ------------------------------- 3.0/14.9 MB 7.2 MB/s eta 0:00:02
   -

In [1]:
import numpy as np

# Demo dataset: `num_vectors` random vectors of dimension `vector_dim`.
# A seeded Generator makes "Restart Kernel -> Run All" reproduce the same data
# (and therefore the same search results) on every run.
SEED = 42
num_vectors = 1000
vector_dim = 128

rng = np.random.default_rng(SEED)
# Draw float32 directly (uniform in [0, 1)) instead of float64 + astype.
vectors = rng.random((num_vectors, vector_dim), dtype=np.float32)


In [3]:
import faiss

# Flat index using the L2 distance metric; its dimension must equal the
# dimension of the vectors that get added to it.
index = faiss.IndexFlatL2(vector_dim)
index.add(vectors)

# Report how many vectors the index now holds.
total_indexed = index.ntotal
print("Number of vectors in the index:", total_indexed)


Number of vectors in the index: 1000


In [5]:
# Build a single query vector with shape (1, vector_dim). A dedicated seeded
# generator keeps the printed neighbours reproducible across kernel restarts.
QUERY_SEED = 7
query_rng = np.random.default_rng(QUERY_SEED)
query_vector = query_rng.random((1, vector_dim), dtype=np.float32)

# Search for the 5 nearest neighbors; both results have one row per query.
distances, indices = index.search(query_vector, k=5)

print("Distances:", distances)
print("Indices of nearest neighbors:", indices)


Distances: [[14.675123 14.719946 14.884996 14.973237 15.071972]]
Indices of nearest neighbors: [[549  51 756 978 239]]


In [15]:
# Coarse quantizer: a flat L2 index used by the IVF index for clustering.
quantizer = faiss.IndexFlatL2(vector_dim)

# FAISS's k-means wants at least 39 training points per centroid and warns
# ("please provide at least ... training points") when given fewer. The
# previous fixed nlist of 100 would need 3900 vectors, so derive nlist from
# the amount of training data and keep 100 only as an upper bound.
MIN_POINTS_PER_CENTROID = 39
MAX_NLIST = 100
nlist = max(1, min(MAX_NLIST, vectors.shape[0] // MIN_POINTS_PER_CENTROID))

# Argument order is (quantizer, dimension, nlist).
ivf_index = faiss.IndexIVFFlat(quantizer, vector_dim, nlist)

# An IVF index must be trained before vectors can be added.
ivf_index.train(vectors)
assert ivf_index.is_trained, "IVF index failed to train"

# Add vectors to the index
ivf_index.add(vectors)

# Number of clusters visited per query; never more than the clusters that exist.
ivf_index.nprobe = min(10, nlist)

# Search using the IVF index (overwrites the flat-index results above).
distances, indices = ivf_index.search(query_vector, k=5)

print("Distances:", distances)
print("Indices of nearest neighbors:", indices)


Distances: [[14.973237 15.447727 15.449452 15.590694 15.759472]]
Indices of nearest neighbors: [[978 491 747 677 289]]


In [17]:
# Round-trip the IVF index through disk: write it out, then read the same
# file back into `loaded_index`.
index_path = "vector_database.index"
faiss.write_index(ivf_index, index_path)
loaded_index = faiss.read_index(index_path)


In [19]:
pip install fastapi uvicorn


Collecting fastapiNote: you may need to restart the kernel to use updated packages.

  Downloading fastapi-0.115.4-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn
  Downloading uvicorn-0.32.0-py3-none-any.whl.metadata (6.6 kB)
Collecting starlette<0.42.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.41.2-py3-none-any.whl.metadata (6.0 kB)
Downloading fastapi-0.115.4-py3-none-any.whl (94 kB)
   ---------------------------------------- 0.0/94.7 kB ? eta -:--:--
   ----------------- ---------------------- 41.0/94.7 kB 1.9 MB/s eta 0:00:01
   ---------------------------------------- 94.7/94.7 kB 1.8 MB/s eta 0:00:00
Downloading uvicorn-0.32.0-py3-none-any.whl (63 kB)
   ---------------------------------------- 0.0/63.7 kB ? eta -:--:--
   ---------------------------------------- 63.7/63.7 kB 3.6 MB/s eta 0:00:00
Downloading starlette-0.41.2-py3-none-any.whl (73 kB)
   ---------------------------------------- 0.0/73.3 kB ? eta -:--:--
   ---------------------------------------- 73.

In [1]:
from typing import Annotated

from fastapi import FastAPI, HTTPException, Query
import numpy as np
import faiss

# Load the pre-trained FAISS index
loaded_index = faiss.read_index("vector_database.index")

# Initialize FastAPI app
app = FastAPI()


@app.get("/search")
def search(
    query: Annotated[list[float], Query()],
    top_k: Annotated[int, Query(ge=1)] = 5,
):
    """Return the `top_k` nearest neighbours of `query` in the loaded index.

    Parameters
    ----------
    query:
        The query vector, passed as a repeated query-string parameter
        (``/search?query=0.1&query=0.2&...``). It is declared with ``Query()``
        because FastAPI otherwise treats a list-typed parameter as a request
        body, which a GET route cannot sensibly receive.
    top_k:
        Number of neighbours to return; must be at least 1. Defaults to 5.

    Returns
    -------
    dict
        ``{"distances": [[...]], "indices": [[...]]}`` with one inner list
        for the single query vector.

    Raises
    ------
    HTTPException
        422 when the query length does not match the index dimension.
    """
    # Reject wrong-sized vectors up front so the client gets a clear error
    # rather than a failure inside the FAISS search call.
    expected_dim = loaded_index.d
    if len(query) != expected_dim:
        raise HTTPException(
            status_code=422,
            detail=f"query must contain {expected_dim} values, got {len(query)}",
        )

    # FAISS expects a 2-D float32 array with one row per query.
    query_vector = np.array([query], dtype='float32')
    distances, indices = loaded_index.search(query_vector, top_k)
    return {"distances": distances.tolist(), "indices": indices.tolist()}

# Save this cell as app.py, then run with: uvicorn app:app --reload


In [3]:
import logging

# Configure the root logger at INFO level.
logging.basicConfig(level=logging.INFO)

# Logger named after the current module (``__main__`` inside a notebook).
logger = logging.getLogger(__name__)
logger.log(logging.INFO, "FAISS index loaded successfully.")


INFO:__main__:FAISS index loaded successfully.


In [7]:
import numpy as np
import faiss
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Read the dimension from the index itself instead of hard-coding it, so the
# new vectors can never disagree with what `loaded_index` expects.
vector_dim = loaded_index.d

# Example batch of 5 new float32 vectors to append to the index.
new_vectors = np.random.random((5, vector_dim)).astype('float32')

# NOTE: this mutates `loaded_index` in memory only, and every re-run of the
# cell appends another batch; nothing here writes the index back to disk.
ntotal_before = loaded_index.ntotal
loaded_index.add(new_vectors)
assert loaded_index.ntotal == ntotal_before + len(new_vectors), "index did not grow as expected"
logger.info("New vectors added to the index")


INFO:__main__:New vectors added to the index
