In [1]:
from pathlib import Path

import faiss
import numpy as np

# Locations of the trained embeddings (input) and the FAISS index built from them (output)
EMBEDDINGS_PATH = Path("weights/refined_transaction_embeddings.npy")
INDEX_PATH = Path("index/refined_transaction_faiss.index")

# Load the trained embeddings and cast them to float32 before handing them to FAISS
transaction_embeddings = np.load(EMBEDDINGS_PATH).astype("float32")
if transaction_embeddings.ndim != 2:
    # Fail with a clear message instead of an IndexError on shape[1] below
    raise ValueError(
        "Expected a 2-D (n_transactions, n_features) embedding matrix, "
        f"got shape {transaction_embeddings.shape}"
    )

# Define FAISS index (L2 distance for similarity search)
dimension = transaction_embeddings.shape[1]  # Number of features in embeddings
index = faiss.IndexFlatL2(dimension)

# Add embeddings to FAISS index
index.add(transaction_embeddings)

# Save FAISS index; create the output directory first so a fresh checkout
# (where "index/" may not exist yet) does not fail on the write
INDEX_PATH.parent.mkdir(parents=True, exist_ok=True)
faiss.write_index(index, str(INDEX_PATH))  # write_index is given a plain str filename

print(f"Stored {index.ntotal} embeddings in FAISS index!")


Stored 533009 embeddings in FAISS index!


In [3]:

# Reload the embedding matrix (cast to float32) so a row can be looked up by position
transaction_embeddings = np.load(
    "weights/refined_transaction_embeddings.npy"
).astype("float32")

# Restore the FAISS index that was previously written to disk
index = faiss.read_index("index/refined_transaction_faiss.index")

# Function to query similar transactions
def find_similar_transactions(query_embedding, top_k=5, faiss_index=None):
    """Search an index for the nearest stored embeddings of one or more queries.

    Args:
        query_embedding: A single embedding (1-D) or a batch of embeddings
            (2-D, one query per row). Anything NumPy can convert to a
            float32 array is accepted.
        top_k: Number of neighbours to retrieve per query.
        faiss_index: Object exposing ``search(queries, k)``. When omitted,
            the notebook-level ``index`` is used.

    Returns:
        The ``(distances, indices)`` pair produced by ``search``.

    Raises:
        ValueError: If ``query_embedding`` has more than two dimensions.
    """
    # atleast_2d turns a single vector into a one-row batch and leaves an
    # existing batch untouched; the search call is given a C-contiguous
    # float32 matrix regardless of the input's dtype or memory layout.
    queries = np.ascontiguousarray(np.atleast_2d(query_embedding), dtype="float32")
    if queries.ndim != 2:
        raise ValueError(
            "query_embedding must be a 1-D vector or a 2-D batch of vectors, "
            f"got an array with shape {queries.shape}"
        )

    # Fall back to the module-level index so existing two-argument calls keep working
    search_index = index if faiss_index is None else faiss_index
    distances, indices = search_index.search(queries, top_k)  # Retrieve top-k similar transactions
    return distances, indices

# Example query: a fixed, hand-picked transaction (noted by the author as an anomaly)
query_id = 12939
query_embedding = transaction_embeddings[query_id]  # row of the embedding matrix at that position

# Retrieve the five nearest stored embeddings for this one query
distances, indices = find_similar_transactions(query_embedding=query_embedding, top_k=5)

# Report the query and its single result row, one line per item
print(
    f"Query Transaction ID: {query_id}",
    f"Similar Transactions IDs: {indices[0]}",
    f"Distances: {distances[0]}",
    sep="\n",
)


Query Transaction ID: 12939
Similar Transactions IDs: [ 12939 278540 105064  83345 179179]
Distances: [0.         0.00075518 0.00075527 0.00075668 0.00075769]
