In [23]:
import psycopg2
import random

DB_CONFIG = {
    "dbname": "face_db",
    "user": "soubhikghosh",
    "password": "99ghosh@",  # Provide a password if set
    "host": "localhost",
    "port": 5432
}

CONFIDENCE_THRESHOLD = 0.7

def calculate_accuracy_without_feedback():
    """Calculate accuracy using a split of face_embeddings table."""
    conn = psycopg2.connect(**DB_CONFIG)
    cursor = conn.cursor()

    # Fetch all embeddings and phone numbers
    cursor.execute("SELECT id, embedding, phone_number FROM face_embeddings;")
    embeddings_data = cursor.fetchall()

    if len(embeddings_data) < 2:
        print("Not enough data to split.")
        return {"accuracy": 0.0, "total_queries": 0}

    # Shuffle and split data into two parts: queries and database
    random.shuffle(embeddings_data)
    split_index = len(embeddings_data) // 3
    query_data = embeddings_data[:split_index]
    db_data = embeddings_data[split_index:]

    # Insert database embeddings into a temporary table
    cursor.execute("DROP TABLE IF EXISTS temp_db_embeddings;")
    cursor.execute("""
    CREATE TEMP TABLE temp_db_embeddings (
        id integer,
        embedding vector(512),
        phone_number character varying(10)
    );
    """)
    cursor.executemany(
        "INSERT INTO temp_db_embeddings (id, embedding, phone_number) VALUES (%s, %s, %s);",
        db_data,
    )
    conn.commit()

    # Perform ANN search and calculate accuracy
    correct_predictions = 0
    total_queries = len(query_data)

    for query_id, query_embedding, query_phone in query_data:
        query_embedding_pgvector = query_embedding  # Keep it as a list of floats

        sql = """
        SELECT phone_number, (embedding <=> %s) AS distance
        FROM temp_db_embeddings
        ORDER BY distance
        LIMIT 1;
        """

        # Execute the query, passing the embedding as a parameter
        cursor.execute(sql, (query_embedding_pgvector,))
        result = cursor.fetchone()

        
        if result:
            predicted_phone, distance = result
            confidence = 1.0 - float(distance)
            if predicted_phone == query_phone:
                correct_predictions += 1

    # Drop temporary table
    cursor.execute("DROP TABLE IF EXISTS temp_db_embeddings;")
    conn.commit()

    cursor.close()
    conn.close()

    accuracy = correct_predictions / total_queries if total_queries > 0 else 0.0

    print ("correct_predictions:", correct_predictions)
    print ("total_queries:", total_queries)

    return {
        "accuracy": accuracy,
        "total_queries": total_queries,
    }


# Run the function and print metrics
if __name__ == "__main__":
    metrics = calculate_accuracy_without_feedback()
    print(f"Accuracy: {metrics['accuracy']:.2%}")
    print(f"Total Queries: {metrics['total_queries']}")


correct_predictions: 2439
total_queries: 4831
Accuracy: 50.49%
Total Queries: 4831


In [19]:
import numpy as np
import ast
import faiss
import psycopg2
import random

DB_CONFIG = {
    "dbname": "face_db",
    "user": "soubhikghosh",
    "password": "99ghosh@",  # Provide a password if set
    "host": "localhost",
    "port": 5432
}

CONFIDENCE_THRESHOLD = 0.85

def parse_embedding(embedding_str):
    """
    Converts a string representation of a list into a numpy array.
    """
    try:
        # Use `ast.literal_eval` to safely parse the string into a Python list
        embedding_list = ast.literal_eval(embedding_str)
        return np.array(embedding_list, dtype=np.float32)
    except Exception as e:
        print(f"Error parsing embedding: {e}")
        return None

def fetch_embeddings():
    """Fetch embeddings and phone numbers from the database."""
    conn = psycopg2.connect(**DB_CONFIG)
    cursor = conn.cursor()
    cursor.execute("SELECT id, embedding, phone_number FROM face_embeddings;")
    embeddings_data = cursor.fetchall()
    cursor.close()
    conn.close()
    
    # Parse embeddings and filter out invalid rows
    parsed_data = []
    for row in embeddings_data:
        id, embedding_str, phone_number = row
        embedding = parse_embedding(embedding_str)
        if embedding is not None:
            parsed_data.append((id, embedding, phone_number))
    return parsed_data

def build_faiss_index(embeddings):
    """Build a FAISS index for approximate nearest neighbor search."""
    dimension = len(embeddings[0])  # Assuming all embeddings have the same dimension
    index = faiss.IndexFlatL2(dimension)  # L2 distance (Euclidean distance)
    index.add(np.array(embeddings, dtype=np.float32))  # Add embeddings to the index
    return index

def calculate_accuracy_with_faiss():
    """Calculate accuracy using FAISS for ANN search."""
    embeddings_data = fetch_embeddings()

    if len(embeddings_data) < 2:
        print("Not enough data to split.")
        return {"accuracy": 0.0, "total_queries": 0}

    # Shuffle and split data into two parts: queries and database
    random.shuffle(embeddings_data)
    split_index = len(embeddings_data) // 3
    query_data = embeddings_data[:split_index]
    db_data = embeddings_data[split_index:]

    # Prepare FAISS index
    db_embeddings = [embedding for _, embedding, _ in db_data]
    db_phone_numbers = [phone_number for _, _, phone_number in db_data]
    index = build_faiss_index(db_embeddings)

    # Perform ANN search and calculate accuracy
    correct_predictions = 0
    total_queries = len(query_data)

    for query_id, query_embedding, query_phone in query_data:
        query_embedding_np = np.array(query_embedding, dtype=np.float32).reshape(1, -1)

        # Search in FAISS index
        distances, indices = index.search(query_embedding_np, 1)  # Find the nearest neighbor
        nearest_idx = indices[0][0]
        predicted_phone = db_phone_numbers[nearest_idx]
        confidence = 1.0 - distances[0][0]

        if predicted_phone == query_phone and confidence >= CONFIDENCE_THRESHOLD:
            correct_predictions += 1

    accuracy = correct_predictions / total_queries if total_queries > 0 else 0.0
    print ("correct_predictions:", correct_predictions)
    print ("total_queries:", total_queries)



    return {
        "accuracy": accuracy,
        "total_queries": total_queries,
    }

# Run the function and print metrics
if __name__ == "__main__":
    metrics = calculate_accuracy_with_faiss()
    print(f"Accuracy: {metrics['accuracy']:.2%}")
    print(f"Total Queries: {metrics['total_queries']}")


correct_predictions: 165
total_queries: 4831
Accuracy: 3.42%
Total Queries: 4831
