In [None]:
!pip3 install numpy faiss-gpu

In [None]:
!pip3 install "tensorflow<2.11"

: 

In [None]:
# Verify the installation: list the GPU devices TensorFlow can see.
# An empty list means no GPU is visible to TensorFlow.
import tensorflow as tf
print(tf.config.list_physical_devices('GPU'))

In [None]:
import pandas as pd
import numpy as np
import faiss
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
import os

class EfficientFaceMatcher:
    """Find images similar to a query image using VGG16 features and FAISS.

    The dataset CSV must have an ``image_location`` column; an optional
    ``tags`` column is passed through to the match results. One embedding
    is stored per CSV row, in row order, so a FAISS result index can be
    used directly as a positional index into ``self.dataset``.
    """

    # Length of the vector VGG16 produces with include_top=False and
    # pooling='avg'; also used for the placeholder rows of unreadable images.
    EMBEDDING_DIM = 512

    # Feature extractor, built on first use and then reused. Building the
    # network is far more expensive than running it on a single image.
    _model = None

    def __init__(self, dataset_path):
        """
        Load the dataset, embed every image and build the search index.

        Args:
            dataset_path (str): Path to a CSV file with an
                ``image_location`` column.
        """
        # Load dataset
        self.dataset = pd.read_csv(dataset_path)

        # Extract embeddings for all images and scale them to unit length
        self.database_embeddings = self._normalize_embeddings(
            self._extract_all_embeddings()
        )

        # Inner product of unit-length vectors equals cosine similarity
        dimension = self.database_embeddings.shape[1]
        self.index = faiss.IndexFlatIP(dimension)
        if len(self.database_embeddings):
            self.index.add(self.database_embeddings)

    def _get_model(self):
        """Return the VGG16 feature extractor, creating it on first use."""
        if self._model is None:
            # Use VGG16 as a feature extractor (replace with a dedicated
            # face embedding model in practice)
            self._model = VGG16(weights='imagenet', include_top=False, pooling='avg')
        return self._model

    def _extract_all_embeddings(self):
        """
        Extract embeddings for all images in the dataset

        Missing or unreadable images get an all-zero row instead of being
        dropped, so that row i of the result always belongs to row i of
        the dataset.

        Returns:
            numpy.ndarray: float32 array of shape
                (number of rows, EMBEDDING_DIM)
        """
        embeddings = []
        for img_path in self.dataset['image_location']:
            try:
                # Ensure the image path exists
                if not os.path.exists(img_path):
                    print(f"Warning: Image not found {img_path}")
                    # Append a zero vector for missing images
                    embeddings.append(np.zeros(self.EMBEDDING_DIM, dtype=np.float32))
                    continue

                # Extract embedding
                embeddings.append(self._extract_single_embedding(img_path))
            except Exception as e:
                # Deliberate best effort: one bad file must not abort the
                # whole indexing run.
                print(f"Error processing {img_path}: {e}")
                # Append a zero vector for failed extractions
                embeddings.append(np.zeros(self.EMBEDDING_DIM, dtype=np.float32))

        if not embeddings:
            # Keep the result two-dimensional even for an empty dataset
            return np.empty((0, self.EMBEDDING_DIM), dtype=np.float32)
        return np.asarray(embeddings, dtype=np.float32)

    def _extract_single_embedding(self, image_path):
        """
        Extract embedding for a single image

        Args:
            image_path (str): Path to the image file

        Returns:
            numpy.ndarray: 1-D feature vector produced by the model
        """
        img = image.load_img(image_path, target_size=(224, 224))
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)

        # verbose=0 suppresses the per-call progress bar
        embedding = self._get_model().predict(x, verbose=0)
        return embedding.flatten()

    def _normalize_embeddings(self, embeddings):
        """
        Normalize embeddings to unit length for cosine similarity

        All-zero rows are returned unchanged (still all zero) rather than
        being divided by zero, which would fill them with NaN.

        Args:
            embeddings (numpy.ndarray): 2-D array, one embedding per row

        Returns:
            numpy.ndarray: C-contiguous float32 array of the same shape,
                the layout FAISS expects
        """
        embeddings = np.asarray(embeddings, dtype=np.float32)
        norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
        safe_norms = np.where(norms > 0, norms, 1.0)
        return np.ascontiguousarray(embeddings / safe_norms, dtype=np.float32)

    def find_match(self, query_image_path, threshold=0.7, k=5):
        """
        Find potential matches for a query image

        Args:
            query_image_path (str): Path to the query image
            threshold (float): Similarity threshold for match
            k (int): Maximum number of nearest neighbours to consider

        Returns:
            dict: Matching results with similarity scores and tags

        Raises:
            ValueError: If ``k`` is smaller than 1
        """
        if k < 1:
            raise ValueError("k must be a positive integer")

        # Never request more neighbours than the index holds
        k = min(k, self.index.ntotal)
        if k == 0:
            return {'has_match': False, 'matches': []}

        # Extract query embedding
        query_embedding = self._extract_single_embedding(query_image_path)
        query_embedding = self._normalize_embeddings(query_embedding[np.newaxis, :])

        # Perform similarity search
        distances, indices = self.index.search(query_embedding, k)

        # Process results
        has_tags = 'tags' in self.dataset.columns
        matches = []
        for dist, idx in zip(distances[0], indices[0]):
            # FAISS marks missing neighbours with index -1; iloc[-1] would
            # silently return the last dataset row for those.
            if idx >= 0 and dist >= threshold:
                # Get additional information from the original dataset
                match_info = self.dataset.iloc[idx]
                matches.append({
                    'index': int(idx),
                    'similarity': float(dist),
                    'image_location': match_info['image_location'],
                    'tags': match_info['tags'] if has_tags else None
                })

        return {
            'has_match': len(matches) > 0,
            'matches': matches
        }

def main():
    """Build a matcher from ``dataset.csv`` and print the matches for one query image."""
    # Path to your CSV file
    dataset_path = 'dataset.csv'

    # Expected CSV format. Quote the tags field when it contains commas;
    # otherwise each row has more fields than the header has columns and
    # pandas misaligns the columns:
    # image_location,tags
    # /path/to/image1.jpg,"person1,outdoor"
    # /path/to/image2.jpg,"person2,indoor"

    # Initialize matcher
    matcher = EfficientFaceMatcher(dataset_path)

    # Example: Match a query image
    query_image_path = 'query_image.jpg'
    result = matcher.find_match(query_image_path)

    # Print results
    print("Matching Results:")
    if result['has_match']:
        for match in result['matches']:
            print("Match Found:")
            print(f"  Similarity: {match['similarity']}")
            print(f"  Image Location: {match['image_location']}")
            print(f"  Tags: {match['tags']}")
    else:
        print("No matches found.")

# Run the demo only when this file is executed directly, not when it is imported.
if __name__ == '__main__':
    main()