### 1. Prayash Kumar Sahu (22B1261)
### 2. Aditya Singh Bhadoria (22B1247)

In [None]:
pip install deepface opencv-python speechrecognition pyttsx3

In [4]:
pip install pyaudio

Collecting pyaudioNote: you may need to restart the kernel to use updated packages.

  Downloading PyAudio-0.2.14-cp38-cp38-win_amd64.whl.metadata (2.7 kB)
Downloading PyAudio-0.2.14-cp38-cp38-win_amd64.whl (164 kB)
   ---------------------------------------- 0.0/164.1 kB ? eta -:--:--
   -- ------------------------------------- 10.2/164.1 kB ? eta -:--:--
   ---- ---------------------------------- 20.5/164.1 kB 217.9 kB/s eta 0:00:01
   ---- ---------------------------------- 20.5/164.1 kB 217.9 kB/s eta 0:00:01
   --------- ----------------------------- 41.0/164.1 kB 217.9 kB/s eta 0:00:01
   -------------- ------------------------ 61.4/164.1 kB 251.0 kB/s eta 0:00:01
   -------------------------------------  163.8/164.1 kB 653.6 kB/s eta 0:00:01
   -------------------------------------- 164.1/164.1 kB 578.3 kB/s eta 0:00:00
Installing collected packages: pyaudio
Successfully installed pyaudio-0.2.14


In [17]:
# Environment check: print the TensorFlow version available to this kernel.
import tensorflow as tf
print(tf.__version__)

2.13.0


In [2]:
import os

In [None]:
import os
import cv2
import numpy as np
from deepface import DeepFace
from pathlib import Path  # Using pathlib for a more modern, object-oriented path handling

class FaceEmbeddingProcessor:
    """
    Generates robust face embeddings from image files.

    For every image the processor:
    1. Builds a set of augmented variations (flip, brightness, rotation, zoom).
    2. Computes an L2-normalized embedding for each variation via DeepFace.
    3. Averages those embeddings and L2-normalizes the mean.

    `process_folder` applies this to every supported image in a directory.
    """

    def __init__(self, model_name="Facenet", detector_backend="retinaface"):
        """
        Stores the DeepFace configuration used for every embedding.

        Args:
            model_name (str): The face recognition model (e.g., "Facenet", "VGG-Face").
            detector_backend (str): The face detector (e.g., "retinaface", "mtcnn").
        """
        self.model = model_name
        self.detector = detector_backend

    @staticmethod
    def _l2_normalize(vector_array):
        """
        Scales a vector to unit length (L2 norm of 1).

        Args:
            vector_array (np.ndarray): The raw 1D embedding vector.

        Returns:
            np.ndarray: The normalized vector, or the input unchanged when its
            norm is zero (avoids a division by zero).
        """
        norm = np.linalg.norm(vector_array)
        if norm == 0:
            return vector_array
        return vector_array / norm

    def _create_augmentations(self, source_image):
        """
        Generates modified copies of an image to make the averaged embedding
        less sensitive to mirroring, lighting, small tilts and distance.

        Args:
            source_image (np.ndarray): The original image as read by cv2.

        Returns:
            list: np.ndarray images, the original first. Every image has the
            same height and width as `source_image`.
        """
        img_height, img_width = source_image.shape[:2]

        # The untouched original is always the first variation.
        variations = [source_image]

        # --- 1. Horizontal flip (flipCode=1 mirrors around the vertical axis) ---
        variations.append(cv2.flip(source_image, 1))

        # --- 2. Brightness variations ---
        # `alpha` is a multiplicative gain on the pixel values (0.8 darker,
        # 1.2 brighter); `beta` is an additive offset and is left at 0.
        for factor in (0.8, 1.2):
            variations.append(cv2.convertScaleAbs(source_image, alpha=factor, beta=0))

        # --- 3. Small rotations around the image centre (minor head tilts) ---
        center = (img_width // 2, img_height // 2)
        for angle in (-10, 10):
            rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
            variations.append(
                cv2.warpAffine(source_image, rotation_matrix, (img_width, img_height))
            )

        # --- 4. Scaling (zoom out / zoom in) ---
        for scale in (0.9, 1.1):
            scaled_img = cv2.resize(
                source_image, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR
            )
            scaled_h, scaled_w = scaled_img.shape[:2]

            # Zoom in: the scaled image is larger, so keep its central
            # (img_height, img_width) window. When the scaled image is smaller
            # the offsets are 0 and the slice returns the whole scaled image.
            start_y = max(0, (scaled_h - img_height) // 2)
            start_x = max(0, (scaled_w - img_width) // 2)
            cropped_img = scaled_img[start_y:start_y + img_height,
                                     start_x:start_x + img_width]

            # Zoom out: pad the smaller image back to the original size.
            # Resizing it back up (as was done previously) would simply undo
            # the down-scale, so the "zoomed out" variation would be just a
            # slightly blurred copy of the original.
            pad_y = img_height - cropped_img.shape[0]
            pad_x = img_width - cropped_img.shape[1]
            if pad_y > 0 or pad_x > 0:
                top, left = pad_y // 2, pad_x // 2
                cropped_img = cv2.copyMakeBorder(
                    cropped_img, top, pad_y - top, left, pad_x - left,
                    cv2.BORDER_REPLICATE
                )

            # The crop may be a non-contiguous view; hand out a plain array.
            variations.append(np.ascontiguousarray(cropped_img))

        return variations

    def _get_vector_from_image_data(self, image_data):
        """
        Computes one normalized embedding from an in-memory image array.

        Args:
            image_data (np.ndarray): The image to process.

        Returns:
            np.ndarray: The L2-normalized embedding of the first face entry
            returned by DeepFace.
        """
        # `enforce_detection=False` asks DeepFace not to raise when no face is
        # found. Any error that still occurs (including an empty result list)
        # propagates to the caller, which skips this variation.
        representations = DeepFace.represent(
            img_path=image_data,
            model_name=self.model,
            detector_backend=self.detector,
            enforce_detection=False
        )

        # `represent` returns one dictionary per face; only the first is used.
        raw_vector = np.array(representations[0]["embedding"])
        return self._l2_normalize(raw_vector)

    def get_robust_embedding(self, image_file_path):
        """
        Calculates a single embedding for an image by averaging the embeddings
        of its augmentations.

        Args:
            image_file_path (str or pathlib.Path): The path to the image.

        Returns:
            np.ndarray: The averaged, L2-normalized embedding vector.

        Raises:
            IOError: If the image file cannot be read by cv2.
            ValueError: If no embedding could be generated from the image or
                        any of its augmentations.
        """
        image_matrix = cv2.imread(str(image_file_path))
        if image_matrix is None:
            raise IOError(f"Cannot read image file: {image_file_path}")

        # 1. Generate all variations (flips, rotations, etc.)
        augmented_list = self._create_augmentations(image_matrix)

        # 2. Compute an embedding for each variation. A variation that fails
        #    is skipped on purpose: the remaining ones still contribute.
        embedding_list = []
        for aug_img in augmented_list:
            try:
                embedding_list.append(self._get_vector_from_image_data(aug_img))
            except Exception:
                continue

        # 3. Nothing usable at all -> report it to the caller.
        if not embedding_list:
            # Path(...) makes `.name` work for plain string paths as well;
            # calling `.name` on a str would raise AttributeError here.
            file_name = Path(image_file_path).name
            raise ValueError(f"No valid embeddings were generated for {file_name}.")

        # 4. Average element-wise across variations, then re-normalize so the
        #    result is a unit vector again.
        mean_vector = np.mean(embedding_list, axis=0)
        return self._l2_normalize(mean_vector)

    def process_folder(self, folder_path_str):
        """
        Processes all supported images in a folder.

        Files are visited in sorted order so that the rows of the returned
        array are reproducible between runs. Files that cannot be processed
        are reported and skipped.

        Args:
            folder_path_str (str): The path to the directory.

        Returns:
            tuple (np.ndarray, list):
                - An array with one robust embedding per row (empty when the
                  folder is invalid or no image could be processed).
                - The filenames corresponding to each row.
        """
        folder_path = Path(folder_path_str)
        if not folder_path.is_dir():
            print(f"Error: Path '{folder_path}' is not a valid directory.")
            return np.array([]), []

        # Supported image extensions, compared against the lowercased suffix.
        valid_extensions = {".jpg", ".jpeg", ".png", ".webp"}

        all_vectors = []
        all_names = []

        # `iterdir()` yields entries in arbitrary order; sort for determinism.
        for file_path in sorted(folder_path.iterdir()):
            if not (file_path.is_file() and file_path.suffix.lower() in valid_extensions):
                continue

            try:
                robust_embedding = self.get_robust_embedding(file_path)
            except Exception as e:
                # Best effort: keep going, but tell the user which file was
                # dropped and why instead of failing silently.
                print(f"⚠️ {file_path.name} SKIPPED: {e}")
                continue

            all_vectors.append(robust_embedding)
            all_names.append(file_path.name)
            print(f"✅ {file_path.name} processed with augmentation.")

        # rows = images, columns = embedding dimensions
        return np.array(all_vectors), all_names

# ------------------------------
# Configuration
# ------------------------------
# DeepFace model and detector used to build the embeddings.
MODEL = "Facenet"
DETECTOR = "retinaface"

# ------------------------------
# Main Execution
# ------------------------------
# Builds embeddings for the "trusted_faces" and "random_faces" folders and
# stores both arrays in a single .npz archive.
if __name__ == "__main__":

    # 1. Create an instance of the processor
    processor = FaceEmbeddingProcessor(
        model_name=MODEL,
        detector_backend=DETECTOR
    )

    # 2. Process the "trusted" (known) faces
    print("Processing trusted faces...")
    trusted_vectors, _ = processor.process_folder("trusted_faces")
    print(f"--- Generated {len(trusted_vectors)} trusted embeddings ---")

    # 3. Process the "random" (unknown/imposter) faces
    print("\nProcessing random faces...")
    random_vectors, _ = processor.process_folder("random_faces")
    print(f"--- Generated {len(random_vectors)} random embeddings ---")

    # The detection cell cannot calibrate without trusted embeddings, so make
    # the problem visible here, where it originates.
    if len(trusted_vectors) == 0:
        print("Warning: no trusted embeddings were generated; "
              "add images to 'trusted_faces' and run this cell again.")

    # 4. Save the results to a NumPy .npz archive
    #    (np.savez stores the arrays uncompressed).
    output_file = "embeddings.npz"
    np.savez(
        output_file,
        trusted=trusted_vectors,  # stored under the key 'trusted'
        random=random_vectors     # stored under the key 'random'
    )

    # Report the file that was actually written.
    print(f"\n✅ All embeddings saved to {output_file}")


Processing trusted faces...
✅ Aditya.jpg processed with augmentation.
--- Generated 1 trusted embeddings ---

Processing random faces...
✅ ee782_ass2_pic2.jpg processed with augmentation.
✅ ee782_ass2_pic3.jpg processed with augmentation.
✅ ee782_ass2_pic6.jpg processed with augmentation.
✅ ee782_ass2_pic7.jpg processed with augmentation.
✅ WIN_20251021_23_35_22_Pro.jpg processed with augmentation.
--- Generated 5 random embeddings ---

✅ All embeddings saved to embeddings.npz


In [None]:
import cv2
import numpy as np
import time
import datetime
import os
from deepface import DeepFace
import speech_recognition as sr  # Added for voice activation

# === 1. Load Embeddings and Calibrate ===

# Load the pre-computed embeddings written by the embedding-generation cell.
# The archive is opened in a `with` block so the file handle is closed again
# once both arrays have been read into memory.
try:
    with np.load("embeddings.npz") as data:
        trusted_embeddings = data["trusted"]
        random_embeddings = data["random"]
except FileNotFoundError:
    # Abort with a visible reason. A bare `exit()` gives no message and, in a
    # notebook kernel, is not a reliable way to stop the rest of the cell.
    raise SystemExit(
        "Error: 'embeddings.npz' not found. "
        "Please run the embedding-generation cell first."
    ) from None

if trusted_embeddings.shape[0] == 0:
    raise SystemExit(
        "Error: No trusted embeddings found in 'embeddings.npz'. "
        "Please add images to the 'trusted_faces' folder and re-run the generation cell."
    )

# Compute the "centroid" (average) of all trusted faces and scale it to unit length.
trusted_centroid = np.mean(trusted_embeddings, axis=0)
trusted_centroid /= np.linalg.norm(trusted_centroid)
# --- Dynamic Threshold Calculation ---
def cosine_sim(a, b):
    """
    Cosine similarity between two 1D vectors.

    Args:
        a (np.ndarray): First vector.
        b (np.ndarray): Second vector (same length as `a`).

    Returns:
        float: dot(a, b) / (|a| * |b|). Returns 0.0 when either vector has
        zero length, instead of dividing by zero and producing NaN.
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        return 0.0
    return np.dot(a, b) / denominator

# Similarity of each trusted embedding to the trusted centroid.
trusted_sims = list(map(lambda vec: cosine_sim(trusted_centroid, vec), trusted_embeddings))

if random_embeddings.shape[0] == 0:
    # No impostor samples available: assume they would score 0.2 below the
    # average trusted similarity.
    random_sims = [np.mean(trusted_sims) - 0.2]
else:
    # Similarity of each random (impostor) embedding to the trusted centroid.
    random_sims = list(map(lambda vec: cosine_sim(trusted_centroid, vec), random_embeddings))

# The decision threshold sits halfway between the two average similarities.
THRESHOLD = (np.mean(trusted_sims) + np.mean(random_sims)) / 2
print(f"🔹 Auto-calibrated Threshold: {THRESHOLD:.3f}")



# === 2. Real-time Detection ===

# Open the default webcam (device index 0).
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Release the handle and abort with a visible reason; a bare `exit()`
    # gives no message explaining why nothing happens.
    cap.release()
    raise SystemExit("Error: Cannot open webcam.")

# --- Guard Mode State ---
# Toggled by the 'g' key and by the speech callback; off by default.
guard_mode = False
# ------------------------

# --- Speech Recognition Callback ---
def process_audio(recognizer, audio):
    """
    Background-listener callback: toggles guard mode on the activation phrase.

    The audio is transcribed with the recognizer's Google Web Speech method.
    If the lowercased transcript contains "guard my room", the module-level
    `guard_mode` flag is flipped. Nothing is printed; the new state is shown
    on the video feed. Unintelligible speech and API request failures are
    ignored.
    """
    global guard_mode

    try:
        heard = recognizer.recognize_google(audio).lower()
    except (sr.UnknownValueError, sr.RequestError):
        # Speech not understood, or the API could not be reached.
        return

    if "guard my room" in heard:
        guard_mode = not guard_mode

# --- Initialize Audio Recognition ---
# Holds the stopper returned by `listen_in_background`; stays None when voice
# activation could not be started.
stop_listening = None
try:
    r = sr.Recognizer()
    mic = sr.Microphone()

    # Short (0.5 s) ambient-noise calibration before listening starts.
    with mic as source:
        r.adjust_for_ambient_noise(source, duration=0.5)

    # Listen in the background; each captured phrase (max. 4 s) is handed to
    # `process_audio`.
    stop_listening = r.listen_in_background(mic, process_audio, phrase_time_limit=4)

except (ImportError, OSError, AttributeError):
    print(
        "\n---\n"
        "WARNING: PyAudio not found or microphone error.\n"
        "Voice activation is DISABLED.\n"
        "You can still use 'g' to toggle Guard Mode.\n"
        "---"
    )
except Exception as e:
    print(f"\nAn unexpected audio error occurred: {e}\nVoice activation is DISABLED.")

# Folder that receives crops of unknown faces (created if missing).
unknown_dir = "unknown_faces"
os.makedirs(unknown_dir, exist_ok=True)

# Minimum number of seconds between two saved unknown-face crops.
SAVE_COOLDOWN = 10
# time.time() value of the most recent save; 0 means nothing saved yet.
last_unknown_save = 0

print("🎥 Camera running. Press 'q' to exit. Say 'Guard my room' or press 'g' to toggle Guard Mode.")

# Main loop: read webcam frames, detect faces on a down-scaled copy, compare
# each face with the trusted centroid, and draw the result on the full frame.
# In guard mode, unknown faces are additionally saved to `unknown_dir`.
frame_count = 0

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Only every third frame is analysed and displayed.
        frame_count += 1
        if frame_count % 3 != 0:
            continue

        small_frame = cv2.resize(frame, (480, 360))

        orig_h, orig_w = frame.shape[:2]
        small_h, small_w = small_frame.shape[:2]

        # Factors that map coordinates on `small_frame` back onto `frame`.
        # They are computed once per frame and never overwritten: reusing
        # `orig_w` / `orig_h` for the box size (as was done previously)
        # corrupted the scale for the box drawn after a save and for every
        # further face in the same frame.
        scale_x = orig_w / small_w
        scale_y = orig_h / small_h

        # NOTE(review): with enforce_detection=False DeepFace may return a
        # whole-frame "face" when nothing is detected - confirm, and consider
        # filtering detections on their confidence value.
        try:
            detections = DeepFace.extract_faces(
                img_path=small_frame,
                detector_backend="opencv",
                enforce_detection=False
            )
        except Exception:
            detections = []

        for det in detections:
            face_img = det["face"]
            area = det["facial_area"]
            x, y, w, h = area["x"], area["y"], area["w"], area["h"]

            try:
                rep = DeepFace.represent(
                    img_path=face_img,
                    model_name="Facenet",
                    detector_backend="skip",
                    enforce_detection=False
                )
                emb = np.array(rep[0]["embedding"])
                emb_norm = np.linalg.norm(emb)
                if emb_norm == 0:
                    # A zero vector cannot be normalized or compared.
                    continue
                emb = emb / emb_norm
            except Exception:
                # No usable embedding for this detection: skip it.
                continue

            # Face box in full-frame coordinates (top-left clamped to the frame).
            box_x = max(0, int(x * scale_x))
            box_y = max(0, int(y * scale_y))
            box_w = int(w * scale_x)
            box_h = int(h * scale_y)

            sim = cosine_sim(emb, trusted_centroid)

            if sim > THRESHOLD:
                label = f"TRUSTED ({sim:.2f})"
                color = (0, 255, 0)  # Green
            else:
                label = f"UNKNOWN ({sim:.2f})"
                color = (0, 0, 255)  # Red

                # --- Guard Mode Logic ---
                # Save and alert only while guard mode is active, and at most
                # once every SAVE_COOLDOWN seconds.
                if guard_mode:
                    now = time.time()
                    if now - last_unknown_save > SAVE_COOLDOWN:
                        ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
                        fname = f"unknown_{ts}.jpg"

                        original_face = frame[box_y:box_y + box_h, box_x:box_x + box_w]

                        if original_face.size > 0:
                            cv2.imwrite(os.path.join(unknown_dir, fname), original_face)

                            print(f"💾 Unknown saved: {fname}")
                            print("You are not authorized!! Please Leave!")

                        last_unknown_save = now
                # --- End Guard Mode Logic ---

            # Draw bounding box and label (always)
            cv2.rectangle(frame, (box_x, box_y), (box_x + box_w, box_y + box_h), color, 2)
            cv2.putText(frame, label, (box_x, box_y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

        # --- Draw Guard Mode Status on Frame ---
        if guard_mode:
            status_text = "GUARD MODE: ON"
            status_color = (0, 0, 255)  # Red
        else:
            status_text = "GUARD MODE: OFF"
            status_color = (0, 255, 0)  # Green

        cv2.putText(frame, status_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, status_color, 2)
        # -----------------------------------------

        cv2.imshow("Face Verification", frame)

        # --- Key Press Logic ---
        key = cv2.waitKey(1) & 0xFF

        if key == ord('q'):
            break

        if key == ord('g'):
            guard_mode = not guard_mode  # Toggle the boolean

finally:
    # Cleanup runs even when the loop is interrupted (e.g. kernel interrupt),
    # so the camera and the microphone listener are always released.

    # Stop the background audio listener
    if stop_listening:
        stop_listening(wait_for_stop=False)

    cap.release()
    cv2.destroyAllWindows()



🔹 Auto-calibrated threshold: 0.619
🎥 Camera running. Press 'q' to exit.
