### 1. Prayash Kumar Sahu (22B1261)
### 2. Aditya Singh Bhadoria (22B1247)

### Github Link: https://github.com/AdityaBhadoria09/ASS2_EE782_AI_agent.git

In [None]:
pip install deepface opencv-python speechrecognition pyttsx3

In [4]:
pip install pyaudio

Collecting pyaudioNote: you may need to restart the kernel to use updated packages.

  Downloading PyAudio-0.2.14-cp38-cp38-win_amd64.whl.metadata (2.7 kB)
Downloading PyAudio-0.2.14-cp38-cp38-win_amd64.whl (164 kB)
   ---------------------------------------- 0.0/164.1 kB ? eta -:--:--
   -- ------------------------------------- 10.2/164.1 kB ? eta -:--:--
   ---- ---------------------------------- 20.5/164.1 kB 217.9 kB/s eta 0:00:01
   ---- ---------------------------------- 20.5/164.1 kB 217.9 kB/s eta 0:00:01
   --------- ----------------------------- 41.0/164.1 kB 217.9 kB/s eta 0:00:01
   -------------- ------------------------ 61.4/164.1 kB 251.0 kB/s eta 0:00:01
   -------------------------------------  163.8/164.1 kB 653.6 kB/s eta 0:00:01
   -------------------------------------- 164.1/164.1 kB 578.3 kB/s eta 0:00:00
Installing collected packages: pyaudio
Successfully installed pyaudio-0.2.14


In [17]:
# Sanity check: print the TensorFlow version installed in this kernel.
import tensorflow as tf
print(tf.__version__)

2.13.0


In [2]:
import os

In [1]:
import os  # Used for interacting with the operating system (e.g., listing files in a directory)
import cv2  # OpenCV: For image reading, manipulation, and augmentations
import numpy as np  # NumPy: For numerical operations, especially with arrays (embeddings)
from deepface import DeepFace  # The core library for face recognition and embedding generation

# ------------------------------
# Configuration Constants
# ------------------------------
# Face recognition model name. It is passed as `model_name` to
# DeepFace.represent in get_embedding_from_image.
MODEL = "Facenet"

# Face detector backend. It is passed as `detector_backend` to
# DeepFace.represent in get_embedding_from_image.
# NOTE(review): the live-camera cell further down detects faces with the
# "opencv" backend and hard-codes "Facenet" instead of reading these
# constants - confirm that the mismatch with enrolment is intentional.
DETECTOR = "retinaface"

# ------------------------------
# Image Augmentation Function
# ------------------------------

def _fit_to_canvas(img, w, h):
    """
    Centre-crops and/or zero-pads `img` so that the result is exactly (h, w).

    Args:
        img (np.ndarray): The image to fit.
        w (int): Target width in pixels.
        h (int): Target height in pixels.

    Returns:
        np.ndarray: A new array of height `h` and width `w`.
    """
    src_h, src_w = img.shape[:2]

    # Any axis that is larger than the target is centre-cropped.
    y1 = max(0, (src_h - h) // 2)
    x1 = max(0, (src_w - w) // 2)
    fitted = img[y1:y1 + h, x1:x1 + w]

    # Any axis that is smaller than the target is padded symmetrically with
    # black, the same fill cv2.warpAffine uses by default for rotations.
    pad_y = h - fitted.shape[0]
    pad_x = w - fitted.shape[1]
    if pad_y > 0 or pad_x > 0:
        top, left = pad_y // 2, pad_x // 2
        return cv2.copyMakeBorder(
            fitted, top, pad_y - top, left, pad_x - left,
            cv2.BORDER_CONSTANT, value=0
        )

    # Return an independent array rather than a view into the resized image.
    return fitted.copy()


def augment_image(img):
    """
    Generates a list of augmented (modified) versions of a single input image.

    The variations cover orientation (horizontal flip), lighting (darker and
    brighter), head tilt (+/-10 degree rotation) and distance (zoom out and
    zoom in). Every returned image has the same height and width as `img`.

    Args:
        img (np.ndarray): The original image read by cv2.

    Returns:
        list: A list of 9 np.ndarray images, starting with the original image
              followed by its 8 augmented versions.
    """
    h, w = img.shape[:2]

    # The original always comes first.
    augmented = [img]

    # --- 1. Flip ---
    # Mirror the image around the vertical axis.
    augmented.append(cv2.flip(img, 1))

    # --- 2. Brightness ---
    # alpha scales every pixel value: 0.8 darkens, 1.2 brightens.
    for alpha in [0.8, 1.2]:
        augmented.append(cv2.convertScaleAbs(img, alpha=alpha, beta=0))

    # --- 3. Rotation ---
    # Rotate around the image centre by 10 degrees in each direction.
    for angle in [-10, 10]:
        M = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)
        augmented.append(cv2.warpAffine(img, M, (w, h)))

    # --- 4. Scaling (Zoom) ---
    # 1.1 zooms in: the enlarged image is centre-cropped back to (w, h).
    # 0.9 zooms out: the shrunken image is centred on a black (w, h) canvas.
    # Stretching the shrunken image back to (w, h) instead would simply undo
    # the scaling and yield a near-copy of the original.
    for scale in [0.9, 1.1]:
        resized = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
        augmented.append(_fit_to_canvas(resized, w, h))

    return augmented


# ------------------------------
# Single Embedding Computation
# ------------------------------

def get_embedding_from_image(img):
    """
    Computes a single, L2-normalized embedding for a given image array.

    This is a helper function called by `get_embedding`.

    Args:
        img (np.ndarray): An image object (not a file path).

    Returns:
        np.ndarray: A 1D L2-normalized embedding vector taken from the first
                    result DeepFace returns for the image.

    Raises:
        ValueError: If DeepFace returns no results, or the embedding has a
                    zero or non-finite norm and therefore cannot be normalized.
        Exception: Whatever DeepFace.represent itself raises.
    """
    # `enforce_detection=False` asks DeepFace not to raise when no face is
    # found. The caller (`get_embedding`) wraps this call in try/except, so
    # any failure here only skips one augmentation.
    rep = DeepFace.represent(
        img_path=img,
        model_name=MODEL,
        detector_backend=DETECTOR,
        enforce_detection=False
    )

    # `rep` is a list of per-face dictionaries; fail with a clear message
    # instead of a bare IndexError when it is empty.
    if not rep:
        raise ValueError("DeepFace returned no face representations.")

    emb = np.asarray(rep[0]["embedding"], dtype=float)

    # L2 normalization makes the vector unit length so that a dot product
    # equals cosine similarity. Dividing by a zero or non-finite norm would
    # silently produce NaNs that poison the averaged embedding, so reject it.
    norm = np.linalg.norm(emb)
    if not np.isfinite(norm) or norm == 0:
        raise ValueError("Embedding has a zero or non-finite norm; cannot normalize.")

    return emb / norm


def get_embedding(img_path):
    """
    Builds one averaged, L2-normalized embedding for the image at `img_path`.

    The image is loaded, expanded into several augmented variants via
    `augment_image`, and each variant is embedded with
    `get_embedding_from_image`. Variants that fail are reported and skipped;
    the surviving embeddings are averaged and the mean is re-normalized.

    Args:
        img_path (str): The file path to the image.

    Returns:
        np.ndarray: The L2-normalized mean of all successful embeddings.

    Raises:
        ValueError: If the image cannot be read, or if not a single
                    augmentation produced an embedding.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals an unreadable file by returning None
        raise ValueError(f"Cannot read {img_path}")

    collected = []
    for variant in augment_image(img):
        try:
            collected.append(get_embedding_from_image(variant))
        except Exception as e:
            # One bad variant must not abort the whole image
            print(f"⚠️ Augmentation skipped for {img_path}: {e}")

    if not collected:
        raise ValueError(f"No valid embeddings generated for {img_path}.")

    # Element-wise average over all variants, then scale back to unit length
    averaged = np.mean(collected, axis=0)
    return averaged / np.linalg.norm(averaged)


# ------------------------------
# Batch Processing Function
# ------------------------------

def compute_all(folder):
    """
    Computes an averaged embedding for every image file found in `folder`.

    Only files whose (case-insensitive) name ends in .jpg, .jpeg, .png or
    .webp are considered. Files for which `get_embedding` raises are reported
    and left out of the result.

    Args:
        folder (str): The path to the directory containing images.

    Returns:
        tuple:
            - np.ndarray: The embeddings, one per successfully processed file.
            - list: The filenames, in the same order as the embeddings.
    """
    image_suffixes = (".jpg", ".jpeg", ".png", ".webp")
    vectors = []
    kept_names = []

    for f in os.listdir(folder):
        # Guard clause: ignore anything that is not an image file
        if not f.lower().endswith(image_suffixes):
            continue

        try:
            vectors.append(get_embedding(os.path.join(folder, f)))
            kept_names.append(f)
            print(f"✅ {f} processed with augmentation.")
        except Exception as e:
            # Unreadable image or no usable embedding: report and move on
            print(f"⚠️ {f} skipped: {e}")

    return np.array(vectors), kept_names


# ------------------------------
# Main Execution Block
# ------------------------------

# This block only runs when the script is executed directly
# (not when it's imported as a module).
if __name__ == "__main__":

    # Enrolment set: images of the people who should be recognised.
    print("Processing trusted faces...")
    trusted_embeddings, trusted_names = compute_all("trusted_faces")
    print(f"--- Generated {len(trusted_embeddings)} trusted embeddings ---")

    # Negative set: images of other people, stored alongside the trusted
    # embeddings so they can be compared against later.
    print("\nProcessing random faces...")
    random_embeddings, random_names = compute_all("random_faces")
    print(f"--- Generated {len(random_embeddings)} random embeddings ---")

    # Persist both arrays in a single .npz archive under the keys "trusted"
    # and "random". The filename lists are not written to the archive.
    np.savez("embeddings.npz", trusted=trusted_embeddings, random=random_embeddings)

    print("\n✅ All embeddings saved to embeddings.npz")


Processing trusted faces...
✅ Aditya.jpg processed with augmentation.
--- Generated 1 trusted embeddings ---

Processing random faces...
✅ ee782_ass2_pic2.jpg processed with augmentation.
✅ ee782_ass2_pic3.jpg processed with augmentation.
✅ ee782_ass2_pic6.jpg processed with augmentation.
✅ ee782_ass2_pic7.jpg processed with augmentation.
✅ WIN_20251021_23_35_22_Pro.jpg processed with augmentation.
--- Generated 5 random embeddings ---

✅ All embeddings saved to embeddings.npz


In [4]:
import cv2
import numpy as np
import time
import datetime
from deepface import DeepFace

# === Load embeddings ===
# np.load on an .npz archive returns a lazy NpzFile that keeps the file open;
# reading the arrays inside `with` closes the handle as soon as they are loaded.
with np.load("embeddings.npz") as data:
    trusted_embeddings = data["trusted"]
    random_embeddings = data["random"]

# An empty set would make np.mean return NaN, which in turn makes every
# `sim > THRESHOLD` comparison False. Fail loudly instead.
if trusted_embeddings.size == 0:
    raise ValueError("embeddings.npz contains no trusted embeddings.")
if random_embeddings.size == 0:
    raise ValueError("embeddings.npz contains no random embeddings; cannot calibrate a threshold.")

# Centroid of the trusted faces, rescaled to unit length
trusted_centroid = np.mean(trusted_embeddings, axis=0)
trusted_centroid /= np.linalg.norm(trusted_centroid)


def cosine_sim(a, b):
    """Returns the cosine similarity between vectors `a` and `b`."""
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))


# Similarity of every stored embedding to the trusted centroid
random_sims = [cosine_sim(trusted_centroid, r) for r in random_embeddings]
trusted_sims = [cosine_sim(trusted_centroid, t) for t in trusted_embeddings]

# Threshold halfway between the mean trusted and the mean random similarity
THRESHOLD = (np.mean(trusted_sims) + np.mean(random_sims)) / 2
print(f"🔹 Auto-calibrated threshold: {THRESHOLD:.3f}")

# === Real-time detection ===
SAVE_COOLDOWN = 10            # minimum seconds between two saved "unknown" snapshots
PROCESS_EVERY_N_FRAMES = 3    # analyse one frame in three, skip the other two
MAX_READ_FAILURES = 30        # consecutive failed reads tolerated before giving up
unknown_dir = "unknown_faces"
os.makedirs(unknown_dir, exist_ok=True)

_reported_errors = set()


def _report_once(stage, exc):
    """Prints a failure the first time it is seen, so errors are neither hidden nor repeated every frame."""
    message = f"{stage}: {exc}"
    if message not in _reported_errors:
        _reported_errors.add(message)
        print(f"⚠️ {message}")


cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Without this check the read loop below would never receive a frame.
    cap.release()
    raise RuntimeError("Cannot open camera (index 0).")

last_unknown_save = 0
frame_count = 0
read_failures = 0

print("🎥 Camera running. Press 'q' to exit.")

# try/finally guarantees the camera and the preview window are released even
# if the loop is interrupted (Ctrl-C, kernel interrupt) or an error escapes.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # Tolerate brief glitches, but do not spin forever on a dead camera:
            # the 'q' key is only polled after a frame has been shown.
            read_failures += 1
            if read_failures >= MAX_READ_FAILURES:
                print("⚠️ Camera stopped delivering frames; exiting.")
                break
            time.sleep(0.05)
            continue
        read_failures = 0

        frame_count += 1
        if frame_count % PROCESS_EVERY_N_FRAMES != 0:
            continue

        # Downscale before detection to reduce the work per analysed frame
        frame = cv2.resize(frame, (480, 360))
        frame_h, frame_w = frame.shape[:2]
        # Unannotated copy, so saved snapshots contain no drawn boxes or labels
        clean_frame = frame.copy()

        try:
            detections = DeepFace.extract_faces(
                img_path=frame,
                detector_backend="opencv",
                enforce_detection=False
            )
        except Exception as exc:
            _report_once("Face detection failed", exc)
            detections = []

        for det in detections:
            face = det["face"]
            area = det["facial_area"]
            x, y, w, h = area["x"], area["y"], area["w"], area["h"]

            # NOTE(review): with enforce_detection=False DeepFace is understood
            # to return the whole image as a single "face" when nothing is
            # detected - confirm for the installed version. Such a region is
            # not a person and must not raise an alert.
            if x <= 0 and y <= 0 and w >= frame_w and h >= frame_h:
                continue

            # Compute embedding
            # NOTE(review): `face` is DeepFace's own preprocessed crop; confirm
            # that represent(detector_backend="skip") accepts it unchanged in
            # the installed DeepFace version.
            try:
                rep = DeepFace.represent(
                    img_path=face,
                    model_name="Facenet",
                    detector_backend="skip",  # already cropped
                    enforce_detection=False
                )
                emb = np.array(rep[0]["embedding"])
                norm = np.linalg.norm(emb)
            except Exception as exc:
                _report_once("Embedding failed", exc)
                continue
            if norm == 0:
                # A zero vector cannot be normalized or compared
                continue
            emb = emb / norm

            # Compare with trusted centroid
            sim = cosine_sim(emb, trusted_centroid)
            if sim > THRESHOLD:
                label = f"TRUSTED ({sim:.2f})"
                color = (0, 255, 0)
            else:
                label = f"UNKNOWN ({sim:.2f})"
                color = (0, 0, 255)
                now = time.time()
                if now - last_unknown_save > SAVE_COOLDOWN:
                    ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
                    fname = f"unknown_{ts}.jpg"
                    # Save the crop from the camera frame. The `face` array
                    # returned by DeepFace is a preprocessed float image and
                    # does not write out as a viewable JPEG.
                    x0, y0 = max(x, 0), max(y, 0)
                    x1, y1 = min(x + w, frame_w), min(y + h, frame_h)
                    snapshot = clean_frame[y0:y1, x0:x1]
                    if snapshot.size > 0 and cv2.imwrite(os.path.join(unknown_dir, fname), snapshot):
                        print(f"💾 Unknown saved: {fname}")
                    else:
                        print(f"⚠️ Could not save snapshot {fname}")
                    print("You are not authorized!! Please Leave!")
                    last_unknown_save = now

            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            # Keep the label inside the image when the face touches the top edge
            cv2.putText(frame, label, (x, max(y - 10, 20)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

        cv2.imshow("Face Verification", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()

🔹 Auto-calibrated threshold: 0.619
🎥 Camera running. Press 'q' to exit.
