In [1]:
import cv2
import face_recognition
import speech_recognition as sr
from gtts import gTTS
from playsound import playsound
import google.generativeai as genai
import os
import numpy as np
import logging
import time
import pygame

pygame.mixer.init()

  from .autonotebook import tqdm as notebook_tqdm


pygame 2.6.1 (SDL 2.28.4, Python 3.10.11)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
# Setup logging for robustness (stretch goal)
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s',
                    handlers=[logging.FileHandler("guard_log.txt"), logging.StreamHandler()])

# Configure Gemini API (replace with your key)
genai.configure(api_key="AIzaSyDhBUtOZ40t0FdwPOzG0XsUgDVC9PbLSQU")  # Get from makersuite.google.com
model = genai.GenerativeModel('gemini-2.5-flash')  # Free, fast model

In [3]:
# Listen for speech (ASR with retry for robustness)
def listen_for_speech(timeout=5, retries=3):
    recognizer = sr.Recognizer()
    for attempt in range(retries):
        try:
            with sr.Microphone() as source:
                logging.info("Listening...")
                audio = recognizer.listen(source, timeout=timeout)
            text = recognizer.recognize_google(audio).lower()
            logging.info(f"Recognized: {text}")
            return text
        except sr.WaitTimeoutError:
            logging.warning("No speech detected.")
        except sr.UnknownValueError:
            logging.warning("Could not understand audio.")
        except Exception as e:
            logging.error(f"ASR error: {e}")
    return ""

In [4]:
def speak(text):
    try:
        tts = gTTS(text=text, lang='en')
        tts.save('response.mp3')
        pygame.mixer.music.load('response.mp3')
        pygame.mixer.music.play()
        while pygame.mixer.music.get_busy():  # Wait until playback finishes
            pygame.time.Clock().tick(50) 
        pygame.mixer.music.unload()# Control frame rate
        time.sleep(1)  # Extra buffer to ensure playback ends
        os.remove('response.mp3')
        logging.info(f"Spoke: {text}")
    except Exception as e:
        logging.error(f"TTS error: {e}")

In [5]:
# Enroll trusted faces from folder
def enroll_trusted_faces(folder_path):
    trusted_embeddings = {}
    try:
        for person in os.listdir(folder_path):
            person_path = os.path.join(folder_path, person)
            if os.path.isdir(person_path):
                trusted_embeddings[person] = []
                for img_file in os.listdir(person_path):
                    img_path = os.path.join(person_path, img_file)
                    image = face_recognition.load_image_file(img_path)
                    encodings = face_recognition.face_encodings(image)
                    if encodings:
                        trusted_embeddings[person].append(encodings[0])
                    else:
                        logging.warning(f"No face found in {img_path}")
        np.save('trusted_embeddings.npy', trusted_embeddings)  # Save for reuse
        logging.info("Enrollment complete.")
        return trusted_embeddings
    except Exception as e:
        logging.error(f"Enrollment error: {e}")
        return {}

In [6]:
# Check if trusted
def is_trusted(face_encoding, trusted_embeddings, tolerance=0.4):
    for person, embeds in trusted_embeddings.items():
        if embeds:
            distances = face_recognition.face_distance(embeds, face_encoding)
            if np.min(distances) < tolerance:
                return True, person
    return False, None

In [7]:
# Generate LLM response
def generate_response(prompt, level):
    try:
        full_prompt = f"Act as a polite but firm AI room guard. Escalation level {level}/3: Respond to potential intruder. Keep short, natural, engaging. Base: {prompt}."
        response = model.generate_content(full_prompt)
        return response.text.strip()
    except Exception as e:
        logging.error(f"LLM error: {e}")
        return f"Default level {level} warning."

In [8]:
# Escalation logic (3 levels, creative and coherent)
def escalate_conversation():
    escalation_level = 1
    while escalation_level <= 3:
        if escalation_level == 1:
            prompt = "Politely ask who they are."
        elif escalation_level == 2:
            prompt = "Firmly request they leave."
        else:
            prompt = "Issue a stern warning or alarm."

        response = generate_response(prompt, escalation_level)
        speak(response)

        # Listen for reply
        reply = listen_for_speech()
        if "friend" in reply or "owner" in reply:  # Simple de-escalation logic (enhance with LLM if needed)
            speak("Verified. Welcome.")
            break

        escalation_level += 1
        time.sleep(1)  # Pause between levels

    if escalation_level > 3:
        speak("Intruder alert! Alerting authorities.")  # Simulated alarm

In [9]:
# Load or enroll embeddings
if os.path.exists('trusted_embeddings.npy'):
    trusted_embeddings = np.load('trusted_embeddings.npy', allow_pickle=True).item()
else:
    trusted_embeddings = enroll_trusted_faces('trusted_faces/')
if not trusted_embeddings:
    logging.error("No trusted faces enrolled. Exiting.")

In [10]:
# Activation: Listen for command
guard_mode = False
while not guard_mode:
    command = listen_for_speech()
    if "guard my room" in command:
        guard_mode = True
        speak("Guard mode activated. Monitoring room.")
        logging.info("Guard mode ON.")

2025-10-03 18:47:56,457 - INFO - Listening...
2025-10-03 18:48:04,502 - INFO - Recognized: guard my room
2025-10-03 18:48:09,196 - INFO - Spoke: Guard mode activated. Monitoring room.
2025-10-03 18:48:09,197 - INFO - Guard mode ON.


In [11]:
cap = cv2.VideoCapture(0)  # 0 for default webcam
if not cap.isOpened():
    logging.error("Webcam access failed.")
# else: 
#     # Display live feed to verify camera activation
#     print("Webcam activated. Press 'q' to quit the live feed view.")
#     while True:
#         ret, frame = cap.read()  # Read a frame from the webcam
#         if not ret:
#             logging.error("Failed to capture frame from webcam.")
#             break
#         cv2.imshow('Webcam Feed', frame)  # Display the frame in a window
#         if cv2.waitKey(1) & 0xFF == ord('q'):  # Exit on 'q' key press
#             break

#     # Release the webcam and close the window
#     cap.release()
#     cv2.destroyAllWindows()
#     print("Webcam feed closed.")

In [None]:
frame_skip = 5  # Process every 5th frame for optimization
frame_count = 0
while guard_mode:
    ret, frame = cap.read()
    if not ret:
        break
    frame_count += 1
    if frame_count % frame_skip != 0:
        continue  # Skip frames for speed
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    face_locations = face_recognition.face_locations(rgb_frame)
    face_encodings = face_recognition.face_encodings(rgb_frame, face_locations)
    detected = False
    for encoding in face_encodings:
        detected = True
        trusted, person = is_trusted(encoding, trusted_embeddings)
        if trusted:
            speak(f"Welcome back, {person}.")
            logging.info(f"Trusted user: {person}")
        else:
            logging.info("Untrusted detected. Escalating.")
            escalate_conversation()
    if not detected:
        time.sleep(0.1)  # Brief pause if no face
    cv2.imshow('AI Guard', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()
logging.info("Guard mode OFF.")

2025-10-03 18:48:18,717 - INFO - Spoke: Welcome back, sachi.
2025-10-03 18:48:18,719 - INFO - Trusted user: sachi
2025-10-03 18:48:22,538 - INFO - Spoke: Welcome back, sachi.
2025-10-03 18:48:22,539 - INFO - Trusted user: sachi
2025-10-03 18:48:26,134 - INFO - Untrusted detected. Escalating.
2025-10-03 18:48:46,029 - INFO - Spoke: Hello there. I've detected an unscheduled presence. Could you please identify yourself?
2025-10-03 18:48:46,074 - INFO - Listening...
2025-10-03 18:48:53,677 - INFO - Recognized: no
2025-10-03 18:49:14,819 - INFO - Spoke: Attention. My sensors indicate an unauthorized presence within this secured area. You are not permitted here. I require you to leave immediately.
2025-10-03 18:49:14,863 - INFO - Listening...
2025-10-03 18:49:38,873 - INFO - Listening...
2025-10-03 18:49:42,674 - INFO - Recognized: no
2025-10-03 18:50:14,756 - INFO - Listening...
2025-10-03 18:50:17,702 - INFO - Recognized: no
2025-10-03 18:50:23,309 - INFO - Spoke: Intruder alert! Alerting 

KeyboardInterrupt: 

: 