## Milestone 1

# 🛡️ Milestone 1: Activation and Basic Input

### 🎯 Objective
The goal of Milestone 1 was to implement **speech-based activation** for the AI Guard Agent.  
Specifically:
- Detect a **spoken command** (e.g., *“Guard my room”*) from an audio file.  
- Convert the audio into text using **Automatic Speech Recognition (ASR)**.  
- Implement **state management** to switch Guard Mode ON/OFF.  
- Provide **feedback** (console + optional Text-to-Speech).  

---

### ✅ What Was Done
1. **Audio Input**  
   - Since Colab cannot directly access the microphone, we used **uploaded audio recordings** (`.m4a`, `.wav`, `.mp3`).  
   - Used `pydub` + `ffmpeg` to convert any format into a proper PCM `.wav` file.  

2. **Speech Recognition**  
   - Used `speech_recognition` (Google Web Speech API) to transcribe the uploaded command.  

3. **Command Detection**  
   - Implemented a simple logic to check if the command contains *“guard my room”*.  
   - If detected → activate Guard Mode (`True`).  
   - Otherwise → remain inactive.  

4. **Feedback**  
   - Console message: 🔒 *Guard Mode Activated!*  
   - Added optional **Text-to-Speech (gTTS)** to give audio confirmation.  

---

### 📜 Code Implementation

```python
# Install dependencies
!pip install speechrecognition pydub gtts
!apt-get install -y ffmpeg

# --- Step 1: Upload audio file ---
from google.colab import files
uploaded = files.upload()   # Upload your command.m4a / command.wav / command.mp3

# --- Step 2: Convert audio to PCM WAV ---
from pydub import AudioSegment

input_file = list(uploaded.keys())[0]       # get uploaded filename
output_file = "command_fixed.wav"

sound = AudioSegment.from_file(input_file)  # auto-detects format
sound = sound.set_channels(1).set_frame_rate(16000)  # mono + 16kHz
sound.export(output_file, format="wav")

# --- Step 3: Speech Recognition ---
import speech_recognition as sr

recognizer = sr.Recognizer()
with sr.AudioFile(output_file) as source:
    audio_data = recognizer.record(source)
    command = recognizer.recognize_google(audio_data)
    print("You said:", command)

# --- Step 4: Guard Mode Activation Logic ---
guard_mode = False

if "guard my room" in command.lower():
    guard_mode = True
    print("🔒 Guard Mode Activated!")
else:
    print("⚠️ Command not recognized.")

print("Guard mode status:", guard_mode)

# --- Step 5: Optional Feedback with TTS ---
from gtts import gTTS
import os

if guard_mode:
    tts = gTTS("Guard mode activated", lang='en')
    tts.save("response.mp3")
    os.system("mpg123 response.mp3")  # requires: !apt-get install -y mpg123
```


In [None]:
!pip install SpeechRecognition pydub gtts playsound
!pip install pydub
!apt-get install -y ffmpeg

In [None]:
from google.colab import files
uploaded = files.upload()  # a popup will appear, select command.wav

In [None]:
from pydub import AudioSegment

# Convert the file uploaded in the previous cell rather than assuming it is
# called "command.m4a"; fall back to that name only when nothing was uploaded.
try:
    input_file = next(iter(uploaded), "command.m4a")
except NameError:
    input_file = "command.m4a"
output_file = "command_fixed.wav"

# No explicit format is passed, so .m4a / .wav / .mp3 uploads are all
# handled by format auto-detection.
sound = AudioSegment.from_file(input_file)
sound = sound.set_channels(1).set_frame_rate(16000)  # mono + 16kHz
sound.export(output_file, format="wav")


In [None]:
import speech_recognition as sr

recognizer = sr.Recognizer()
with sr.AudioFile("command_fixed.wav") as source:
    audio_data = recognizer.record(source)

# Always bind `command` so the following cells can run even when the audio
# is unintelligible or the recognition service cannot be reached.
try:
    command = recognizer.recognize_google(audio_data)
except sr.UnknownValueError:
    command = ""
    print("⚠️ Could not understand the audio.")
except sr.RequestError as err:
    command = ""
    print("❌ Speech recognition request failed:", err)
else:
    print("You said:", command)


In [None]:
# Normalise case and typographic apostrophes so "Don’t" matches "don't".
text = command.lower().replace("’", "'")

# Keep the previous state when the command is not recognised; on a first run
# (no state yet) the guard starts switched off.
try:
    guard_mode
except NameError:
    guard_mode = False

# Deactivation phrases are checked first: two of them contain
# "guard my room", and "stop guarding" on its own must also be honoured.
deactivation_phrases = (
    "don't guard my room",
    "do not guard my room",
    "stop guarding",
)

if any(phrase in text for phrase in deactivation_phrases):
    guard_mode = False
    print("🛑 Guard Mode Deactivated!")
elif "guard my room" in text:
    guard_mode = True
    print("🔒 Guard Mode Activated!")
else:
    print("⚠️ Command not recognized.")

print("Guard mode status:", guard_mode)


In [None]:
from gtts import gTTS
import os

# Spoken confirmation is produced only while guard mode is on.
if guard_mode:
    gTTS("Guard mode activated", lang='en').save("response.mp3")
    # Playback goes through the mpg123 command-line player.
    os.system("mpg123 response.mp3")


## Milestone 2

In [None]:
!pip install mediapipe opencv-python pillow
!pip install gtts playsound

In [None]:
!pip install DeepFace

In [None]:
# Alternative implementation using MediaPipe (more reliable in Colab)
import mediapipe as mp
import cv2
import numpy as np
import pickle
from google.colab import files
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity

# Initialize MediaPipe
# Short aliases for the MediaPipe solution modules (face detection, face mesh,
# drawing helpers).
mp_face_detection = mp.solutions.face_detection
mp_face_mesh = mp.solutions.face_mesh
mp_drawing = mp.solutions.drawing_utils

# Global storage
# Parallel lists: the enrollment functions append one feature vector and the
# matching name together, so index i in both lists describes the same face.
known_face_features = []
known_face_names = []

### Self Enrollment

In [None]:
from deepface import DeepFace

def extract_face_features_deepface(image):
    """
    Extract a face embedding using DeepFace (Facenet model).

    Args:
        image: Image passed straight to ``DeepFace.represent`` (the callers in
            this notebook pass BGR arrays from ``cv2.imread`` / video frames).

    Returns:
        ``(embedding, bbox)`` for the first face DeepFace reports, where
        ``embedding`` is a float32 numpy vector and ``bbox`` is ``(x, y, w, h)``;
        ``(None, None)`` when no face is found or DeepFace raises.
    """
    try:
        # DeepFace automatically detects and aligns face internally
        reps = DeepFace.represent(image, model_name='Facenet', enforce_detection=False)
        if not reps:
            return None, None

        face = reps[0]
        # With enforce_detection=False DeepFace does not raise when no face is
        # found; it embeds the whole image and reports a detection confidence
        # of 0. Treat that as "no face" instead of returning a meaningless
        # embedding with a full-frame box. Older releases without the
        # 'face_confidence' key keep the previous behaviour.
        if face.get('face_confidence') == 0:
            return None, None

        embedding = np.array(face['embedding'], dtype=np.float32)
        region = face['facial_area']
        bbox = (region['x'], region['y'], region['w'], region['h'])
        return embedding, bbox
    except Exception as e:
        # Best effort: a frame that cannot be processed counts as "no face".
        print("❌ DeepFace failed to extract features:", e)
        return None, None
# ============ ENROLLMENT USING DEEPFACE ============

# Rebinds both registry lists to new empty lists, so re-running this cell
# discards any faces enrolled earlier in the session.
known_face_features = []
known_face_names = []

def enroll_trusted_face_deepface(name):
    """
    Enroll one person from uploaded image(s) using DeepFace embeddings.

    Every uploaded image that yields an embedding is appended to the global
    registry under ``name`` and shown with the detected box drawn on it.

    Returns:
        Total number of names in the registry after enrollment.
    """
    print(f"📸 Please upload an image of {name}")
    uploads = files.upload()

    for fname in uploads.keys():
        print(f"Processing image: {fname}")
        frame = cv2.imread(fname)
        if frame is None:
            print(f"❌ Could not load image: {fname}")
            continue

        vector, box = extract_face_features_deepface(frame)
        if vector is None:
            print(f"❌ No face detected for {name}")
            continue

        known_face_features.append(vector)
        known_face_names.append(name)

        # Annotate the preview only when a bounding box came back.
        if box:
            left, top, width, height = box
            cv2.rectangle(frame, (left, top), (left + width, top + height),
                          (0, 255, 0), 2)
            cv2.putText(frame, f"Enrolled: {name}", (left, top - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

        plt.figure(figsize=(8, 6))
        plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        plt.title(f"✅ Enrolled {name}")
        plt.axis('off')
        plt.show()

    return len(known_face_names)


def save_encodings_deepface():
    """
    Write the enrolled embeddings and names to 'face_features_deepface.pkl'.

    Nothing is written when the registry is empty.
    """
    if len(known_face_features) < 1:
        print("❌ No faces to save!")
        return

    payload = dict(
        features=known_face_features,
        names=known_face_names,
        method='deepface',
    )
    with open('face_features_deepface.pkl', 'wb') as handle:
        pickle.dump(payload, handle)

    print(f"✅ Saved {len(known_face_names)} faces: {known_face_names}")
def enroll_trusted_face_cpu_only(name):
    """
    Enroll one person with the ``face_recognition`` library in CPU-only mode.

    Encodings are stored in the module-level list ``known_face_encodings``
    (created on first use) and the name is appended to ``known_face_names``.
    If ``face_recognition`` cannot be imported or encoding fails, enrollment
    is delegated to another method (see ``_fallback`` below).

    Returns:
        Number of entries in ``known_face_encodings``, or the fallback
        method's return value when the fallback was used.
    """
    import os  # local import: this cell does not import os itself

    # The encoding list was never defined at module level; create it lazily
    # so the appends and the final len() below cannot raise NameError.
    global known_face_encodings
    try:
        known_face_encodings
    except NameError:
        known_face_encodings = []

    def _fallback():
        # Prefer a MediaPipe enroller when one is defined in the session;
        # otherwise use the DeepFace enroller defined in this notebook.
        handler = globals().get("enroll_trusted_face_mediapipe")
        if handler is None:
            print("Falling back to DeepFace method...")
            return enroll_trusted_face_deepface(name)
        print("Falling back to MediaPipe method...")
        return handler(name)

    # Force CPU mode
    os.environ["CUDA_VISIBLE_DEVICES"] = ""

    try:
        import face_recognition
    except ImportError as e:
        print(f"❌ face_recognition is not available: {e}")
        return _fallback()

    print(f"Please upload an image of {name}")
    uploaded = files.upload()

    for filename in uploaded.keys():
        print(f"Processing image: {filename}")

        # Load image using OpenCV
        image_bgr = cv2.imread(filename)
        if image_bgr is None:
            # imread returns None for unreadable files; cvtColor would raise.
            print(f"❌ Could not load image: {filename}")
            continue
        image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

        # Display the uploaded image first
        plt.figure(figsize=(8, 6))
        plt.imshow(image_rgb)
        plt.title(f"Processing: {name}")
        plt.axis('off')
        plt.show()

        try:
            face_encodings = face_recognition.face_encodings(
                image_rgb,
                num_jitters=1,  # Reduce jitters for speed
                model='small'   # Use small model
            )
        except Exception as e:
            print(f"❌ CPU-only encoding failed: {e}")
            return _fallback()

        if len(face_encodings) > 0:
            known_face_encodings.append(face_encodings[0])
            # NOTE(review): this name list is shared with the DeepFace
            # registry, so mixing both enrollment methods in one session
            # misaligns known_face_names with known_face_features.
            known_face_names.append(name)
            print(f"✅ Successfully enrolled {name} using CPU-only mode")
        else:
            print(f"❌ No face found in the uploaded image for {name}")

    return len(known_face_encodings)

In [None]:
# WORKING ENROLLMENT - Choose one method
print("=== FACE ENROLLMENT PROCESS (FIXED) ===")

# Method 1: DeepFace (Facenet embeddings) - the method actually called here
print("Using DeepFace for face detection...")
enroll_trusted_face_deepface("Soham")
save_encodings_deepface()

# Method 2: CPU-only face_recognition (uncomment if you prefer)
# NOTE: save_encodings() is not defined in this notebook; define it before
# enabling this path.
# print("Using CPU-only face_recognition...")
# os.environ["CUDA_VISIBLE_DEVICES"] = ""
# enroll_trusted_face_cpu_only("Soham")
# save_encodings()

print(f"Total trusted faces enrolled: {len(known_face_names)}")


### Face Recognition

In [None]:
import cv2
import numpy as np
import pickle
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
from google.colab import files
from base64 import b64decode
from IPython.display import display, Javascript
import mediapipe as mp

In [None]:
# ============ RECOGNITION USING DEEPFACE ============

# Load saved DeepFace embeddings
# The file is the dict written by save_encodings_deepface() with the keys
# 'features', 'names' and 'method'.
# NOTE: pickle.load can execute code embedded in the file; only load a pickle
# produced by this notebook.
with open('face_features_deepface.pkl', 'rb') as f:
    data = pickle.load(f)
known_face_features = data['features']
known_face_names = data['names']

def recognize_faces_deepface(image, threshold=0.6):
    """
    Match the face found in ``image`` against the enrolled embeddings.

    Returns:
        ``[]`` when no embedding could be extracted; otherwise a one-element
        list ``[(name, similarity, bbox)]`` holding the best match whose
        cosine similarity exceeds ``threshold``, or ``("Unknown", 0.0, bbox)``
        when no enrolled face qualifies.
    """
    query, box = extract_face_features_deepface(image)
    if query is None:
        return []

    label, score = "Unknown", 0.0
    for position, reference in enumerate(known_face_features):
        similarity = cosine_similarity([query], [reference])[0][0]
        # A candidate must beat both the threshold and the best score so far.
        if similarity > max(threshold, score):
            label, score = known_face_names[position], similarity

    return [(label, score, box)]


def display_recognition_deepface(image, results):
    """
    Show ``image`` with one labelled box per recognition result.

    Known faces get a green box with name and similarity; unknown faces get a
    red box labelled "Unknown". The input image is not modified.
    """
    canvas = image.copy()
    for name, conf, bbox in results:
        if not bbox:
            continue
        is_known = name != "Unknown"
        colour = (0, 255, 0) if is_known else (0, 0, 255)
        caption = f"{name} ({conf:.2f})" if is_known else "Unknown"
        left, top, width, height = bbox
        cv2.rectangle(canvas, (left, top), (left + width, top + height), colour, 2)
        cv2.putText(canvas, caption, (left, top - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, colour, 2)

    plt.figure(figsize=(8, 6))
    plt.imshow(cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.show()


In [None]:
# 1) Process a single uploaded image
uploaded = files.upload()
for fn in uploaded:
    img = cv2.imread(fn)
    if img is None:
        # imread returns None for unreadable files; the display step would
        # otherwise fail on image.copy().
        print(f"❌ Could not load image: {fn}")
        continue
    res = recognize_faces_deepface(img)
    display_recognition_deepface(img, res)

In [None]:
# 3) Process an uploaded video file, annotate frames, save and display output video with bbox checks
import cv2
import numpy as np
import pickle
from google.colab import files
from google.colab.patches import cv2_imshow
import mediapipe as mp
from sklearn.metrics.pairwise import cosine_similarity

# Load the saved face registry. The file read here is the DeepFace pickle
# ('face_features_deepface.pkl'), so these are DeepFace embeddings, not
# MediaPipe pixel features.
with open('face_features_deepface.pkl', 'rb') as f:
    data = pickle.load(f)
known_face_features = data['features']
known_face_names = data['names']

# Alias used by extract_face_features_mediapipe below.
mp_face_detection = mp.solutions.face_detection

def extract_face_features_mediapipe(image):
    """
    Detect the first face with MediaPipe and turn its crop into a feature vector.

    The relative bounding box is converted to pixels and clamped to the image;
    the crop is resized to 128x128 and flattened to float32.

    Returns:
        ``(features, (x, y, w, h))`` or ``(None, None)`` when no face is
        detected or the clamped box is empty.
    """
    detector_cls = mp_face_detection.FaceDetection
    with detector_cls(model_selection=1,
                      min_detection_confidence=0.5) as detector:
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        outcome = detector.process(rgb_image)
        if not outcome.detections:
            return None, None
        rel_box = outcome.detections[0].location_data.relative_bounding_box

        img_h, img_w, _ = rgb_image.shape
        # Relative -> pixel coordinates, clamped so the box stays in the image.
        left = max(0, int(rel_box.xmin * img_w))
        top = max(0, int(rel_box.ymin * img_h))
        box_w = min(int(rel_box.width * img_w), img_w - left)
        box_h = min(int(rel_box.height * img_h), img_h - top)
        if box_w <= 0 or box_h <= 0:
            return None, None

        crop = rgb_image[top:top + box_h, left:left + box_w]
        if crop.size == 0:
            return None, None

        resized = cv2.resize(crop, (128, 128))
        vector = resized.flatten().astype(np.float32)
        return vector, (left, top, box_w, box_h)

def recognize_faces_mediapipe(image, threshold=0.75):
    """
    Recognize the face in a BGR image using MediaPipe pixel features.

    Args:
        image: BGR image array.
        threshold: Minimum cosine similarity for a match (default 0.75).

    Returns:
        ``[]`` when no face is detected; otherwise a one-element list
        ``[(name, similarity, bbox)]`` with the best match above
        ``threshold``, or ``("Unknown", 0.0, bbox)`` when none qualifies.
    """
    feats, bbox = extract_face_features_mediapipe(image)
    if feats is None:
        return []

    best_name, best_sim = "Unknown", 0.0
    for idx, known in enumerate(known_face_features):
        known_vec = np.asarray(known, dtype=np.float32).ravel()
        # The registry loaded in this cell holds DeepFace embeddings, whose
        # length differs from the 128x128x3 pixel vector; comparing them
        # would raise, so entries of a different size are skipped.
        if known_vec.shape != feats.shape:
            continue
        sim = cosine_similarity([feats], [known_vec])[0][0]
        # Keep the best match, not merely the first one above the threshold.
        if sim > threshold and sim > best_sim:
            best_name, best_sim = known_face_names[idx], sim

    return [(best_name, best_sim, bbox)]

def test_video_file_with_display_and_save_deepface():
    """
    Annotate uploaded video(s) with DeepFace recognition results.

    For each uploaded file, every frame is passed to
    ``recognize_faces_deepface``; known faces are boxed in green, unknown
    faces in red. The result is written to ``annotated_<filename>`` and the
    first frame of the output is displayed as a preview.
    """
    uploaded = files.upload()
    for fn in uploaded.keys():
        cap = cv2.VideoCapture(fn)
        if not cap.isOpened():
            print(f"❌ Cannot open video {fn}")
            cap.release()
            continue

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        fps = cap.get(cv2.CAP_PROP_FPS) or 20.0  # fall back when FPS is reported as 0
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        out_name = f"annotated_{fn}"
        out = cv2.VideoWriter(out_name, fourcc, fps, (w, h))

        # Release the capture and writer even if recognition raises mid-video.
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                results = recognize_faces_deepface(frame)
                for name, conf, bbox in results:
                    if bbox:
                        x, y, bw, bh = bbox
                        color = (0, 255, 0) if name != "Unknown" else (0, 0, 255)
                        cv2.rectangle(frame, (x, y), (x + bw, y + bh), color, 2)
                        # The name already reads "Unknown" for unmatched faces.
                        cv2.putText(frame, name, (x, y - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
                out.write(frame)
        finally:
            cap.release()
            out.release()
        print(f"✅ Saved annotated video as {out_name}")

        # Display preview
        cap2 = cv2.VideoCapture(out_name)
        try:
            ret2, frame2 = cap2.read()
            if ret2:
                cv2_imshow(frame2)
        finally:
            cap2.release()

# Run the video test (comment out the call below to skip it)
test_video_file_with_display_and_save_deepface()


# 🧠 AI Room Guard – DeepFace Based Intruder Detection

## 🔍 Overview
This project uses **DeepFace (Facenet)** and **OpenCV** to create an AI-powered guard system that detects and differentiates between **known** and **unknown** faces in a video.

It can:
- Enroll trusted faces.
- Recognize them in uploaded videos.
- Mark known persons in 🟩 **green** and unknown persons in 🟥 **red**.
- Save the annotated video for review.

---

## ⚙️ How It Works
1. **Enrollment Phase**
   - Upload an image of a trusted person.
   - DeepFace extracts embeddings (face features) and stores them in a pickle file.
   ```python
   enroll_trusted_face_deepface("PersonName")
   save_encodings_deepface()
   ```
2. **Recognition Phase**
   - Upload a video for analysis.
   - Each frame is processed to recognize known faces.
   - Annotated output is saved automatically.



# Milestone 3

# 🧠 AI Room Guard – Milestone 3 (LLM + Timed Escalation + Email Alert)

## 🚨 Overview
This system is an **intelligent AI Room Guard** that:
- Detects and identifies faces using **DeepFace (Facenet)**  
- Recognizes **known** vs. **unknown** persons  
- Uses **Google Gemini LLM** to generate human-like escalation messages  
- Converts text warnings to **speech (TTS)**  
- Sends an **email alert** to the owner when the unknown person persists  

All actions and warnings are **annotated on the video** in real time.

---

## ⚙️ Main Features
1. **Face Enrollment**  
   Upload and register trusted faces (DeepFace embeddings are stored).  

2. **Guard Mode**  
   Upload a video — the system monitors each frame:  
   - ✅ Known person → green box with name  
   - ❌ Unknown person → red box + escalation messages  

3. **Timed Escalation**
   - Level 1: Gentle warning (spoken + on-screen)  
   - Level 2: Stronger warning after 5 s  
   - Level 3: Final alert, sends email to owner  

4. **Video Annotation**
   - All warnings appear as white text on the frame  
   - Final video saved at:
     ```
     /content/Files/guard_out.mp4
     ```

---

## 🧩 How to Run
1. **Set credentials** as Colab secrets (🔑 *Secrets* panel in the left sidebar, with notebook access enabled). The code only reads them with `userdata.get(...)`, so create one secret per name:
   ```text
   SENDER_EMAIL     = your@gmail.com
   SENDER_PASSWORD  = app_password
   OWNER_EMAIL      = owner@gmail.com
   API_KEY          = your_gemini_api_key
   ```
2. **Enroll Trusted Faces** :
  ```python
  feat, name = enroll_face()
  save_faces([feat], [name])
```

3. **Activate Guard Mode**:  
- Using Voice command

4. **Downloading the Outputs**
5. **The model integrates DeepFace, LLM, Speech, and Email — forming a complete smart surveillance assistant.**

In [None]:
# ================================
# Milestone 3 with LLM + Timed Escalation + Email Alert
# ================================

# !pip install speechrecognition pydub gtts mediapipe opencv-python deepface google-generativeai scikit-learn
# !apt-get install -y ffmpeg

import os
import time
import cv2
import pickle
import mediapipe as mp
import numpy as np
import speech_recognition as sr
from gtts import gTTS
from google.colab import files
from sklearn.metrics.pairwise import cosine_similarity
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import google.generativeai as genai # For LLM
from google.colab import userdata

# ----------------------------
# Configuration / Credentials
# ----------------------------

# Working directory for generated artifacts (TTS audio, converted command
# audio, the face pickle and the annotated output video).
FILES_DIR = "Files"
os.makedirs(FILES_DIR, exist_ok=True)

# Gemini API key: read from Colab userdata ('API_KEY') inside
# generate_escalation_message(); it is not stored in a module constant.

# Email config
# Credentials come from Colab userdata; the server/port pair is used with
# STARTTLS in send_email_alert().
SMTP_SERVER = "smtp.gmail.com"
SMTP_PORT = 587
SENDER_EMAIL     = userdata.get('SENDER_EMAIL')
SENDER_PASSWORD  = userdata.get('SENDER_PASSWORD')
OWNER_EMAIL      = userdata.get('OWNER_EMAIL')

# Similarity threshold for recognizing a known face
# NOTE(review): guard_mode_video_with_escalation() compares against a literal
# 0.6 rather than this constant — confirm which value is intended.
SIM_THRESH = 0.75

# Time thresholds (in seconds) for escalation levels
ESCALATION_DELAY = 5  # seconds between levels

# ----------------------------
# Utility: Email sending
# ----------------------------

def send_email_alert(subject, body, to_addr=OWNER_EMAIL):
    """
    Send a plain-text email alert to the owner.

    Args:
        subject: Subject line of the message.
        body: Plain-text body.
        to_addr: Recipient address (defaults to ``OWNER_EMAIL``).

    Failures are reported on stdout and never raised, so a mail problem does
    not interrupt guard processing.
    """
    msg = MIMEMultipart()
    msg['From'] = SENDER_EMAIL
    msg['To'] = to_addr
    msg['Subject'] = subject
    msg.attach(MIMEText(body, 'plain'))
    try:
        # The context manager closes the connection even when starttls/login/
        # sendmail fail; the timeout keeps an unreachable server from
        # blocking the guard loop indefinitely.
        with smtplib.SMTP(SMTP_SERVER, SMTP_PORT, timeout=30) as server:
            server.starttls()
            server.login(SENDER_EMAIL, SENDER_PASSWORD)
            server.sendmail(SENDER_EMAIL, to_addr, msg.as_string())
        print("✅ Email alert sent.")
    except Exception as e:
        print("❌ Failed to send email:", str(e))

# ----------------------------
# Utility: Text-to-Speech
# ----------------------------

def speak(text, filename="response.mp3"):
    """
    Convert ``text`` to speech, save it under ``FILES_DIR`` and play it.

    Args:
        text: Sentence to speak; it is also echoed to stdout.
        filename: Name of the MP3 file written inside ``FILES_DIR``.

    Playback is best effort: player output is discarded and a missing or
    failing ``mpg123`` does not raise.
    """
    import subprocess  # local import: only needed for playback

    tts = gTTS(text, lang='en')
    path = os.path.join(FILES_DIR, filename)
    tts.save(path)
    print("Guard:", text)
    try:
        # Argument list instead of a shell string, so file names containing
        # spaces or shell metacharacters are passed through unchanged.
        subprocess.run(["mpg123", path],
                       stdout=subprocess.DEVNULL,
                       stderr=subprocess.DEVNULL,
                       check=False)
    except OSError:
        # Player not installed or not executable; playback stays silent, as
        # it did when the shell command's output was discarded.
        pass

# ----------------------------
# Face Recognition / Enrollment
# ----------------------------

mp_face_detection = mp.solutions.face_detection

from deepface import DeepFace

def extract_face_features(img):
    """
    Extract a face embedding using DeepFace (Facenet model).

    Args:
        img: Image passed straight to ``DeepFace.represent`` (callers here
            pass BGR arrays from ``cv2.imread`` / video frames).

    Returns:
        ``(embedding, bbox)`` for the first face DeepFace reports, where
        ``embedding`` is a float32 numpy vector and ``bbox`` is ``(x, y, w, h)``;
        ``(None, None)`` when no face is found or DeepFace raises.
    """
    try:
        reps = DeepFace.represent(img, model_name='Facenet', enforce_detection=False)
        if not reps:
            return None, None

        face = reps[0]
        # With enforce_detection=False DeepFace does not raise when no face is
        # found; it embeds the whole frame and reports a detection confidence
        # of 0. Without this check an empty room looks like an unknown person
        # and starts the escalation. Older releases without the
        # 'face_confidence' key keep the previous behaviour.
        if face.get('face_confidence') == 0:
            return None, None

        embedding = np.array(face['embedding'], dtype=np.float32)
        region = face['facial_area']
        bbox = (region['x'], region['y'], region['w'], region['h'])
        return embedding, bbox
    except Exception as e:
        # Best effort: a frame that cannot be processed counts as "no face".
        print("❌ DeepFace failed:", e)
        return None, None


def enroll_face():
    """
    Ask for an image upload and enroll the first image that yields a face.

    Returns:
        ``(embedding, name)`` where ``name`` is typed in by the user, or
        ``(None, None)`` when no uploaded image produced an embedding.
    """
    print("Upload an image of the trusted person:")
    uploads = files.upload()
    for image_name in uploads.keys():
        picture = cv2.imread(image_name)
        embedding, _ = extract_face_features(picture)
        if embedding is None:
            print(f"❌ No face detected in {image_name}")
            continue
        # First usable image wins; the remaining uploads are ignored.
        return embedding, input("Enter name for this face: ")
    return None, None

def save_faces(features, names, path=os.path.join(FILES_DIR, "face_features.pkl")):
    """Pickle the given feature and name lists to ``path`` as one dict."""
    record = {'features': features, 'names': names}
    with open(path, 'wb') as handle:
        pickle.dump(record, handle)
    print("Saved face features.")

def load_faces(path=os.path.join(FILES_DIR, "face_features.pkl")):
    """Return ``(features, names)`` read from ``path``, or two empty lists if the file is missing."""
    if os.path.exists(path):
        with open(path, 'rb') as handle:
            stored = pickle.load(handle)
        return stored['features'], stored['names']
    return [], []

# ----------------------------
# LLM-based escalation message generation
# ----------------------------

def generate_escalation_message(level, context=None):
    """
    Ask the Gemini model for a warning message at the given escalation level.

    Args:
        level: Escalation level inserted into the prompt.
        context: Optional extra situation details added to the prompt.

    Returns:
        A non-empty message string. If the LLM call fails for any reason, or
        returns no text, a fixed fallback warning mentioning ``level`` is
        returned so the guard loop can keep running.
    """
    prompt_parts = [
        "You are a guard AI in a security system. ",
        f"An unknown person is present. This is escalation level {level}. ",
        "Write a short polite-but-firm message asking the person to leave, ",
        "and warning consequences if he stays.\n\n",
    ]
    # Context goes before the "Message:" cue so that the cue remains the last
    # thing in the prompt.
    if context:
        prompt_parts.append(f"Context: {context}\n\n")
    prompt_parts.append("Message:")
    prompt = "".join(prompt_parts)

    message = ""
    try:
        genai.configure(api_key=userdata.get("API_KEY"))
        model = genai.GenerativeModel("gemini-2.5-pro")
        response = model.generate_content(prompt)
        # NOTE(review): response.text presumably raises when the reply holds
        # no usable text (e.g. a blocked response) — covered by the handler.
        message = response.text.strip()
    except Exception as err:
        # Top-level boundary: missing key, network error or API failure must
        # not abort video processing half-way.
        print("❌ LLM message generation failed:", err)

    if not message:
        message = (
            f"Warning (level {level}): you are not authorized to be here. "
            "Please leave immediately or the owner will be alerted."
        )
    return message
# ----------------------------
# Guard Mode Video Processing with Escalation
# ----------------------------

def guard_mode_video_with_escalation():
    """
    Monitor an uploaded video and escalate while an unknown face is visible.

    Each frame is embedded with ``extract_face_features`` and compared with
    the enrolled faces. Known faces get a green box with their name. Unknown
    faces get a red box and start a three-level escalation (LLM message +
    speech); level 3 also emails the owner and stops processing. The
    annotated frames are written to ``FILES_DIR/guard_out.mp4``.

    NOTE(review): escalation delays are measured with wall-clock time while
    frames are processed, not with the video's own timeline.
    """
    known_feats, known_names = load_faces()
    if not known_names:
        print("No trusted face enrolled. Enroll first.")
        return

    print("Upload video for guard monitoring:")
    uploaded = files.upload()
    if not uploaded:
        # A cancelled upload yields no entries; indexing would raise.
        print("❌ No video uploaded.")
        return
    vid_name = list(uploaded.keys())[0]
    cap = cv2.VideoCapture(vid_name)
    if not cap.isOpened():
        print(f"❌ Cannot open video {vid_name}")
        cap.release()
        return

    fps = cap.get(cv2.CAP_PROP_FPS) or 20.0  # fall back when FPS is reported as 0
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*'avc1')
    out_path = os.path.join(FILES_DIR, "guard_out.mp4")
    out = cv2.VideoWriter(out_path, fourcc, fps, (w, h))

    # Track escalation state
    escalation_start_time = None
    current_level = 0
    person_present = False
    msg = ""  # for on-frame message text

    # Release the capture and writer even if a frame fails mid-video, so the
    # frames written so far are flushed to the output file.
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            feat, bbox = extract_face_features(frame)
            recognized = False

            if feat is not None:
                # Compare with enrolled embeddings
                for idx, known in enumerate(known_feats):
                    sim = cosine_similarity([feat], [known])[0][0]
                    if sim > 0.6:  # DeepFace similarity threshold
                        recognized = True
                        # 🟩 Draw green box for known face
                        x, y, bw, bh = bbox
                        cv2.rectangle(frame, (x, y), (x + bw, y + bh), (0, 255, 0), 2)
                        cv2.putText(frame, known_names[idx], (x, y - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
                        break

            if not recognized and feat is not None:
                # 🟥 Unknown person detected
                person_present = True
                x, y, bw, bh = bbox
                cv2.rectangle(frame, (x, y), (x + bw, y + bh), (0, 0, 255), 2)  # red box
                cv2.putText(frame, "Unknown", (x, y - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)

                # 🔔 Escalation logic
                if escalation_start_time is None:
                    escalation_start_time = time.time()
                    current_level = 1
                    msg = generate_escalation_message(current_level)
                    speak(msg)
                else:
                    elapsed = time.time() - escalation_start_time
                    if current_level == 1 and elapsed > ESCALATION_DELAY:
                        current_level = 2
                        msg = generate_escalation_message(current_level)
                        speak(msg)
                    elif current_level == 2 and elapsed > 2 * ESCALATION_DELAY:
                        current_level = 3
                        msg = generate_escalation_message(current_level)
                        speak(msg)

                        # 💌 Send final email alert
                        subject = "🚨 Security Alert: Unknown Person Detected"
                        body = f"An unknown person remained after multiple warnings.\n\nLast message: {msg}"
                        send_email_alert(subject, body)
                        print("🛑 Level 3 escalation reached — alert sent.")

                        # Write final annotated frame before exit
                        cv2.putText(frame, msg, (30, h - 40),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                                    (255, 255, 255), 2, cv2.LINE_AA)
                        out.write(frame)
                        break  # exit loop after escalation Level 3

                # 🧾 Annotate LLM message on frame
                if msg:
                    cv2.putText(frame, msg, (30, h - 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                                (255, 255, 255), 2, cv2.LINE_AA)

            else:
                # Frame shows a known face or no face: reset the escalation.
                if person_present:
                    person_present = False
                    escalation_start_time = None
                    current_level = 0
                    msg = ""

            # Write processed frame
            out.write(frame)
    finally:
        cap.release()
        out.release()

    print(f"🔒 Guard video processing done. Output saved at: {out_path}")


# ----------------------------
# Main Flow
# ----------------------------

def get_voice_command():
    """
    Upload an audio file and return its transcription in lower case.

    The upload is converted to 16 kHz mono WAV (``FILES_DIR/cmd.wav``) and
    transcribed with ``recognize_google``.

    Returns:
        The lower-cased command text, or ``""`` when nothing was uploaded,
        the speech was unintelligible, or the recognition request failed.
    """
    from pydub import AudioSegment

    print("Upload audio file with activation command:")
    uploaded = files.upload()
    if not uploaded:
        # A cancelled upload yields no entries; indexing would raise.
        print("❌ No audio file uploaded.")
        return ""
    fn = next(iter(uploaded))

    audio = AudioSegment.from_file(fn)
    audio = audio.set_frame_rate(16000).set_channels(1)
    wav_path = os.path.join(FILES_DIR, "cmd.wav")
    audio.export(wav_path, format="wav")

    r = sr.Recognizer()
    with sr.AudioFile(wav_path) as src:
        aud = r.record(src)

    # An empty command is treated by process_command() as "do not activate",
    # so recognition problems end in a clean refusal instead of a traceback.
    try:
        cmd = r.recognize_google(aud)
    except sr.UnknownValueError:
        print("⚠️ Could not understand the audio.")
        return ""
    except sr.RequestError as err:
        print("❌ Speech recognition request failed:", err)
        return ""

    print("Command:", cmd)
    return cmd.lower()

def process_command(cmd):
    """
    Decide whether a transcribed command should activate guard mode.

    Args:
        cmd: Command text (any case); ``None`` or ``""`` count as no command.

    Returns:
        ``True`` only when the text contains "guard my room" and no negation
        ("stop", "don't", "do not", "dont"); otherwise ``False``.
    """
    # Normalise case and typographic apostrophes so "Don’t" matches "don't".
    text = (cmd or "").lower().replace("’", "'")
    if "guard my room" not in text:
        return False
    # "do not guard my room" contains the activation phrase, so every
    # negated form has to be rejected explicitly.
    negations = ("stop", "don't", "do not", "dont")
    return not any(word in text for word in negations)

In [None]:

    # Entry point: voice command -> face enrollment -> video monitoring.
    print("=== AI Guard with LLM & Escalation ===")
    cmd = get_voice_command()
    if not process_command(cmd):
        speak("Guard mode not activated.")
    else:
        speak("Guard mode activating. Please enroll a face.")
        feat, name = enroll_face()
        if feat is not None:
            # The saved registry holds only the face enrolled in this run.
            save_faces([feat], [name])
            speak("Enrollment done. Please upload video for monitoring.")
            guard_mode_video_with_escalation()
        else:
            print("Enrollment failed. Exiting.")
