In [17]:
import whisper
import queue, threading, time
import numpy as np
from scipy.io.wavfile import write as wv
import tempfile, os
import sounddevice as sd
import pyttsx3
from datetime import datetime 
import pickle 
import face_recognition
import cv2
from pathlib import Path 
import google.generativeai as genai
from dotenv import load_dotenv
import requests, json
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart



In [19]:
load_dotenv()

True

In [20]:
class AIGuardAgent:
    def __init__(self):
        self.model = whisper.load_model("base.en")
        self.SAMPLE_RATE = 16000
        self.CHUNK_SECONDS = 2
        self.audio_queue = queue.Queue(maxsize=10)
        
        self.tts_engine = pyttsx3.init()
        self.tts_engine.setProperty('rate', 150)
        
        self.guard_mode = False
        self.listening = False
        self.stop_flag = False
        self.alarm_activated = False
        
        self.face_db_path = "face_database.pkl"
        self.ensure_face_db_directory()
        self.load_trusted_faces()
        
        # Thread safety
        self.thread_lock = threading.Lock()
        self.camera = None
        self.camera_lock = threading.Lock()
        
        # NEW: Authority notification settings
        self.authority_contacts = ["security@campus.edu"]
        self.webhook_url = "https://hooks.example.com/alert"
        
        # Face tracking
        self.unknown_person_tracker = {}
        self.conversation_memory = {}
        self.situation_context = {
            "time_of_day": "",
            "last_recognized_person": "",
            "recent_events": []
        }
        
        self.escalation_levels = {
            1: "polite warning",
            2: "firm warning", 
            3: "final warning"
        }
        
        self.activation_phrases = [
            "guard my room", "protect my room", "secure my room", 
            "start guard mode", "activate guard"
        ]
        
        self.deactivation_phrases = [
            "stop guard mode", "deactivate guard", "stand down",
            "stop monitoring", "goodbye guard"
        ]
        
        self.enrollment_phrases = [
            "enroll face", "register face", "add trusted person"
        ]
        
        self.current_dialog_context = ""
        self.setup_llm()


    ####################################
    #      RESOURCE MANAGEMENT         #
    ####################################    
    def ensure_face_db_directory(self):
        os.makedirs(os.path.dirname(self.face_db_path) or ".", exist_ok=True)
    def get_camera(self):
        """Access camera without threading issues"""
        with self.camera_lock:
            if self.camera is None or not self.camera.isOpened():
                self.camera = cv2.VideoCapture(0)
                if self.camera.isOpened():
                    self.camera.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
                    self.camera.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
            return self.camera

    def release_camera(self):
        """Safely release camera"""
        with self.camera_lock:
            if self.camera and self.camera.isOpened():
                self.camera.release()
                self.camera = None  
    
    def load_trusted_faces(self):
        """Load in trusted faces from file"""
        try:
            if os.path.exists(self.face_db_path):
                with open(self.face_db_path, 'rb') as f:
                    data = pickle.load(f)
                    self.known_face_encodings = data['encodings']
                    self.known_face_names = data['names']
                print(f"Loaded {len(self.known_face_names)} trusted faces")
            else:
                print("No face database found. Starting fresh.")
                self.known_face_encodings = []
                self.known_face_names = []
        except Exception as e:
            print(f"Error loading face database: {e}")
            self.known_face_encodings = []
            self.known_face_names = []

    def save_trusted_faces(self):
        """Save trusted faces to database"""
        try:
            with open(self.face_db_path, 'wb') as f:
                pickle.dump({
                    'encodings': self.known_face_encodings,
                    'names': self.known_face_names
                }, f)
            print(f"Saved {len(self.known_face_names)} faces to database")
        except Exception as e:
            print(f"Error saving face database: {e}")

    

    def _find_or_create_person_id(self,encoding):
        """Find existing person or create new ID"""
        # Check if this face matches any existing unknown person
        for person_id, data in self.unknown_person_tracker.items():
            stored_encoding = data['reference_encoding']
            distance = np.linalg.norm(stored_encoding - encoding)
            if distance < 0.6:  # Similarity threshold
                return person_id
        
        # New person - create ID
        new_id = max(self.unknown_person_tracker.keys(), default=0) + 1
        return new_id

    
    def _create_new_tracker(self, face_encoding):
        """Create new person tracker"""
        return {
            'reference_encoding': face_encoding,
            'first_seen': time.time(),
            'last_seen': time.time(),
            'escalation_level': 1,
            'response_cooldown': 30,
            'appearances': 1,
            'last_interaction': time.time()
        }

    def _update_person_tracking(self, person_id, face_encoding):
        """Update tracking and determine escalation level"""
        if person_id not in self.unknown_person_tracker:
            self.unknown_person_tracker[person_id] = self._create_new_tracker(face_encoding)
            return 1
        
        tracker = self.unknown_person_tracker[person_id]
        current_time = time.time()
        time_present = current_time - tracker['first_seen']
        
        tracker['last_seen'] = current_time
        tracker['appearances'] += 1
        
        # Determine escalation based on time present
        if time_present > 120:  # 2 minutes
            new_level = 3
        elif time_present > 60:  # 1 minute
            new_level = 2
        else:
            new_level = 1
            
        # Only escalate if level increases
        if new_level > tracker['escalation_level']:
            tracker['escalation_level'] = new_level
            tracker['response_cooldown'] = [30, 45, 60][new_level - 1]
            
        return tracker['escalation_level']
    
    def _update_situation_context(self):
        """Update dynamic context for better responses"""
        hour = datetime.now().hour
        if 5 <= hour < 12:
            time_desc = "morning"
        elif 12 <= hour < 18:
            time_desc = "afternoon" 
        else:
            time_desc = "evening"
        
        self.situation_context['time_of_day'] = time_desc

    def _format_conversation_history(self, history):
        """Format conversation history for LLM"""
        if not history:
            return "No previous conversation"
        return "\n".join([f"{speaker}: {text}" for speaker, text in history])

    def _get_fallback_response(self, escalation_level):
        """Fallback responses when LLM fails"""
        fallbacks = {
            1: "Hello, who are you and what are you doing here?",
            2: "I need you to leave this area immediately.",
            3: "Final warning! Security is being notified. Leave now!"
        }
        return fallbacks.get(escalation_level, "Please identify yourself.")




    def setup_llm(self):
        try:
            api_key = os.getenv('GEMINI_API_KEY')
            genai.configure(api_key=api_key)
            self.llm_model = genai.GenerativeModel('gemini-pro')
            print("Gemini LLM configured")
        except Exception as e:
            print(f"Error setting up LLM: {e}")

    def generate_escalation_response(self, escalation_level, person_id=0, context=""):
        """Context-aware response generation with memory"""
        # Update situation context
        self._update_situation_context()
        
        # Get conversation history for this person
        history = self.conversation_memory.get(person_id, [])
        recent_history = history[-3:]  # Last 3 exchanges
        
        dynamic_prompt = f"""
        You are an AI room guard agent. Current situation:
        - Time: {self.situation_context['time_of_day']}
        - Escalation level: {escalation_level}/3
        - Additional context: {context}
        
        Recent conversation with this person:
        {self._format_conversation_history(recent_history)}
        
        Generate a {['polite inquiry', 'firm warning', 'final alert'][escalation_level-1]}.
        Be concise (1-2 sentences), direct, and appropriate for the escalation level.
        """
        
        try:
            if self.llm_model:
                response = self.llm_model.generate_content(dynamic_prompt)
                response_text = response.text.strip()
            else:
                response_text = self._get_fallback_response(escalation_level)
            
            # Store in conversation memory
            if person_id not in self.conversation_memory:
                self.conversation_memory[person_id] = []
            self.conversation_memory[person_id].append(("Guard", response_text))
            
            return response_text
            
        except Exception as e:
            print(f"LLM error: {e}")
            return self._get_fallback_response(escalation_level)
    def activate_alarm_protocol(self):
        """Real alarm functionality for level 3 escalation"""
        if self.alarm_activated:
            return
            
        self.alarm_activated = True
        
        # Visual alarm
        alarm_thread = threading.Thread(target=self._flash_alarm, daemon=True)
        alarm_thread.start()
        
        # Audio alarm
        self.speak("INTRUDER ALERT! Security has been notified! Leave immediately!")
        
        # Notify authorities
        self.notify_authorities()
        
        print("ALARM ACTIVATED - Authorities notified")

    def _flash_alarm(self):
        """Visual alarm display"""
        while self.alarm_activated and self.guard_mode:
            # Create flashing alert window
            for color in [(0, 0, 255), (0, 0, 0)]:  # Red, Black
                alert_frame = np.zeros((200, 600, 3), dtype=np.uint8)
                alert_frame[:] = color
                
                cv2.putText(alert_frame, "INTRUDER ALERT!", (50, 100),
                           cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 3)
                cv2.imshow("ALERT - INTRUDER DETECTED", alert_frame)
                cv2.waitKey(500)  # Flash every 500ms
                
        cv2.destroyWindow("ALERT - INTRUDER DETECTED")

    def notify_authorities(self):
        """Actually notify security/authorities"""
        try:
            # Save alert to file (always works)
            with open("intruder_alert.log", "a") as f:
                f.write(f"Alert at {datetime.now()}: Unknown person detected - Level 3 escalation\n")
            
            print("Alert logged to intruder_alert.log")
            
        except Exception as e:
            print(f"Failed to notify authorities: {e}")


    def handle_unknown_person(self, face_encoding, frame):
        """Complete unknown person handling with escalation"""
        person_id = self._find_or_create_person_id(face_encoding[0] if isinstance(face_encoding, list) else face_encoding)
        
        # Update tracking and get escalation level
        escalation_level = self._update_person_tracking(person_id, face_encoding)
        
        current_time = time.time()
        tracker = self.unknown_person_tracker[person_id]
        
        # Check if enough time has passed for next interaction
        if current_time - tracker['last_interaction'] > tracker['response_cooldown']:
            context = f"Person detected {tracker['appearances']} times, present for {int(current_time - tracker['first_seen'])} seconds"
            response = self.generate_escalation_response(escalation_level, person_id, context)
            
            self.speak(response)
            print(f"Level {escalation_level}: {response}")
            
            # Activate alarm for level 3
            if escalation_level == 3:
                self.activate_alarm_protocol()
            
            tracker['last_interaction'] = current_time

    def _find_matching_person(self,encoding,threshold=0.1):
        for person_id, data in self.unknown_person_tracker.items():
            stored_encoding = data['reference_encoding']
            distance = np.linalg.norm(stored_encoding - encoding)
            if distance < threshold:
                return person_id
        return None 
 
    


    def speak(self,text):
        self.tts_engine.say(text)
        self.tts_engine.runAndWait()
    
    
    
    def enroll_using_webcam(self,name="unknown"):
        self.camera=cv2.VideoCapture(0)
        self.speak(f"Please look at the camera for face enrollment as {name}")

        enrollment_frames = []
        frames_captured = 0
        max_frames = 10
        
        while frames_captured < max_frames:
            ret, frame = self.camera.read()
            if not ret:
                print(" Failed to capture frame")
                continue
            
            # Convert BGR to RGB
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            
            # Find all face locations and encodings in the current frame
            face_locations = face_recognition.face_locations(rgb_frame)
            face_encodings = face_recognition.face_encodings(rgb_frame, face_locations)
            
            if len(face_encodings) == 1:
                enrollment_frames.append(face_encodings[0])
                frames_captured += 1
                print(f" Captured face frame {frames_captured}/{max_frames}")
                
                # Show preview
                cv2.putText(frame, f"Enrolling: {frames_captured}/{max_frames}", 
                           (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
            
            cv2.imshow("Face Enrollment - Press 'q' to cancel", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            
            time.sleep(0.5)  # Wait between captures
        
        cv2.destroyAllWindows()
        
        if enrollment_frames:
            # Average the encodings for better accuracy
            avg_encoding = np.mean(enrollment_frames, axis=0)
            self.known_face_encodings.append(avg_encoding)
            self.known_face_names.append(name)
            self.save_trusted_faces()
            self.speak(f"Successfully enrolled {name} as a trusted person")
            return True
        else:
            self.speak("Failed to capture face. Please try again.")
            return False
        
    def mic_stream(self):
        """Capture microphone audio in chunks"""
        def callback(indata, frames, time, status):
            if status:
                print(f"Audio status: {status}")
            self.audio_queue.put(indata.copy())
        
        with sd.InputStream(
            samplerate=self.SAMPLE_RATE, 
            channels=1, 
            dtype='int16', 
            callback=callback,
            blocksize=int(self.SAMPLE_RATE * self.CHUNK_SECONDS)
        ):
            while not self.stop_flag:
                time.sleep(0.1)


    def check_activation_command(self, text):
        """Check if text contains any activation phrase"""
        return any(phrase in text for phrase in self.activation_phrases)
    
    def check_deactivation_command(self, text):
        """Check if text contains any deactivation phrase"""
        return any(phrase in text for phrase in self.deactivation_phrases)
    
    def check_enrollment_command(self, text):
        """Check if text contains face enrollment phrase"""
        return any(phrase in text for phrase in self.enrollment_phrases)
    
    def activate_guard_mode(self):
        """Activate guard mode with voice confirmation"""
        with self.thread_lock:
            self.guard_mode = True
            self.unknown_person_tracker = {}
            self.conversation_memory = {}
            self.alarm_activated = False
        
        self.speak("Guard mode activated! Starting face monitoring.")
        print(f"Guard mode ACTIVATED at {datetime.now().strftime('%H:%M:%S')}")
        
        face_thread = threading.Thread(target=self.face_monitoring_loop, daemon=True)
        face_thread.start()

    
    def deactivate_guard_mode(self):
        """Deactivate guard mode with voice confirmation"""
        with self.thread_lock:
            self.guard_mode = False
            self.alarm_activated = False
        
        self.speak("Guard mode deactivated. Goodbye!")
        print(f"Guard mode DEACTIVATED at {datetime.now().strftime('%H:%M:%S')}")


    def process_audio_chunk(self, chunk):
        """Process audio chunk with Whisper and handle commands"""
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            path = tmp.name
        
        try:
            wv(path, self.SAMPLE_RATE, chunk)
            result = self.model.transcribe(
                path, 
                language="en", 
                fp16=False, 
                condition_on_previous_text=False
            )
            
            text = (result.get("text") or "").strip().lower()
            
            if text:
                print(f"Heard: {text}")
                
                # Check for various commands
                if self.check_activation_command(text) and not self.guard_mode:
                    self.activate_guard_mode()
                elif self.check_deactivation_command(text) and self.guard_mode:
                    self.deactivate_guard_mode()
                elif self.check_enrollment_command(text):
                    self.speak("Starting face enrollment process.")
                    self.enroll_using_webcam()
                elif "how many trusted" in text or "list trusted" in text:
                    count = len(self.known_face_names)
                    if count == 0:
                        self.speak("No trusted faces enrolled yet.")
                    else:
                        self.speak(f"I have {count} trusted faces enrolled.")
                        print(f"Trusted faces: {', '.join(self.known_face_names)}")
                elif self.guard_mode:
                    print(f" In guard mode, heard: {text}")
                    
        except Exception as e:
            print(f"Error processing audio: {e}")
        finally:
            if os.path.exists(path):
                os.remove(path)
    
 
    def recognize(self,frame):
        if not self.known_face_encodings:
            return [],[]
        
        # convert to rgb 
        frame=cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
        #find faces and then make encoding to store
        face_locations=face_recognition.face_locations(frame)
        face_encodings=face_recognition.face_encodings(frame,face_locations)
        recognized_names=[]
        recognized_status=[]
        for item in face_encodings:
            matches=face_recognition.compare_faces(self.known_face_encodings,item,tolerance=0.5)

            name="unknown"
            status="unknown"
            face_distances=face_recognition.face_distance(self.known_face_encodings,item)
            best_match_index=np.argmin(face_distances) if len(face_distances)>0 else None

            if matches[best_match_index]:
                name=self.known_face_names[best_match_index]
                status="trusted"

            recognized_names.append(name)
            recognized_status.append(status)

        return recognized_names,recognized_status,face_encodings
    
    
    def face_monitoring_loop(self):
        camera = self.get_camera()
        
        if not camera or not camera.isOpened():
            self.speak("Error accessing the webcam for face monitoring.")
            print("Error: Could not open webcam.")
            self.guard_mode = False
            return
        
        self.speak("Face monitoring started. Scanning for trusted individuals.")
        last_announcement = {}
        announcement_cd = 30

        while self.guard_mode and not self.stop_flag:
            ret, frame = camera.read()
            if not ret:
                print("Failed to capture frame from webcam")
                time.sleep(1)
                continue
            
            names, statuses, encodings = self.recognize(frame)
            curr_t = time.time()
            
            for name, status, encoding in zip(names, statuses, encodings):
                if status == "trusted":
                    if name not in last_announcement or (curr_t - last_announcement[name]) > announcement_cd:
                        self.speak(f"Hello {name}, welcome back!")
                        print(f" Recognized trusted person: {name}")
                        last_announcement[name] = curr_t
                elif status == "unknown":
                    # Only handle unknown person if not recently announced
                    if "unknown" not in last_announcement or (curr_t - last_announcement["unknown"]) > 10:
                        self.handle_unknown_person(encoding, frame)
                        last_announcement["unknown"] = curr_t
            
            # Draw face boxes
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            face_locations = face_recognition.face_locations(frame_rgb)
            
            for (top, right, bottom, left), name, status in zip(face_locations, names, statuses):
                color = (0, 255, 0) if status == "trusted" else (0, 0, 255)
                cv2.rectangle(frame, (left, top), (right, bottom), color, 2)
                cv2.putText(frame, f"{name} ({status})", (left, top - 10),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
            
            cv2.imshow("AI Guard - Face Monitoring (Press 'q' to stop)", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        
        cv2.destroyAllWindows()
        print("Face monitoring stopped")

    

  
    def start_listening(self):
        """Start the continuous listening loop"""
        self.listening = True
        self.stop_flag = False
        
        # Start microphone stream in background thread
        audio_thread = threading.Thread(target=self.mic_stream, daemon=True)
        audio_thread.start()
        
        self.speak("AI Guard system ready. Say 'Guard my room' to activate or 'Enroll face' to add trusted persons.")
        print(" Listening for commands...")
        print(f" {len(self.known_face_names)} trusted faces loaded")
        
        try:
            while self.listening and not self.stop_flag:
                if not self.audio_queue.empty():
                    chunk = self.audio_queue.get()
                    self.process_audio_chunk(chunk)
                else:
                    time.sleep(0.1)
                    
        except KeyboardInterrupt:
            print("\nShutting down AI Guard system...")
            self.stop_flag = True
    
    def stop_listening(self):
        """Stop the listening loop"""
        self.listening = False
        self.stop_flag = True
        if self.camera:
            self.camera.release()
        cv2.destroyAllWindows()
    

In [21]:
guard=AIGuardAgent()
guard.start_listening()

No face database found. Starting fresh.
Gemini LLM configured
 Listening for commands...
 0 trusted faces loaded
Heard: oh
Heard: have no
Heard: i just feel free.
Heard: lionsahah'll continue to keep their semi anfield south.
Heard: it's in the middle one.
Heard: it's about me.
Heard: they did come with me cute and sharpening.
Heard: appreciated the
Heard: my room.
Heard: i just wanted to make another sure for that...
Heard: yes.
Heard: guard my room.
Guard mode ACTIVATED at 19:43:57


Exception in thread Thread-554 (face_monitoring_loop):
Traceback (most recent call last):
  File "c:\Users\aj057\AppData\Local\Programs\Python\Python312\Lib\threading.py", line 1075, in _bootstrap_inner
    self.run()
  File "C:\Users\aj057\AppData\Roaming\Python\Python312\site-packages\ipykernel\ipkernel.py", line 766, in run_closure
    _threading_Thread_run(self)
  File "c:\Users\aj057\AppData\Local\Programs\Python\Python312\Lib\threading.py", line 1012, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\aj057\AppData\Local\Temp\ipykernel_78896\4030875819.py", line 534, in face_monitoring_loop
ValueError: not enough values to unpack (expected 3, got 2)


Heard: yeah.
 In guard mode, heard: yeah.
Heard: yeah
 In guard mode, heard: yeah
Heard: please want to bring movement to...
 In guard mode, heard: please want to bring movement to...
Heard: so, it just happened to you.
 In guard mode, heard: so, it just happened to you.
Heard: please, monitoring will come ahead.
 In guard mode, heard: please, monitoring will come ahead.
Heard: recognize is returning.
 In guard mode, heard: recognize is returning.
Heard: i don't know what you're expecting.
 In guard mode, heard: i don't know what you're expecting.
Heard: it's an error that goes with the paryngate system.
 In guard mode, heard: it's an error that goes with the paryngate system.
Heard: even if it's only 3000 feet.
 In guard mode, heard: even if it's only 3000 feet.
Heard: here return the ting to the gara.
 In guard mode, heard: here return the ting to the gara.

Shutting down AI Guard system...
