In [None]:
import cv2
import torch
import numpy as np
import random
import time
import threading
import requests
import json
import torch.nn as nn
import torch.optim as optim
from collections import namedtuple
from torchvision.models import mobilenet_v2
import psutil
import platform

# Try to import distro for Linux distribution detection.
try:
    import distro
except ImportError:
    distro = None

# -----------------------------
# Advanced Dueling Double DQN
# -----------------------------
class DuelingDQN(nn.Module):
    """Dueling Q-network: a shared MLP trunk feeding value and advantage heads.

    Args:
        state_size: Number of features in one state vector.
        action_size: Number of discrete actions (width of the advantage head).
    """

    def __init__(self, state_size, action_size):
        super().__init__()
        hidden_units = 128
        # Shared two-layer trunk. Sub-modules are created in the same order
        # (trunk, value head, advantage head) so seeded initialisation and
        # state_dict keys are unaffected.
        trunk_layers = [
            nn.Linear(state_size, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
        ]
        self.feature = nn.Sequential(*trunk_layers)
        self.value_stream = nn.Linear(hidden_units, 1)
        self.advantage_stream = nn.Linear(hidden_units, action_size)

    def forward(self, x):
        """Return Q-values, one row per input row and one column per action.

        The advantage head is centred on its per-row mean (over dim 1) before
        being added to the scalar state value.
        """
        hidden = self.feature(x)
        state_value = self.value_stream(hidden)
        raw_advantage = self.advantage_stream(hidden)
        centred_advantage = raw_advantage - raw_advantage.mean(dim=1, keepdim=True)
        return state_value + centred_advantage

# -----------------------------
# Prioritized Experience Replay
# -----------------------------
# One replay transition. Field order: state, action, reward, next_state, done, priority.
Experience = namedtuple(
    'Experience',
    'state action reward next_state done priority',
)

class PrioritizedReplayBuffer:
    """Fixed-capacity ring buffer that samples experiences in proportion to priority.

    Attributes:
        capacity: Maximum number of stored experiences.
        alpha: Exponent applied to priorities before normalising them into
            sampling probabilities (0 gives uniform sampling).
        buffer: Stored experiences, at most ``capacity`` long.
        pos: Index of the slot the next ``add`` writes to.
        priorities: float32 array of length ``capacity``; slot ``i`` holds the
            priority of ``buffer[i]``.
    """

    def __init__(self, capacity, alpha=0.6):
        """Create an empty buffer.

        Raises:
            ValueError: If ``capacity`` is not positive.
        """
        if capacity <= 0:
            raise ValueError("capacity must be a positive integer")
        self.capacity = capacity
        self.alpha = alpha
        self.buffer = []
        self.pos = 0
        self.priorities = np.zeros(capacity, dtype=np.float32)

    def add(self, experience):
        """Store ``experience`` with the current maximum priority.

        Once the buffer is full the oldest slot is overwritten.
        """
        max_prio = self.priorities.max() if self.buffer else 1.0
        # If every stored priority was driven to zero (or is NaN), fall back to
        # 1.0 so that the new experience can still be drawn.
        if not max_prio > 0:
            max_prio = 1.0
        if len(self.buffer) < self.capacity:
            self.buffer.append(experience)
        else:
            self.buffer[self.pos] = experience
        self.priorities[self.pos] = max_prio
        self.pos = (self.pos + 1) % self.capacity

    def sample(self, batch_size, beta=0.4):
        """Draw ``batch_size`` experiences (with replacement) by priority.

        Args:
            batch_size: Number of experiences to draw.
            beta: Exponent of the importance-sampling correction.

        Returns:
            ``(samples, indices, weights)``: the drawn experiences, their
            buffer indices, and float32 importance weights scaled so the
            largest weight is 1. An empty buffer yields
            ``([], [], empty float32 array)``.
        """
        size = len(self.buffer)
        if size == 0:
            return [], [], np.array([], dtype=np.float32)

        scaled = self.priorities[:size] ** self.alpha
        total = scaled.sum()
        if np.isfinite(total) and total > 0 and not (scaled < 0).any():
            probs = scaled / total
        else:
            # Degenerate priorities (all zero, NaN, negative) would make
            # np.random.choice raise; sample uniformly instead.
            probs = np.full(size, 1.0 / size, dtype=np.float32)

        indices = np.random.choice(size, batch_size, p=probs)
        samples = [self.buffer[idx] for idx in indices]
        weights = (size * probs[indices]) ** (-beta)
        weights /= weights.max()
        return samples, indices, np.array(weights, dtype=np.float32)

    def update_priorities(self, indices, priorities):
        """Overwrite the priority of each index in ``indices`` with the paired value."""
        for idx, prio in zip(indices, priorities):
            self.priorities[idx] = prio

# -----------------------------
# Optimized Object Detector
# -----------------------------
class OptimizedDetector:
    """Bundle of the perception models used per frame.

    Holds a YOLOv5n object detector, an OpenCV DNN face detector, and a
    MobileNetV2-based expression classifier. Each model is loaded on a
    best-effort basis: when loading fails the error is printed, the attribute
    is set to ``None``, and the matching ``detect_*`` method returns its
    "unavailable" value instead of raising.
    """

    # Endpoint and model name used by classify_gender_with_api.
    # NOTE(review): the model name is kept exactly as originally written;
    # confirm it matches a model the local server actually serves.
    GENDER_API_URL = "http://127.0.0.1:11434"
    GENDER_API_MODEL = "deepsake-r1:1.5b"

    def __init__(self):
        # Load YOLOv5n model for object detection
        try:
            self.model = torch.hub.load('ultralytics/yolov5', 'yolov5n', pretrained=True)
            self.model.eval()
        except Exception as e:
            print("Error loading YOLOv5n model:", e)
            self.model = None

        # Load face detection model (OpenCV DNN)
        try:
            self.face_net = cv2.dnn.readNetFromCaffe(
                "deploy.prototxt.txt",
                "res10_300x300_ssd_iter_140000.caffemodel"
            )
        except Exception as e:
            print("Error loading face detection model:", e)
            self.face_net = None

        # Load expression recognition model based on MobileNetV2.
        # The classifier head is replaced with a 7-way layer before the
        # fine-tuned weights are loaded from 'fer_mobilenet.pth'.
        try:
            self.expr_model = mobilenet_v2(pretrained=True)
            self.expr_model.classifier[1] = nn.Linear(1280, 7)
            self.expr_model.load_state_dict(torch.load('fer_mobilenet.pth', map_location='cpu'))
            self.expr_model.eval()
        except Exception as e:
            print("Error loading expression recognition model:", e)
            self.expr_model = None

        # Index i of the expression model output corresponds to class_labels[i].
        self.class_labels = ['Angry', 'Disgust', 'Fear', 'Happy', 'Neutral', 'Sad', 'Surprise']

    def detect_objects(self, frame, conf_threshold=0.5):
        """Run YOLOv5n on ``frame`` and keep detections at or above ``conf_threshold``.

        Returns:
            The filtered ``results.pandas().xyxy[0]`` DataFrame, or an empty
            list when the detector failed to load. Both support ``len()``.
        """
        if self.model is None:
            return []
        with torch.no_grad():
            results = self.model(frame)
        detections = results.pandas().xyxy[0]
        return detections[detections['confidence'] >= conf_threshold]

    def detect_faces(self, frame):
        """Run the face network on ``frame`` resized to 300x300.

        Returns:
            The raw network output, or ``None`` when the face model failed to
            load.
        """
        if self.face_net is None:
            return None
        blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 1.0,
                                     (300, 300), (104.0, 177.0, 123.0))
        self.face_net.setInput(blob)
        return self.face_net.forward()

    def detect_expression(self, face_roi):
        """Classify the expression shown in ``face_roi``.

        Returns:
            The arg-max class index (an index into ``class_labels``), or -1
            when the model is unavailable or the ROI cannot be resized.
        """
        if self.expr_model is None:
            return -1
        try:
            face_resized = cv2.resize(face_roi, (224, 224))
        except Exception as e:
            print("Error resizing face ROI:", e)
            return -1
        # NOTE(review): the ROI is only scaled to [0, 1] and keeps whatever
        # channel order the caller supplies; confirm this matches how
        # fer_mobilenet.pth was trained.
        with torch.no_grad():
            face_tensor = torch.tensor(face_resized / 255.0).permute(2, 0, 1).unsqueeze(0).float()
            outputs = self.expr_model(face_tensor)
        return torch.argmax(outputs, dim=1).item()

    def classify_gender_with_api(self, face_roi, timeout=5.0):
        """
        Sends the face ROI image to the API endpoint at http://127.0.0.1:11434
        for gender classification using the model "deepsake-r1:1.5b". The API
        is expected to return JSON containing a 'classification' field.

        Args:
            face_roi: Image region to classify; it is JPEG-encoded in memory.
            timeout: Seconds to wait for the HTTP request. Without a timeout
                an unresponsive server would block the caller indefinitely.

        Returns:
            The value of the 'classification' field, or "Unknown" when
            encoding, the request, or response parsing fails.
        """
        try:
            # Encode face ROI as JPEG in memory.
            ret, buf = cv2.imencode('.jpg', face_roi)
            if not ret:
                print("Failed to encode face ROI as JPEG.")
                return "Unknown"
            files = {"file": ("face.jpg", buf.tobytes(), "image/jpeg")}
            # Include both a prompt and the model name in the payload.
            data = {
                "prompt": "Is the person in this image a man or a woman?",
                "model": self.GENDER_API_MODEL
            }
            response = requests.post(self.GENDER_API_URL, files=files, data=data,
                                     timeout=timeout)
            response.raise_for_status()
            return response.json().get("classification", "Unknown")
        except Exception as e:
            # Best-effort: any failure (including a timeout) degrades to "Unknown".
            print("Error calling gender classification API:", e)
            return "Unknown"

# -----------------------------
# Enhanced DQN Agent
# -----------------------------
class EnhancedAgent:
    """Epsilon-greedy dueling Double DQN agent with prioritized replay.

    The agent's actions nudge ``conf_threshold`` up (action 0) or down (any
    other action). ``replay`` trains the online network from the prioritized
    buffer and periodically copies its weights into the target network.
    """

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.buffer = PrioritizedReplayBuffer(5000)
        self.gamma = 0.95            # discount factor
        self.epsilon = 1.0           # current exploration rate
        self.epsilon_min = 0.01      # exploration floor
        self.epsilon_decay = 0.997   # multiplicative decay per replay step
        self.beta = 0.4              # importance-sampling exponent passed to the buffer
        self.model = DuelingDQN(state_size, action_size)
        self.target_model = DuelingDQN(state_size, action_size)
        self.target_model.load_state_dict(self.model.state_dict())
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
        self.conf_threshold = 0.5
        self.update_target_every = 100  # replay steps between target-network syncs
        self.step_count = 0

    def choose_action(self, state):
        """Return a random action with probability ``epsilon``, else the greedy one."""
        if random.random() < self.epsilon:
            return random.randrange(self.action_size)
        # Build the tensor only on the greedy path, where it is actually used.
        state_tensor = torch.as_tensor(np.asarray(state, dtype=np.float32)).unsqueeze(0)
        with torch.no_grad():
            q_values = self.model(state_tensor)
        return q_values.argmax().item()

    def update_confidence(self, action):
        """Shift ``conf_threshold`` by +0.05 (action 0) or -0.05, clipped to [0.1, 0.9]."""
        step = 0.05 if action == 0 else -0.05
        self.conf_threshold = np.clip(self.conf_threshold + step, 0.1, 0.9)

    def replay(self, batch_size):
        """Run one prioritized-replay training step.

        Does nothing until the buffer holds at least ``batch_size``
        experiences. Otherwise it samples a batch, minimises the
        importance-weighted squared TD error, writes the new priorities back
        to the buffer, syncs the target network every ``update_target_every``
        steps, and decays ``epsilon`` (never below ``epsilon_min``).
        """
        if batch_size <= 0 or len(self.buffer.buffer) < batch_size:
            return
        samples, indices, weights = self.buffer.sample(batch_size, self.beta)

        # Go through NumPy so mixed Python/NumPy scalars in the stored states
        # are converted in one pass.
        states = torch.as_tensor(np.asarray([s.state for s in samples], dtype=np.float32))
        next_states = torch.as_tensor(np.asarray([s.next_state for s in samples], dtype=np.float32))
        actions = torch.as_tensor([s.action for s in samples], dtype=torch.long)
        rewards = torch.as_tensor([float(s.reward) for s in samples], dtype=torch.float32)
        dones = torch.as_tensor([float(s.done) for s in samples], dtype=torch.float32)
        weights = torch.as_tensor(weights, dtype=torch.float32)

        # squeeze(1) keeps the batch dimension even when batch_size == 1.
        current_q = self.model(states).gather(1, actions.unsqueeze(1)).squeeze(1)

        with torch.no_grad():
            # Double DQN: the online network picks the next action, the target
            # network evaluates it.
            best_next = self.model(next_states).argmax(dim=1, keepdim=True)
            next_q = self.target_model(next_states).gather(1, best_next).squeeze(1)
            target_q = rewards + (1 - dones) * self.gamma * next_q

        td_error = current_q - target_q
        loss = (td_error.pow(2) * weights).mean()
        # Small constant keeps every priority strictly positive.
        prios = td_error.detach().pow(2) + 1e-5

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        self.buffer.update_priorities(indices, prios.cpu().numpy())
        self.step_count += 1

        if self.step_count % self.update_target_every == 0:
            self.target_model.load_state_dict(self.model.state_dict())

        if self.epsilon > self.epsilon_min:
            # Floor the decayed value so epsilon never dips below epsilon_min.
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

# -----------------------------
# Advanced Voice Recognizer
# -----------------------------
class AdvancedVoiceRecognizer:
    """Optional microphone listener built on the ``speech_recognition`` package.

    Attributes:
        sr: The imported ``speech_recognition`` module, or ``None`` when it is
            not installed.
        recognizer: ``sr.Recognizer()`` instance, or ``None`` when ``sr`` is
            unavailable.
        microphone: ``sr.Microphone()`` instance, or ``None`` when it could
            not be initialised.
    """

    def __init__(self):
        # Define every attribute up front so the instance is fully formed even
        # when the optional dependency or the microphone is missing.
        self.sr = None
        self.recognizer = None
        self.microphone = None
        try:
            import speech_recognition as sr
        except ImportError:
            print("speech_recognition module not installed. Voice recognition disabled.")
            return
        self.sr = sr
        self.recognizer = sr.Recognizer()
        try:
            self.microphone = sr.Microphone()
        except Exception as e:
            print("Error initializing microphone:", e)

    def listen(self):
        """Capture one phrase (up to 5 seconds) and transcribe it.

        Returns:
            The recognised command text, or ``None`` when voice recognition is
            unavailable, the audio could not be understood, or the
            recognition request failed.
        """
        if self.sr is None or self.microphone is None:
            return None
        with self.microphone as source:
            print("Listening for voice command...")
            try:
                audio = self.recognizer.listen(source, phrase_time_limit=5)
                command = self.recognizer.recognize_google(audio)
            except self.sr.UnknownValueError:
                print("Could not understand audio")
            except self.sr.RequestError as e:
                print("Request error:", e)
            else:
                print("Voice command recognized:", command)
                return command
        return None

    def run(self):
        """Listen forever, pausing one second between attempts."""
        while True:
            self.listen()
            time.sleep(1)

# -----------------------------
# System Requirements Checker
# -----------------------------
def check_system_requirements():
    """Return True only when the host has at least 12GB of RAM and runs Fedora.

    The RAM check comes first; the operating system is identified through the
    `distro` package when it was importable, otherwise through
    `platform.platform()`. Each failed check prints its reason.
    """
    required_bytes = 12 * 1024 * 1024 * 1024
    if psutil.virtual_memory().total < required_bytes:
        print("Insufficient RAM for advanced voice recognition. (Required: 12GB)")
        return False

    # Prefer the distro package; fall back to the generic platform string.
    os_description = distro.id() if distro else platform.platform()
    if "fedora" in os_description.lower():
        return True

    print("Not running Fedora. Advanced voice recognition disabled.")
    return False

# -----------------------------
# Main Vision System
# -----------------------------
class VisionSystem:
    """Per-frame pipeline: detect, build the RL state, and train the agent.

    The state vector is
    ``[object count, confidence threshold, frame variation, expression count]``.
    The agent's action adjusts the detector confidence threshold that is used
    for the next processed frame.
    """

    def __init__(self):
        self.detector = OptimizedDetector()
        # The state vector includes: [object count, confidence threshold, frame variation, expression count]
        self.agent = EnhancedAgent(state_size=4, action_size=2)
        self.prev_frame = None      # previous grayscale frame for the variation metric
        self.frame_skip = 2         # process one frame out of every `frame_skip`
        self.current_skip = 0
        # (state, action) chosen on the last processed frame; its outcome is
        # only observable on the next processed frame.
        self._pending = None

    def process_frame(self, frame):
        """Process one captured frame (every ``frame_skip``-th call does real work).

        On a processed frame the method observes the current state, completes
        the transition started on the previous processed frame (its reward and
        next state are what is observed now, i.e. with the threshold that the
        previous action produced), trains the agent, and then picks and
        applies the next action.
        """
        # Skip frames to reduce computation.
        self.current_skip = (self.current_skip + 1) % self.frame_skip
        if self.current_skip != 0:
            return

        # Object detection with the threshold set by the previous action.
        obj_results = self.detector.detect_objects(frame, self.agent.conf_threshold)
        obj_count = len(obj_results)

        expressions = self._detect_expressions(frame)
        frame_variation = self.calculate_frame_variation(frame)

        state = [
            obj_count,
            float(self.agent.conf_threshold),
            frame_variation,
            len(expressions)
        ]

        # Complete the previous transition now that its effect is visible.
        # Storing next_state == state with a reward measured before the action
        # took effect would give the agent nothing to learn from.
        if self._pending is not None:
            prev_state, prev_action = self._pending
            reward = self.calculate_reward(obj_count, len(expressions))
            self.agent.buffer.add(
                Experience(prev_state, prev_action, reward, state, False, 1.0)
            )
            self.agent.replay(32)

        # Decide on an action and update threshold accordingly.
        action = self.agent.choose_action(state)
        self.agent.update_confidence(action)
        self._pending = (state, action)

    def _detect_expressions(self, frame):
        """Return expression class indices for each confidently detected face."""
        faces = self.detector.detect_faces(frame)
        if faces is None or faces.ndim != 4 or faces.shape[2] == 0:
            return []

        frame_h, frame_w = frame.shape[:2]
        # Box coordinates are scaled by (w, h, w, h) to obtain pixel positions.
        scale = np.array([frame_w, frame_h] * 2)
        expressions = []
        for i in range(faces.shape[2]):
            confidence = faces[0, 0, i, 2]
            if not confidence > 0.5:
                continue
            x1, y1, x2, y2 = (faces[0, 0, i, 3:7] * scale).astype("int")
            x1, y1 = max(0, x1), max(0, y1)
            # Slice ends are exclusive, so clip to the full width/height rather
            # than width-1/height-1 (which would drop the last row/column).
            x2, y2 = min(frame_w, x2), min(frame_h, y2)
            face_roi = frame[y1:y2, x1:x2]
            if face_roi.size == 0:
                continue
            expr = self.detector.detect_expression(face_roi)
            # -1 is the detector's failure sentinel, not a recognised expression.
            if expr != -1:
                expressions.append(expr)
            # Classify gender using the API.
            gender = self.detector.classify_gender_with_api(face_roi)
            print("Detected gender:", gender)
        return expressions

    def calculate_frame_variation(self, current_frame):
        """Return the mean absolute grayscale difference to the previous frame.

        Returns 0.0 for the first frame, or when the frame size changed and no
        comparison is possible.
        """
        gray = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY)
        previous, self.prev_frame = self.prev_frame, gray
        if previous is None or previous.shape != gray.shape:
            return 0.0
        return float(np.mean(cv2.absdiff(previous, gray)))

    def calculate_reward(self, obj_count, expr_count):
        """Reward 1 for 3-10 detected objects plus 0.5 when any expression was found."""
        reward = 0
        if 3 <= obj_count <= 10:
            reward += 1
        if expr_count > 0:
            reward += 0.5
        return reward

# -----------------------------
# Main Application
# -----------------------------
def main():
    """Run the webcam loop until the stream ends or 'q' is pressed.

    Builds the vision system, opens capture device 0, optionally starts the
    voice recognizer in a daemon thread, and then processes and displays
    frames. The capture device and OpenCV windows are released even when the
    loop exits through an exception.
    """
    system = VisionSystem()
    cap = cv2.VideoCapture(0)
    try:
        if not cap.isOpened():
            print("Error: could not open video capture device 0.")
            return

        # Start voice recognition in a background thread if system requirements are met.
        if check_system_requirements():
            voice_recognizer = AdvancedVoiceRecognizer()
            if voice_recognizer.sr and voice_recognizer.microphone:
                voice_thread = threading.Thread(target=voice_recognizer.run, daemon=True)
                voice_thread.start()

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            system.process_frame(frame)
            cv2.imshow("Vision System", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close windows, including on Ctrl+C or
        # an error raised while processing a frame.
        cap.release()
        cv2.destroyAllWindows()

# Script entry point: runs only when this file is executed directly, not on import.
if __name__ == "__main__":
    # Cap PyTorch's CPU thread count at two before any model runs
    # (presumably to leave CPU time for capture and display — TODO confirm).
    torch.set_num_threads(2)
    main()

Using cache found in /home/abdullahalazmi/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2025-2-5 Python-3.13.1 torch-2.6.0+cu124 CPU

Fusing layers... 
YOLOv5n summary: 213 layers, 1867405 parameters, 0 gradients, 4.5 GFLOPs
Adding AutoShape... 


Error loading face detection model: OpenCV(4.11.0) /io/opencv/modules/dnn/src/caffe/caffe_io.cpp:1126: error: (-2:Unspecified error) FAILED: fs.is_open(). Can't open "deploy.prototxt.txt" in function 'ReadProtoFromTextFile'

Error loading expression recognition model: [Errno 2] No such file or directory: 'fer_mobilenet.pth'
Insufficient RAM for advanced voice recognition. (Required: 12GB)


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with a