In [None]:
import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
import time
from datetime import datetime

class IntervalAttentionDetector:
    """Webcam attention monitor that classifies visible faces at fixed intervals.

    Every ``analysis_interval`` seconds one frame is analysed: MediaPipe Face
    Mesh locates up to ``max_faces`` faces, each face is cropped and scored by
    three Keras models, and a majority vote labels the face "Distracted" or
    "Attentive". Between analyses the live feed is annotated with the most
    recent label for each face index.

    Face indices come straight from the order MediaPipe returns the faces in,
    so "Student N" is positional and not a persistent identity.
    """

    # Side length (pixels) of the square crop handed to the classifiers.
    MODEL_INPUT_SIZE = 256
    # Fraction of the landmark bounding box added on each side of the crop.
    FACE_MARGIN = 0.50

    def __init__(self, max_faces=5, analysis_interval=5):
        """Load the models, open the default camera and reset all state.

        Args:
            max_faces: Maximum number of faces tracked and classified.
            analysis_interval: Seconds between two classification passes.
        """
        self.max_faces = max_faces
        self.analysis_interval = analysis_interval  # seconds
        # 0 guarantees that the very first processed frame is analysed.
        self.last_analysis_time = 0

        # Initialize MediaPipe Face Mesh
        self.face_mesh = mp.solutions.face_mesh.FaceMesh(
            max_num_faces=max_faces,
            refine_landmarks=False,
            min_detection_confidence=0.7,
            min_tracking_confidence=0.7
        )

        # Load pre-trained models; a score above the model's threshold is
        # counted as one "Distracted" vote.
        self.models = {
            'vgg16': load_model('best_model_vgg16.keras'),
            'resnet50': load_model('best_model_resnet50.keras'),
            'inceptionv3': load_model('best_model_inceptionv3.keras')
        }
        self.thresholds = {'vgg16': 0.34, 'resnet50': 0.60, 'inceptionv3': 0.50}

        # Initialize video capture
        self.cap = cv2.VideoCapture(0)
        self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
        self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

        # Analysis results storage (one dict per analysis pass; grows for the
        # whole session and is printed when run() finishes).
        self.results = []
        self.current_status = ["Analyzing..." for _ in range(max_faces)]

        # Performance tracking
        self.fps = 0
        self.frame_count = 0
        self.start_time = time.time()

    @staticmethod
    def _expand_box(x_min, y_min, x_max, y_max, frame_w, frame_h, margin):
        """Grow a pixel box by ``margin`` on every side, clamped to the frame.

        The padding is derived once from the original box so that all four
        sides grow by the same fraction of the box's width/height.

        Returns:
            ``(x_min, y_min, x_max, y_max)`` with every value inside
            ``[0, frame_w]`` / ``[0, frame_h]``.
        """
        pad_x = int((x_max - x_min) * margin)
        pad_y = int((y_max - y_min) * margin)
        # Clamp both ends of each axis: landmarks of a partly off-screen face
        # can lie outside the frame, and a negative bound would make the
        # NumPy slice wrap around instead of yielding an empty crop.
        return (
            min(frame_w, max(0, x_min - pad_x)),
            min(frame_h, max(0, y_min - pad_y)),
            max(0, min(frame_w, x_max + pad_x)),
            max(0, min(frame_h, y_max + pad_y)),
        )

    @staticmethod
    def _majority_vote(votes):
        """Combine binary votes (1 = distracted, 0 = attentive).

        Returns:
            ``(label, agreeing)`` where ``label`` is 1 only when strictly more
            than half of the votes are 1 (a tie resolves to 0), and
            ``agreeing`` is the number of votes equal to ``label``.
        """
        distracted = sum(votes)
        attentive = len(votes) - distracted
        if distracted > attentive:
            return 1, distracted
        return 0, attentive

    def preprocess_face(self, frame, landmarks):
        """Crop one face out of ``frame`` and prepare it for the models.

        Args:
            frame: Image array indexed as ``[row, column, ...]``.
            landmarks: Object whose ``.landmark`` items expose ``.x`` / ``.y``
                coordinates normalised to the frame size.

        Returns:
            A ``float32`` array resized to ``MODEL_INPUT_SIZE`` squared and
            scaled by 1/255, or ``None`` when the crop is empty.
        """
        h, w = frame.shape[:2]
        x_coords = [lm.x * w for lm in landmarks.landmark]
        y_coords = [lm.y * h for lm in landmarks.landmark]

        x_min, y_min, x_max, y_max = self._expand_box(
            int(min(x_coords)), int(min(y_coords)),
            int(max(x_coords)), int(max(y_coords)),
            w, h, self.FACE_MARGIN,
        )

        face_img = frame[y_min:y_max, x_min:x_max]
        if face_img.size == 0:
            return None

        size = (self.MODEL_INPUT_SIZE, self.MODEL_INPUT_SIZE)
        return cv2.resize(face_img, size).astype('float32') / 255.0

    def analyze_frame(self, frame):
        """Classify every detected face in ``frame`` and record the outcome.

        Updates ``self.current_status`` for each classified face index,
        appends a ``{"time": ..., "students": [...]}`` entry to
        ``self.results`` and prints a summary of this pass.

        Args:
            frame: BGR frame as read from the capture device.
        """
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = self.face_mesh.process(rgb_frame)
        analysis_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        frame_results = {"time": analysis_time, "students": []}

        if results.multi_face_landmarks:
            for i, face_landmarks in enumerate(results.multi_face_landmarks[:self.max_faces]):
                # NOTE(review): the crop is taken from the BGR frame, not from
                # rgb_frame. Confirm this matches the channel order the models
                # were trained with.
                face_img = self.preprocess_face(frame, face_landmarks)
                if face_img is None:
                    continue

                # Ensemble prediction: build the single-image batch once and
                # reuse it for every model.
                # Assumes each model returns a (1, 1) score array - TODO confirm.
                batch = np.expand_dims(face_img, axis=0)
                predictions = [
                    1 if model.predict(batch, verbose=0)[0][0] > self.thresholds[name] else 0
                    for name, model in self.models.items()
                ]

                # Majority vote
                final_pred, agreeing = self._majority_vote(predictions)
                status = "Distracted" if final_pred == 1 else "Attentive"
                self.current_status[i] = status

                # Store results
                frame_results["students"].append({
                    "id": i+1,
                    "status": status,
                    "models_voted": f"{agreeing}/{len(predictions)}"
                })

        self.results.append(frame_results)
        print(f"\nAnalysis at {analysis_time}:")
        for student in frame_results["students"]:
            print(f"Student {student['id']}: {student['status']} ({student['models_voted']} models agree)")

    def process_frame(self):
        """Grab one frame, analyse it if the interval elapsed, and annotate it.

        Returns:
            The annotated frame, or ``None`` when the camera returned no frame.
        """
        ret, frame = self.cap.read()
        if not ret:
            return None

        # Calculate FPS over windows of roughly one second
        self.frame_count += 1
        elapsed_time = time.time() - self.start_time
        if elapsed_time > 1:
            self.fps = self.frame_count / elapsed_time
            self.start_time = time.time()
            self.frame_count = 0

        # Check if it's time to analyze
        current_time = time.time()
        if current_time - self.last_analysis_time >= self.analysis_interval:
            self.analyze_frame(frame.copy())  # Analyze a copy of the frame
            self.last_analysis_time = current_time

        # Real-time face detection (for visualization only); a downscaled
        # frame is enough because landmarks are normalised coordinates.
        small_frame = cv2.resize(frame, (320, 240))
        rgb_small = cv2.cvtColor(small_frame, cv2.COLOR_BGR2RGB)
        results = self.face_mesh.process(rgb_small)

        if results.multi_face_landmarks:
            # Same countdown for every face, so compute it once.
            next_analysis = self.analysis_interval - (current_time - self.last_analysis_time)
            countdown = f"Next in: {max(0, int(next_analysis))}s"

            for i, face_landmarks in enumerate(results.multi_face_landmarks[:self.max_faces]):
                # Landmark 1 is used as the label anchor (named "nose" by the
                # original author).
                nose = face_landmarks.landmark[1]
                x = int(nose.x * frame.shape[1])
                y = int(nose.y * frame.shape[0])

                # Display current status (from last analysis)
                status = self.current_status[i] if i < len(self.current_status) else "Unknown"
                color = (0, 255, 0) if status == "Attentive" else (0, 0, 255)

                cv2.putText(frame, f"Student {i+1}: {status}", (x, y - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)

                # Display next analysis time
                cv2.putText(frame, countdown,
                            (x, y + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

        # Display info
        cv2.putText(frame, f"FPS: {self.fps:.1f}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        cv2.putText(frame, f"Interval: {self.analysis_interval}s", (10, 60),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

        return frame

    def run(self):
        """Show the annotated feed until 'q' is pressed or the camera stops.

        Always releases the camera, the Face Mesh graph and the OpenCV
        windows, then prints every stored analysis result.
        """
        try:
            print(f"Starting attention detection with {self.analysis_interval}-second analysis intervals...")
            print("Press 'q' to quit\n")

            while True:
                frame = self.process_frame()
                if frame is None:
                    break

                cv2.imshow("Interval Attention Detection", frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            self.cap.release()
            try:
                # Free the MediaPipe graph, which the original never closed.
                self.face_mesh.close()
            except ValueError:
                # MediaPipe raises ValueError when the graph is already
                # closed, e.g. if run() is invoked a second time.
                pass
            cv2.destroyAllWindows()
            print("\nFinal Analysis Results:")
            for result in self.results:
                print(f"\n{result['time']}:")
                for student in result["students"]:
                    print(f"  Student {student['id']}: {student['status']}")

if __name__ == "__main__":
    # Script entry point: monitor up to five faces and re-classify them
    # every five seconds until the user quits.
    detector = IntervalAttentionDetector(
        max_faces=5,
        analysis_interval=5,
    )
    detector.run()

I0000 00:00:1745246645.210144  936928 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1745246645.218382  937086 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1745246645.221626  937086 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
  saveable.load_own_variables(weights_store.get(inner_path))


Starting attention detection with 5-second analysis intervals...
Press 'q' to quit


Analysis at 2025-04-21 20:14:10:


W0000 00:00:1745246651.080826  937089 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.
2025-04-21 20:14:11.351 python[25653:936928] +[IMKClient subclass]: chose IMKClient_Modern
2025-04-21 20:14:11.351 python[25653:936928] +[IMKInputSession subclass]: chose IMKInputSession_Modern



Analysis at 2025-04-21 20:14:15:
Student 1: Attentive (3/3 models agree)

Analysis at 2025-04-21 20:14:20:
Student 1: Attentive (2/3 models agree)

Analysis at 2025-04-21 20:14:25:
Student 1: Attentive (2/3 models agree)

Analysis at 2025-04-21 20:14:30:
Student 1: Attentive (2/3 models agree)

Analysis at 2025-04-21 20:14:35:
Student 1: Attentive (2/3 models agree)

Analysis at 2025-04-21 20:14:40:
Student 1: Attentive (2/3 models agree)

Analysis at 2025-04-21 20:14:45:
Student 1: Attentive (2/3 models agree)

Analysis at 2025-04-21 20:14:50:
Student 1: Attentive (2/3 models agree)

Analysis at 2025-04-21 20:14:55:
Student 1: Attentive (2/3 models agree)

Analysis at 2025-04-21 20:15:00:
Student 1: Attentive (2/3 models agree)

Analysis at 2025-04-21 20:15:05:
Student 1: Attentive (2/3 models agree)

Analysis at 2025-04-21 20:15:10:
Student 1: Attentive (2/3 models agree)

Analysis at 2025-04-21 20:15:15:
Student 1: Attentive (2/3 models agree)

Analysis at 2025-04-21 20:15:20:
Stud

: 