In [2]:
import cv2 #OpenCV for camera video operations
import numpy as np #for mathematical operations on array
from ultralytics import YOLO #object detection model
import torch #
import matplotlib.pyplot as plt
from PIL import Image #to handle different image formats
import os #file and directory operations
from pathlib import Path

class YOLODetector:
    """Convenience wrapper around a YOLOv8 model for images, videos and webcams.

    The checkpoint is selected by a size suffix supplied by the caller
    (``yolov8{model_size}.pt``). The instance keeps default confidence/IoU
    thresholds and a per-class color table next to the loaded model.
    """

    # Key codes that stop the interactive display loops: 'q', 'Q' and ESC.
    _QUIT_KEYS = (ord('q'), ord('Q'), 27)

    def __init__(self, model_size='n'):
        """Load the ``yolov8{model_size}.pt`` checkpoint.

        Args:
            model_size: Suffix inserted into the checkpoint file name,
                e.g. 'n', 's', 'm', 'l' or 'x'.
        """
        self.model_size = model_size
        self.model_path = f'yolov8{model_size}.pt'

        # Loading YOLOv8 model
        print(f"Loading YOLOv8{model_size.upper()} model...")
        self.model = YOLO(self.model_path)
        print("Model loaded successfully!")

        # Class id -> class name lookup exposed by the model
        self.class_names = self.model.names

        # Default thresholds used by detect_objects() when none are given
        self.conf_threshold = 0.25
        self.iou_threshold = 0.45

        # One color per class
        self.colors = self._generate_colors()

    def _generate_colors(self):
        """Return one pseudo-random ``[c0, c1, c2]`` color per class.

        A private ``RandomState(42)`` is used instead of reseeding the global
        NumPy generator: the colors are the same on every run, and the
        caller's own random stream is left untouched.
        """
        rng = np.random.RandomState(42)
        return [
            [int(channel) for channel in rng.randint(0, 255, 3)]
            for _ in range(len(self.class_names))
        ]

    def detect_objects(self, source, conf=None, iou=None, save_results=False, show_labels=True, show_conf=True):
        """Run the model on an image, a video or a webcam stream.

        Args:
            source: Image path, video path, or 0 for webcam.
            conf: Confidence threshold; ``None`` selects ``self.conf_threshold``.
            iou: IoU threshold; ``None`` selects ``self.iou_threshold``.
            save_results: Whether to save detection results.
            show_labels: Whether to show class labels.
            show_conf: Whether to show confidence scores.

        Returns:
            Whatever the underlying model call returns.
        """
        # Compare against None explicitly so that an intentional 0 / 0.0
        # threshold is forwarded instead of being replaced by the default.
        if conf is None:
            conf = self.conf_threshold
        if iou is None:
            iou = self.iou_threshold

        return self.model(
            source=source,
            conf=conf,
            iou=iou,
            save=save_results,
            show_labels=show_labels,
            show_conf=show_conf
        )

    def detect_webcam(self, conf=0.25, show_fps=True):
        """Run real-time detection on the default webcam until the user quits.

        Args:
            conf: Confidence threshold passed to the model for every frame.
            show_fps: Whether to overlay a frames-per-second readout.
        """
        window_name = 'YOLO Webcam Detection'
        cap = cv2.VideoCapture(0)  # 0 stands for the default camera

        if not cap.isOpened():
            print("Error: Could not open webcam")
            cap.release()
            return

        print("Starting webcam detection. Press 'q' to quit.")

        # FPS calculation variables
        fps_counter = 0
        fps_start_time = cv2.getTickCount()
        fps = 0.0
        window_raised = False

        # try/finally guarantees the camera and the window are released even
        # when inference raises or the user interrupts the kernel.
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # verbose=False suppresses the per-frame log output
                results = self.model(frame, conf=conf, verbose=False)

                # Overlay bounding boxes, labels and confidence scores
                annotated_frame = results[0].plot()

                if show_fps:
                    fps_counter += 1
                    if fps_counter >= 10:  # refresh every 10 frames for a steadier readout
                        fps_end_time = cv2.getTickCount()
                        elapsed = (fps_end_time - fps_start_time) / cv2.getTickFrequency()
                        if elapsed > 0:
                            fps = 10.0 / elapsed
                        fps_start_time = fps_end_time
                        fps_counter = 0

                    cv2.putText(annotated_frame, f'FPS: {fps:.1f}', (10, 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                cv2.imshow(window_name, annotated_frame)

                # The window exists only after the first imshow; raising it
                # once is enough, so the call is not repeated per frame.
                if not window_raised:
                    cv2.setWindowProperty(window_name, cv2.WND_PROP_TOPMOST, 1)
                    window_raised = True

                if (cv2.waitKey(1) & 0xFF) in self._QUIT_KEYS:
                    break
        finally:
            cap.release()
            cv2.destroyAllWindows()

    def detect_image(self, image_path, save_result=True, display=True):
        """Detect objects in a single image.

        Args:
            image_path: Path of the image to process.
            save_result: Write the annotated image to ``detected_<file name>``
                in the current working directory.
            display: Show the annotated image in a window until a key is pressed.

        Returns:
            The first result produced by the model, or ``None`` when
            ``image_path`` does not exist.
        """
        if not os.path.exists(image_path):
            print(f"Error: Image file {image_path} not found")
            return None

        print(f"Processing image: {image_path}")

        results = self.model(image_path)
        result = results[0]

        annotated_image = result.plot()

        if save_result:
            output_path = f"detected_{Path(image_path).name}"
            # imwrite reports failure through its return value; do not claim
            # success when nothing was written.
            if cv2.imwrite(output_path, annotated_image):
                print(f"Result saved as: {output_path}")
            else:
                print(f"Error: Could not save result to {output_path}")

        if display:
            cv2.imshow('YOLO Detection Result', annotated_image)
            cv2.waitKey(0)
            cv2.destroyAllWindows()

        self._print_detection_summary(result)

        return result

    def detect_video(self, video_path, save_result=False):
        """Detect objects in a video file, frame by frame.

        Every annotated frame is displayed; pressing 'q', 'Q' or ESC stops
        early.

        Args:
            video_path: Path of the video to process.
            save_result: Also write the annotated frames to
                ``detected_<file name>`` in the current working directory.
        """
        if not os.path.exists(video_path):
            print(f"Error: Video file {video_path} not found")
            return

        print(f"Processing video: {video_path}")

        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            print(f"Error: Could not open video {video_path}")
            cap.release()
            return

        out = None
        output_path = None
        frame_count = 0

        try:
            if save_result:
                # Keep the frame rate as a float: truncating e.g. 29.97 to 29
                # changes the playback speed of the saved file.
                fps = cap.get(cv2.CAP_PROP_FPS)
                if not fps > 0:
                    fps = 30.0  # fallback chosen here when the rate is not reported
                width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                output_path = f"detected_{Path(video_path).name}"
                out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
                if not out.isOpened():
                    print(f"Warning: Could not create {output_path}; continuing without saving")
                    out.release()
                    out = None

            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                results = self.model(frame, verbose=False)
                # plot() draws bounding boxes, labels and confidences
                annotated_frame = results[0].plot()

                if out is not None:
                    out.write(annotated_frame)

                frame_count += 1
                if frame_count % 30 == 0:  # report every 30 frames
                    if total_frames > 0:
                        progress = (frame_count / total_frames) * 100
                        print(f"Processing: {progress:.1f}%")
                    else:
                        # A non-positive frame count would otherwise divide by zero.
                        print(f"Processing: {frame_count} frames")

                cv2.imshow('YOLO Video Detection', annotated_frame)

                if (cv2.waitKey(1) & 0xFF) in self._QUIT_KEYS:
                    break
        finally:
            # Release everything even if inference or display raised.
            cap.release()
            if out is not None:
                out.release()
            cv2.destroyAllWindows()

        if out is not None:
            print(f"Processed video saved as: {output_path}")

    def _print_detection_summary(self, result):
        """Print the number of detections and, per class, count and mean confidence.

        Args:
            result: Object exposing ``boxes``; each box provides ``cls`` and
                ``conf`` values convertible with ``int()`` / ``float()``.
        """
        boxes = result.boxes
        # An empty box collection is "nothing detected" as well, not only None.
        if boxes is None or len(boxes) == 0:
            print("No objects detected")
            return

        print(f"\nDetected {len(boxes)} objects:")

        # class name -> confidences of all detections of that class
        confidences_by_class = {}
        for box in boxes:
            class_name = self.class_names[int(box.cls)]
            confidences_by_class.setdefault(class_name, []).append(float(box.conf))

        for class_name, confidences in confidences_by_class.items():
            count = len(confidences)
            avg_conf = np.mean(confidences)
            print(f"  {class_name}: {count} (avg confidence: {avg_conf:.2f})")

    def train_custom_model(self, dataset_path, epochs=100, imgsz=640):
        """Train the loaded model on a custom dataset.

        Args:
            dataset_path: Path to dataset YAML file.
            epochs: Number of training epochs.
            imgsz: Image size for training.

        Returns:
            The value returned by the model's ``train`` call.
        """
        print(f"Starting training with YOLOv8{self.model_size.upper()}...")

        results = self.model.train(
            data=dataset_path,
            epochs=epochs,
            imgsz=imgsz,
            plots=True,
            save=True
        )

        print("Training completed!")
        return results

    def export_model(self, format='onnx'):
        """Export the model to a different format.

        Args:
            format: Export format ('onnx', 'coreml', 'tflite', etc.)

        Returns:
            The value returned by the model's ``export`` call.
        """
        print(f"Exporting model to {format.upper()} format...")
        exported = self.model.export(format=format)
        print("Export completed!")
        return exported

    def benchmark_model(self):
        """Benchmark model performance and return the reported metrics."""
        print("Running model benchmark...")
        # The Ultralytics model method is ``benchmark`` (singular);
        # ``benchmarks`` does not exist and raised AttributeError.
        metrics = self.model.benchmark()
        return metrics

# Advanced YOLO Features
class AdvancedYOLODetector(YOLODetector):
    """YOLODetector extended with tracking, pose estimation and segmentation.

    Construction is inherited unchanged from YOLODetector, so the
    ``model_size`` argument and its default are the same.
    """

    def detect_and_track(self, source):
        """Run detection with object tracking on ``source``.

        Args:
            source: Input forwarded to the model's ``track`` call.

        Returns:
            The value returned by the model's ``track`` call.
        """
        print("Starting detection with tracking...")

        # NOTE(review): the original comment said tracking requires the
        # ultralytics[track] extra — confirm for the installed version.
        results = self.model.track(
            source=source,
            tracker="bytetrack.yaml",  # or "botsort.yaml"
            show=True,
            save=True
        )

        return results

    def pose_estimation(self, source):
        """Run human pose estimation with the YOLOv8 pose checkpoint.

        The checkpoint follows the size chosen at construction
        (``yolov8{model_size}-pose.pt``) instead of always using nano.
        """
        pose_model = YOLO(f'yolov8{self.model_size}-pose.pt')
        results = pose_model(source, show=True, save=True)
        return results

    def segmentation(self, source):
        """Run instance segmentation with the YOLOv8 segmentation checkpoint.

        The checkpoint follows the size chosen at construction
        (``yolov8{model_size}-seg.pt``) instead of always using nano.
        """
        seg_model = YOLO(f'yolov8{self.model_size}-seg.pt')
        results = seg_model(source, show=True, save=True)
        return results

# Example usage and demos
def _parse_confidence(raw, default=0.25):
    """Turn user-entered text into a confidence in [0, 1], or ``default`` if unusable."""
    text = raw.strip()
    if not text:
        return default
    try:
        value = float(text)
    except ValueError:
        print(f"Invalid confidence '{text}'. Using default {default}.")
        return default
    # The chained comparison is also False for NaN, which falls back as well.
    if not 0.0 <= value <= 1.0:
        print(f"Confidence {text} is outside 0-1. Using default {default}.")
        return default
    return value


def main():
    """Interactive console entry point.

    Asks for a model size and a detection mode (webcam, image or video),
    builds a YOLODetector and runs the selected mode. An unrecognized model
    choice selects the nano model; an unrecognized mode starts webcam
    detection.
    """
    print("YOLO Object Detection System")
    print("=" * 40)

    # Initialize detector
    print("\nChoose model size:")
    print("1. YOLOv8n (nano) - Fastest")
    print("2. YOLOv8s (small)")
    print("3. YOLOv8m (medium)")
    print("4. YOLOv8l (large)")
    print("5. YOLOv8x (extra large) - Most accurate")

    model_choice = input("Enter choice (1-5) [default: 1]: ").strip()
    model_sizes = {'1': 'n', '2': 's', '3': 'm', '4': 'l', '5': 'x'}
    model_size = model_sizes.get(model_choice, 'n')

    detector = YOLODetector(model_size=model_size)

    print("\nChoose detection mode:")
    print("1. Webcam detection")
    print("2. Image detection")
    print("3. Video detection")

    choice = input("Enter your choice (1-3): ").strip()

    if choice == '1':
        # A typo must not crash the program after the model has been loaded.
        conf = _parse_confidence(
            input("Enter confidence threshold (0.1-1.0) [default: 0.25]: ")
        )
        detector.detect_webcam(conf=conf)

    elif choice == '2':
        # Paths pasted from a file manager are often wrapped in quotes.
        image_path = input("Enter image path: ").strip().strip('"\'')
        detector.detect_image(image_path)

    elif choice == '3':
        video_path = input("Enter video path: ").strip().strip('"\'')
        save = input("Save processed video? (y/n) [default: n]: ").strip().lower().startswith('y')
        detector.detect_video(video_path, save_result=save)

    else:
        print("Invalid choice. Starting webcam detection...")
        detector.detect_webcam()

if __name__ == "__main__":
    # Remind the user which packages must already be installed; nothing is
    # installed here. The trailing empty entry yields the blank separator line.
    reminder_lines = (
        "Make sure you have installed the required packages:",
        "pip install ultralytics opencv-python pillow matplotlib",
        "",
    )
    print("\n".join(reminder_lines))

    main()

Make sure you have installed the required packages:
pip install ultralytics opencv-python pillow matplotlib

YOLO Object Detection System

Choose model size:
1. YOLOv8n (nano) - Fastest
2. YOLOv8s (small)
3. YOLOv8m (medium)
4. YOLOv8l (large)
5. YOLOv8x (extra large) - Most accurate
Loading YOLOv8S model...
Model loaded successfully!

Choose detection mode:
1. Webcam detection
2. Image detection
3. Video detection
Processing video: C:\Users\irfan\OneDrive\Documents\Addon\dl\City Street People Walking COPYRIGHT FREE Stock Video.mp4
Processing: 7.6%
Processing: 15.2%
Processing: 22.8%
Processing: 30.5%
Processing: 38.1%
Processing: 45.7%
