In [251]:
import cv2
import tensorflow as tf
from ultralytics import YOLO
import numpy as np
import time
import pandas as pd


In [252]:
# Fallback mapping of class IDs to human-readable names (80 entries, IDs 0-79).
# Note: load_yolo_model() rebinds this global to the loaded model's own `names`.
class_labels = dict(enumerate((
    # 0-9
    'person', 'bicycle', 'car', 'motorcycle', 'airplane',
    'bus', 'train', 'truck', 'boat', 'traffic light',
    # 10-19
    'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
    'cat', 'dog', 'horse', 'sheep', 'cow',
    # 20-29
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
    'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
    # 30-39
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
    'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
    # 40-49
    'wine glass', 'cup', 'fork', 'knife', 'spoon',
    'bowl', 'banana', 'apple', 'sandwich', 'orange',
    # 50-59
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
    'cake', 'chair', 'couch', 'potted plant', 'bed',
    # 60-69
    'dining table', 'toilet', 'TV', 'laptop', 'mouse',
    'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
    # 70-79
    'toaster', 'sink', 'refrigerator', 'book', 'clock',
    'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',
)))


In [253]:
def load_yolo_model(model_path='yolo11n.pt'):
    """
    Load a YOLO model from the given weights path.

    Side effect: rebinds the module-level ``class_labels`` to the loaded
    model's ``names`` mapping so that label lookups match the model in use.

    Args:
        model_path: Path or name of the weights file passed to ``YOLO``.
            Pick the model variant based on the accuracy/speed trade-off.

    Returns:
        The object returned by ``YOLO(model_path)``.
    """
    # The docstring must precede this statement; placed after `global` it is
    # just a discarded string expression and `__doc__` stays None.
    global class_labels

    model = YOLO(model_path)
    class_labels = model.names  # Mapping of class IDs to names from the model

    return model


In [254]:
def initialize_video_stream(source=0):
    """
    Open a video capture on ``source`` and hand it back to the caller.

    If the capture reports that it is not opened, an error line is printed;
    the capture object is returned either way, so callers should check
    ``isOpened()`` before reading from it.
    """
    capture = cv2.VideoCapture(source)
    opened = capture.isOpened()
    if opened:
        return capture

    # Best-effort: report the failure but still return the capture object.
    print("Error: Unable to open video source.")
    return capture


In [255]:
def preprocess_frame(frame, target_size=(640, 640)):
    """
    Prepare a captured frame for the detector.

    The frame is converted from BGR to RGB channel order and then resized
    to ``target_size``. Pixel values are not normalized.
    """
    return cv2.resize(
        cv2.cvtColor(frame, cv2.COLOR_BGR2RGB),  # BGR -> RGB first
        target_size,
    )


In [256]:
def run_object_detection(model, frame):
    """
    Invoke ``model`` on ``frame`` and return the boxes of the first result.
    """
    first_result = model(frame)[0]
    return first_result.boxes


In [257]:
# def process_detections(detections, confidence_threshold=0.3):
#     """
#     Process and filter detections based on a confidence threshold.
#     """
#     filtered_detections = []
    
#     for box in detections:
#         conf = box.conf.item()  # Confidence score
#         if conf >= confidence_threshold:
#             # Extract bounding box coordinates
#             x1, y1, x2, y2 = box.xyxy[0]  # Coordinates
#             label = box.cls.item()  # Class label
#             filtered_detections.append({
#                 'box': (int(x1), int(y1), int(x2), int(y2)),
#                 'confidence': conf,
#                 'label': int(label)
#             })
    
#     return filtered_detections

# def process_detections(detections, confidence_threshold=0.3):
#     """
#     Process and filter detections based on a confidence threshold.
#     """
#     filtered_detections = []
#     for det in detections:
#         if det.conf >= confidence_threshold:
#             x1, y1, x2, y2 = map(int, det.xyxy[0])  # Bounding box coordinates
#             class_id = int(det.cls)  # Class ID
#             class_name = class_labels[class_id]  # Retrieve class name
#             confidence = float(det.conf)  # Confidence score
#             filtered_detections.append({
#                 'box': (x1, y1, x2, y2),
#                 'confidence': confidence,
#                 'label': class_name
#             })
#     return filtered_detections

# def process_detections(detections, confidence_threshold=0.3):
#     """
#     Process and filter detections based on a confidence threshold.
#     """
#     filtered_detections = []
#     for det in detections:
#         if det.conf >= confidence_threshold:
#             x1, y1, x2, y2 = map(int, det.xyxy[0])  # Bounding box coordinates
#             class_id = int(det.cls)  # Class ID
#             class_name = class_labels.get(class_id, 'Unknown')  # Retrieve class name
#             confidence = float(det.conf)  # Confidence score
#             filtered_detections.append({
#                 'box': (x1, y1, x2, y2),
#                 'confidence': confidence,
#                 'label': class_name
#             })
#     return filtered_detections


# def process_detections(results, confidence_threshold=0.3):
#     """
#     Process and filter detections based on a confidence threshold.
#     """
#     filtered_detections = []
#     for box in results.boxes:
#         if box.conf >= confidence_threshold:
#             x1, y1, x2, y2 = map(int, box.xyxy[0])  # Bounding box coordinates
#             class_id = int(box.cls)  # Class ID
#             class_name = class_labels.get(class_id, 'Unknown')  # Retrieve class name
#             confidence = float(box.conf)  # Confidence score
#             filtered_detections.append({
#                 'box': (x1, y1, x2, y2),
#                 'confidence': confidence,
#                 'label': class_name
#             })
#     return filtered_detections

def process_detections(result, confidence_threshold=0.3):
    """
    Keep the boxes of ``result`` whose confidence reaches the threshold.

    Each kept box becomes a dict with keys ``'box'`` (integer x1, y1, x2, y2
    tuple), ``'confidence'`` (float) and ``'label'`` (name looked up in
    ``class_labels``, or ``'Unknown'`` when the class ID is not present).
    """
    kept = []
    for candidate in result.boxes:
        # Skip anything that does not reach the confidence threshold.
        if not (candidate.conf >= confidence_threshold):
            continue

        left, top, right, bottom = (int(coord) for coord in candidate.xyxy[0])
        name = class_labels.get(int(candidate.cls), 'Unknown')
        kept.append({
            'box': (left, top, right, bottom),
            'confidence': float(candidate.conf),
            'label': name
        })
    return kept


In [258]:
# def annotate_frame(frame, detections):
#     """
#     Draw bounding boxes and labels on the frame for each detection.
#     """
#     for det in detections:
#         x1, y1, x2, y2 = int(det['box'][0]), int(det['box'][1]), int(det['box'][2]), int(det['box'][3])
#         label = f"{det['label']} {det['confidence']:.2f}"
#         cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
#         cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
#     return frame

# def annotate_frame(frame, detections):
#     """
#     Draw bounding boxes and labels on the frame for each detection, including proximity.
#     """
#     for det in detections:
#         x1, y1, x2, y2 = det['BoundingBox_X1'], det['BoundingBox_Y1'], det['BoundingBox_X2'], det['BoundingBox_Y2']
#         label = f"{det['Label']} {det['Confidence']:.2f}"
#         proximity = det['Proximity']  # Use area as proximity measure
        
#         # Draw bounding box and label with proximity
#         cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
#         cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
#         cv2.putText(frame, f"Proximity: {int(proximity)}", (x1, y2 + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
        
#     return frame

# def annotate_frame(frame, detections):
#     """
#     Draw bounding boxes and labels on the frame for each detection, including object names and proximity.
#     """
#     for det in detections:
#         x1, y1, x2, y2 = det['BoundingBox_X1'], det['BoundingBox_Y1'], det['BoundingBox_X2'], det['BoundingBox_Y2']
#         label = f"{det['Label']} {det['Confidence']:.2f}"  # Show object name and confidence
#         proximity = det['Proximity']
        
#         # Draw bounding box and label with proximity
#         cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
#         cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
#         cv2.putText(frame, f"Proximity: {int(proximity)}", (x1, y2 + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
        
#     return frame


# def annotate_frame(frame, detections):
#     """
#     Draw bounding boxes and labels on the frame for each detection.
#     """
#     for det in detections:
#         x1, y1, x2, y2 = det['box']
#         label = f"{det['label']} {det['confidence']:.2f}"
#         # Draw bounding box
#         cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
#         # Put label text above the bounding box
#         cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
#     return frame
 
import cv2

def annotate_frame(frame, detections):
    """
    Draw a bounding box and a "<label> <confidence>" caption per detection.

    The frame is drawn on in place and also returned.

    Args:
        frame: Image passed to ``cv2.rectangle`` / ``cv2.putText``.
        detections: Iterable of dicts with keys ``'box'`` (x1, y1, x2, y2),
            ``'label'`` and ``'confidence'``.

    Returns:
        The same ``frame`` object, annotated.
    """
    color = (0, 255, 0)
    label_gap = 10      # pixels between the box top and the caption baseline
    inside_offset = 15  # baseline offset when the caption is moved inside the box

    for det in detections:
        x1, y1, x2, y2 = det['box']
        label = f"{det['label']} {det['confidence']:.2f}"

        # Draw bounding box
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)

        # Put the caption above the box; when the box touches the top of the
        # frame that position would be clipped, so drop it just inside the box.
        label_y = y1 - label_gap
        if label_y < label_gap:
            label_y = max(y1, 0) + inside_offset
        cv2.putText(frame, label, (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
    return frame



In [259]:
# def extract_detection_data(detections):
#     """
#     Extract metrics from detections and format for logging.
#     """
#     frame_data = []
#     for det in detections:
#         x1, y1, x2, y2 = det['box']
#         confidence = det['confidence']
#         label = det['label']
        
#         # Calculate width and height of bounding box
#         width = x2 - x1
#         height = y2 - y1
#         aspect_ratio = width / height if height > 0 else 0
        
#         # Frame-level data dictionary
#         data = {
#             'BoundingBox_X1': x1,
#             'BoundingBox_Y1': y1,
#             'BoundingBox_X2': x2,
#             'BoundingBox_Y2': y2,
#             'Confidence': confidence,
#             'Label': label,
#             'Width': width,
#             'Height': height,
#             'AspectRatio': aspect_ratio
#         }
#         frame_data.append(data)
#     return frame_data

# def extract_detection_data(detections):
#     """
#     Extract metrics from detections and format for logging.
#     """
#     frame_data = []
#     for det in detections:
#         x1, y1, x2, y2 = det['box']
#         confidence = det['confidence']
#         label = det['label']
        
#         # Calculate width, height, and area (proximity indicator)
#         width = x2 - x1
#         height = y2 - y1
#         area = width * height  # Use bounding box area as proximity indicator
#         aspect_ratio = width / height if height > 0 else 0
        
#         # Frame-level data dictionary
#         data = {
#             'BoundingBox_X1': x1,
#             'BoundingBox_Y1': y1,
#             'BoundingBox_X2': x2,
#             'BoundingBox_Y2': y2,
#             'Confidence': confidence,
#             'Label': label,
#             'Width': width,
#             'Height': height,
#             'AspectRatio': aspect_ratio,
#             'Proximity': area  # Higher area -> closer proximity
#         }
#         frame_data.append(data)
#     return frame_data

# def extract_detection_data(detections):
#     """
#     Extract metrics from detections and format for logging, including object names.
#     """
#     frame_data = []
#     for det in detections:
#         x1, y1, x2, y2 = det['box']
#         confidence = det['confidence']
#         class_id = det['label']
#         object_name = class_labels.get(class_id, 'Unknown')  # Map class ID to name
        
#         # Calculate width, height, and area (proximity indicator)
#         width = x2 - x1
#         height = y2 - y1
#         area = width * height  # Use bounding box area as proximity indicator
#         aspect_ratio = width / height if height > 0 else 0
        
#         # Frame-level data dictionary
#         data = {
#             'BoundingBox_X1': x1,
#             'BoundingBox_Y1': y1,
#             'BoundingBox_X2': x2,
#             'BoundingBox_Y2': y2,
#             'Confidence': confidence,
#             'Label': object_name,  # Use object name for labeling
#             'ClassID': class_id,
#             'Width': width,
#             'Height': height,
#             'AspectRatio': aspect_ratio,
#             'Proximity': area  # Higher area -> closer proximity
#         }
#         frame_data.append(data)
#     return frame_data


def extract_detection_data(detections):
    """
    Flatten detection dicts into row-like records for logging and analysis.

    Each record has the keys ``label``, ``confidence``, ``x1``, ``y1``,
    ``x2`` and ``y2`` (in that order), taken from the detection's
    ``'label'``, ``'confidence'`` and four-element ``'box'``.
    """
    def _to_record(det):
        left, top, right, bottom = det['box']
        return {
            'label': det['label'],
            'confidence': det['confidence'],
            'x1': left,
            'y1': top,
            'x2': right,
            'y2': bottom
        }

    return [_to_record(det) for det in detections]


In [260]:
def save_detection_data_to_csv(log_data, filename='detection_log.csv'):
    """
    Write the logged detection records to ``filename`` as CSV (no index
    column) and print a confirmation line.
    """
    pd.DataFrame(log_data).to_csv(filename, index=False)
    print(f"Detection data saved to {filename}")


In [261]:
# def calculate_average_confidence(df):
#     """
#     Calculate average confidence per class from detection data.
#     """
#     avg_conf = df.groupby('Label')['Confidence'].mean()
#     print("Average confidence per class:\n", avg_conf)
#     return avg_conf

def calculate_average_confidence(detection_data):
    """
    Calculate the mean confidence per class label.

    Args:
        detection_data: Records accepted by ``pd.DataFrame`` that provide
            ``'label'`` and ``'confidence'`` columns (for example the output
            of ``extract_detection_data``).

    Returns:
        A Series named ``'confidence'`` indexed by ``'label'`` holding the
        mean confidence of each label. When there are no records an empty
        Series of the same shape is returned.
    """
    df = pd.DataFrame(detection_data)

    # An empty record list yields a frame with no columns at all, so the
    # groupby below would raise KeyError('label'); short-circuit instead.
    if df.empty:
        return pd.Series(
            dtype='float64',
            name='confidence',
            index=pd.Index([], name='label'),
        )

    return df.groupby('label')['confidence'].mean()


In [262]:
# def run_real_time_detection(model_path='yolov8n.pt', video_source=0, confidence_threshold=0.3, max_duration=120000):
#     """
#     Run real-time obstacle detection and log data for analysis.
#     Stop after max_duration seconds or if 'q' is pressed.
#     """
#     # Load model and initialize video stream
#     model = load_yolo_model(model_path)
#     video_stream = initialize_video_stream(video_source)
    
#     log_data = []  # List to store detection data for each frame
#     start_time = time.time()  # Record the start time
    
#     while True:
#         # Check if max duration has passed
#         elapsed_time = time.time() - start_time
#         if elapsed_time > max_duration:
#             print("Time limit reached. Ending detection.")
#             break
        
#         # Capture frame-by-frame
#         ret, frame = video_stream.read()
#         if not ret:
#             print("Error: Frame capture failed.")
#             break
        
#         # Preprocess and run detection
#         preprocessed_frame = preprocess_frame(frame)
#         detections = run_object_detection(model, preprocessed_frame)
        
#         # Process detections and calculate proximity
#         filtered_detections = process_detections(detections, confidence_threshold)
#         frame_data = extract_detection_data(filtered_detections)  # Now includes 'Proximity'
#         log_data.extend(frame_data)  # Append data for each detected object
        
#         # Annotate frame with proximity
#         annotated_frame = annotate_frame(frame, frame_data)  # Pass frame_data to include Proximity
        
#         # Display the resulting frame
#         cv2.imshow('Real-Time Obstacle Detection', annotated_frame)
        
#         # Break loop on 'q' key press
#         if cv2.waitKey(1) & 0xFF == ord('q'):
#             print("Detection ended by user.")
#             break
    
#     # Release video capture and close windows
#     video_stream.release()
#     cv2.destroyAllWindows()
    
#     # Save logged data to a CSV file for later analysis
#     save_detection_data_to_csv(log_data)


# def run_real_time_detection(confidence_threshold=0.3):
#     """
#     Run real-time object detection using the YOLOv11n model.
#     """
#     # Initialize video capture (0 for default camera)
#     cap = cv2.VideoCapture(0)

#     while cap.isOpened():
#         ret, frame = cap.read()
#         if not ret:
#             break

#         # Perform detection
#         model = load_yolo_model()
#         results = model(frame)

#         # Process detections
#         detections = process_detections(results, confidence_threshold)

#         # Annotate frame
#         annotated_frame = annotate_frame(frame, detections)

#         # Display the resulting frame
#         cv2.imshow('Real-Time Object Detection', annotated_frame)

#         # Break loop on 'q' key press
#         if cv2.waitKey(1) & 0xFF == ord('q'):
#             break

#     # Release resources
#     cap.release()
#     cv2.destroyAllWindows()

# # Run the detection
# run_real_time_detection()

# import cv2

# def run_real_time_detection( confidence_threshold=0.3):
#     """
#     Run real-time object detection using the YOLOv11n model.
#     """
#     # Initialize video capture (0 for default camera)
#     cap = cv2.VideoCapture(0)

#     while cap.isOpened():
#         ret, frame = cap.read()
#         if not ret:
#             break

#         # Perform detection
#         model = load_yolo_model()
#         results = model(frame)

#         # Access the first Results object
#         result = results[0]

#         # Process detections
#         detections = process_detections(result, confidence_threshold)

#         # Annotate frame
#         annotated_frame = annotate_frame(frame, detections)

#         # Display the resulting frame
#         cv2.imshow('Real-Time Object Detection', annotated_frame)

#         # Break loop on 'q' key press
#         if cv2.waitKey(1) & 0xFF == ord('q'):
#             break

#     # Release resources
#     cap.release()
#     cv2.destroyAllWindows()

#     # Save to csv
#     save_detection_data_to_csv(log_data)


def run_real_time_detection(confidence_threshold=0.3, log_file='detection_log.csv',
                            video_source=0, model_path='yolo11n.pt'):
    """
    Run real-time object detection with a YOLO model and log the detections.

    Frames are read from ``video_source`` until the stream ends, a read
    fails, or 'q' is pressed in the preview window. Every kept detection is
    logged; at the end the log is written to ``log_file`` and the average
    confidence per class is printed.

    Args:
        confidence_threshold: Minimum confidence for a detection to be kept.
        log_file: Path of the CSV file the detection log is written to.
        video_source: Source handed to ``initialize_video_stream``
            (0 selects the default camera).
        model_path: Weights handed to ``load_yolo_model``.

    Returns:
        None.
    """
    # Load the model once, up front. Loading it inside the frame loop re-reads
    # the weights for every single frame.
    model = load_yolo_model(model_path)

    cap = initialize_video_stream(video_source)
    all_detections = []

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Perform detection; the first entry is the result for this frame
            result = model(frame)[0]

            # Process detections
            detections = process_detections(result, confidence_threshold)

            # Annotate frame
            annotated_frame = annotate_frame(frame, detections)

            # Extract and log detection data
            all_detections.extend(extract_detection_data(detections))

            # Display the resulting frame
            cv2.imshow('Real-Time Object Detection', annotated_frame)

            # Break loop on 'q' key press
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the preview window, even when a
        # frame fails mid-loop or the kernel is interrupted.
        cap.release()
        cv2.destroyAllWindows()

    # Save detection data to CSV
    save_detection_data_to_csv(all_detections, log_file)

    # With no detections there is no 'label' column to group by.
    if not all_detections:
        print("No detections were recorded.")
        return

    # Calculate and display average confidence per class
    average_confidence_per_class = calculate_average_confidence(all_detections)
    print("Average confidence per class:")
    print(average_confidence_per_class)

# # Run the detection
# run_real_time_detection()

In [263]:
# Start live detection, keeping detections with confidence >= 0.3.
# Press 'q' in the preview window to stop the loop.
run_real_time_detection(confidence_threshold=0.3)




0: 480x640 1 person, 269.8ms
Speed: 6.0ms preprocess, 269.8ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 tv, 253.9ms
Speed: 5.0ms preprocess, 253.9ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 cat, 283.8ms
Speed: 6.0ms preprocess, 283.8ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 231.9ms
Speed: 5.0ms preprocess, 231.9ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 266.9ms
Speed: 7.0ms preprocess, 266.9ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 cat, 178.9ms
Speed: 4.0ms preprocess, 178.9ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 2 chairs, 177.9ms
Speed: 5.0ms preprocess, 177.9ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 2 chairs, 1 sink, 182.9ms
Speed: 5.0ms preprocess, 1

In [264]:
# df = pd.read_csv('detection_log.csv')
# average_confidence_per_class = calculate_average_confidence(df)

# df = pd.read_csv('detection_log.csv')

# Calculate average confidence per class
# average_confidence_per_class = df.groupby('label')['confidence'].mean()
# print("Average confidence per class:\n", average_confidence_per_class)

# Calculate average confidence per object class
# average_confidence_per_class = df.groupby('Label')['Confidence'].mean()
# print("Average confidence per class:\n", average_confidence_per_class)
