# Nightjars Object Detection and Distance Estimation

This notebook combines object detection using YOLOv8 and distance estimation for detected objects.

In [None]:
# Import required libraries
import os
import cv2
import numpy as np
import torch
from ultralytics import YOLO
from pathlib import Path
import matplotlib.pyplot as plt

# Set environment variables
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

## Constants and Configuration

In [None]:
# Calibration geometry, all values in inches. KNOWN_DISTANCE is the distance
# fed to focal_length_finder together with the reference-image widths; the
# *_WIDTH values are the assumed real-world widths of each object class.
KNOWN_DISTANCE = 45
PERSON_WIDTH = 16
MOBILE_WIDTH = 3.0
CHAIR_WIDTH = 20.0
LAPTOP_WIDTH = 12  # NOTE(review): not referenced by the code shown in this notebook

# Detector thresholds.
# NOTE(review): neither value is passed to the model in the code shown here.
CONFIDENCE_THRESHOLD = 0.4
NMS_THRESHOLD = 0.3

# Palette for bounding boxes and labels; object_detector picks an entry with
# the class id modulo the palette length.
COLORS = [
    (151, 157, 255), (56, 56, 255), (31, 112, 255), (29, 178, 255),
    (49, 210, 207), (10, 249, 72), (23, 204, 146), (134, 219, 61),
    (52, 147, 26), (187, 212, 0), (168, 153, 44), (255, 194, 0),
    (147, 69, 52), (255, 115, 100), (236, 24, 0), (255, 56, 132),
    (133, 0, 82), (255, 56, 203), (200, 149, 255), (199, 55, 255),
]

# Fixed colours and font used for the distance overlay.
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
FONTS = cv2.FONT_HERSHEY_PLAIN

## Helper Functions

In [None]:
def focal_length_finder(measured_distance, real_width, width_in_rf):
    """Return the focal length implied by a single reference measurement.

    Args:
        measured_distance: Distance to the object in the reference image.
        real_width: Real-world width of the object (same unit as the distance).
        width_in_rf: Width of the object in the reference image, in pixels.

    Returns:
        ``(width_in_rf * measured_distance) / real_width``.

    Raises:
        ZeroDivisionError: If ``real_width`` is zero.
    """
    scaled_width = width_in_rf * measured_distance
    focal_length = scaled_width / real_width
    return focal_length

def distance_finder(focal_length, real_object_width, width_in_frame):
    """Estimate the distance to an object from its apparent width.

    Args:
        focal_length: Focal length as produced by ``focal_length_finder``.
        real_object_width: Real-world width of the object.
        width_in_frame: Width of the object in the current frame, in pixels.

    Returns:
        ``(real_object_width * focal_length) / width_in_frame``, in the unit
        of ``real_object_width``.

    Raises:
        ZeroDivisionError: If ``width_in_frame`` is zero.
    """
    numerator = real_object_width * focal_length
    estimated_distance = numerator / width_in_frame
    return estimated_distance

def object_detector(image, model):
    """Detect objects in ``image`` with ``model`` and annotate the image in place.

    Every detection gets a bounding box and a ``"<class name> : <score>"``
    label drawn directly onto ``image``. Detections whose class id is in the
    tracked set are additionally returned so the caller can estimate distance.

    Args:
        image: Array whose ``shape`` starts with ``(height, width)``. It is
            modified in place by the drawing calls.
        model: Callable returning an iterable of results, each exposing
            ``boxes.xyxy``, ``boxes.conf`` and ``boxes.cls``; it must also
            provide a ``names`` mapping from class id to class name.

    Returns:
        A ``(data_list, image)`` tuple. Each entry of ``data_list`` is
        ``[class_name, box_width_px, (x1, y1 - 2), horizontal_pos, vertical_pos]``
        where the positions are ``"left"/"center"/"right"`` and
        ``"top"/"mid"/"bottom"`` according to which third of the frame holds
        the box centre. ``image`` is the same object that was passed in.
    """
    results = model(image)
    data_list = []
    seen_centers = set()

    # The frame size is identical for every detection, so read it once.
    # Slicing the shape (rather than unpacking exactly three values) also
    # accepts 2-D grayscale arrays.
    height, width = image.shape[:2]

    # Class ids forwarded to the caller for distance estimation; the original
    # comment labels them person, mobile, chair, laptop.
    # NOTE(review): in the stock COCO label map shipped with Ultralytics,
    # id 72 is 'refrigerator' and laptop is 63 -- confirm against model.names
    # for these custom weights before changing it.
    tracked_class_ids = {0, 67, 56, 72}

    for result in results:
        boxes = result.boxes
        for box, score, class_id in zip(boxes.xyxy, boxes.conf, boxes.cls):
            x1, y1, x2, y2 = map(int, box)
            center = ((x1 + x2) // 2, (y1 + y2) // 2)

            # Boxes sharing an exact centre with an earlier box are treated as
            # duplicates and neither drawn nor reported.
            if center in seen_centers:
                continue
            seen_centers.add(center)
            center_x, center_y = center

            W_pos = "left" if center_x <= width / 3 else "center" if center_x <= 2 * width / 3 else "right"
            H_pos = "top" if center_y <= height / 3 else "mid" if center_y <= 2 * height / 3 else "bottom"

            class_index = int(class_id)
            class_name = model.names[class_index]
            color = COLORS[class_index % len(COLORS)]
            label = f"{class_name} : {score:.2f}"

            cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
            cv2.putText(image, label, (x1, y1 - 10), FONTS, 0.5, color, 2)

            if class_index in tracked_class_ids:
                data_list.append([class_name, x2 - x1, (x1, y1 - 2), W_pos, H_pos])

    return data_list, image

## Load Model and Reference Images

In [None]:
# Load YOLOv8 model
model = YOLO('yolov8xcdark.pt')


def _read_reference_image(path):
    """Read a reference image, raising if OpenCV cannot load it."""
    image = cv2.imread(path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising;
        # catch it here rather than failing obscurely inside the detector.
        raise ValueError(f"Could not read image at {path}")
    return image


def _reference_width(detections, class_name):
    """Return the pixel width of the first ``class_name`` entry in ``detections``.

    ``detections`` is the list returned by ``object_detector``; it can contain
    other tracked classes as well (e.g. a person holding the phone), so the
    entry is selected by name instead of blindly taking index 0.
    """
    for detection in detections:
        if detection[0] == class_name:
            return detection[1]
    raise ValueError(f"No '{class_name}' detected in its reference image")


# Load reference images
ref_person = _read_reference_image('ReferenceImages/image14.png')
ref_mobile = _read_reference_image('ReferenceImages/image4.png')
ref_chair = _read_reference_image('ReferenceImages/image22.png')
# The laptop reference is not used by any calculation in this cell, so it is
# loaded as before without a hard failure if the file is missing.
ref_laptop = cv2.imread('ReferenceImages/image2.png')

# Get reference widths (object_detector also draws its annotations onto the
# reference images in place)
person_data, _ = object_detector(ref_person, model)
person_width_in_rf = _reference_width(person_data, 'person')

mobile_data, _ = object_detector(ref_mobile, model)
mobile_width_in_rf = _reference_width(mobile_data, 'cell phone')

chair_data, _ = object_detector(ref_chair, model)
chair_width_in_rf = _reference_width(chair_data, 'chair')

# Calculate focal lengths
focal_person = focal_length_finder(KNOWN_DISTANCE, PERSON_WIDTH, person_width_in_rf)
focal_mobile = focal_length_finder(KNOWN_DISTANCE, MOBILE_WIDTH, mobile_width_in_rf)
focal_chair = focal_length_finder(KNOWN_DISTANCE, CHAIR_WIDTH, chair_width_in_rf)

## Process Image Function

In [None]:
def process_image(image_path):
    """Annotate an image with detections and per-object distance estimates.

    The image is read from ``image_path``, passed through ``object_detector``
    and, for every tracked detection that has a calibrated focal length
    (person, cell phone, chair), a distance label is drawn and a guidance line
    is printed.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        The annotated frame.

    Raises:
        ValueError: If the image cannot be read.
    """
    # Read image
    frame = cv2.imread(image_path)
    if frame is None:
        raise ValueError(f"Could not read image at {image_path}")

    # Detect objects
    data, annotated_frame = object_detector(frame, model)

    # Process each detected object
    for name, pixel_width, (x, y), _horizontal_pos, vertical_pos in data:
        # A box that collapsed to zero width after integer truncation cannot
        # be turned into a distance (division by zero).
        if pixel_width <= 0:
            continue

        if name == 'person':
            distance = distance_finder(focal_person, PERSON_WIDTH, pixel_width)
        elif name == 'cell phone':
            distance = distance_finder(focal_mobile, MOBILE_WIDTH, pixel_width)
        elif name == 'chair':
            distance = distance_finder(focal_chair, CHAIR_WIDTH, pixel_width)
        else:
            # object_detector can report a class with no focal-length
            # calibration; skip it instead of hitting an unbound variable or
            # reusing the previous object's distance.
            continue

        # Add distance information
        cv2.rectangle(annotated_frame, (x+2, y+4), (x + 150, y + 20), BLACK, -1)
        cv2.putText(annotated_frame, f'Distance: {round(distance, 2)} inches',
                    (x + 7, y + 17), FONTS, 0.58, WHITE, 1)

        # Generate guidance text
        if distance > 100:
            guidance = "Get closer"
        elif 50 < round(distance) <= 100 and vertical_pos == "mid":
            guidance = "Go straight"
        else:
            guidance = f"{name} {int(round(distance))} inches, take left or right"

        print(f"Object: {name}, Distance: {round(distance, 2)} inches, Guidance: {guidance}")

    return annotated_frame

## Example Usage

In [None]:
# Process a test image: runs detection and distance estimation, prints the
# per-object guidance lines and returns the annotated frame.
test_image_path = 'test_images/test1.jpg'  # Replace with your test image path
result_image = process_image(test_image_path)

# Display the result. The frame is converted from BGR to RGB channel order
# before being handed to matplotlib.
plt.figure(figsize=(12, 8))
plt.imshow(cv2.cvtColor(result_image, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.show()

## Video Processing Function

In [None]:
def process_video(video_path, output_path):
    """Annotate every frame of a video with detections and distance labels.

    Each frame is passed through ``object_detector``; tracked detections with
    a calibrated focal length (person, cell phone, chair) get a distance label.
    The annotated frames are written to ``output_path`` with the ``mp4v`` codec.

    Args:
        video_path: Path of the input video.
        output_path: Path of the annotated output video. Its parent directory
            is created if it does not exist.

    Raises:
        ValueError: If the input video or the output writer cannot be opened.
    """
    # Open video file
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise ValueError(f"Could not open video at {video_path}")

    out = None
    try:
        # Get video properties
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Keep the fractional frame rate (e.g. 29.97); truncating it to an int
        # changes the playback speed of the output.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # Some sources report 0 (or NaN), which yields an unusable writer.
            # NOTE(review): 30.0 is an assumed fallback rate.
            fps = 30.0

        # A missing output directory makes the writer fail without raising.
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)

        # Create video writer
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            raise ValueError(f"Could not open video writer for {output_path}")

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Process frame
            data, annotated_frame = object_detector(frame, model)

            # Add distance information for each detected object
            for name, pixel_width, (x, y), _horizontal_pos, _vertical_pos in data:
                # Zero-width boxes cannot be converted to a distance.
                if pixel_width <= 0:
                    continue

                if name == 'person':
                    distance = distance_finder(focal_person, PERSON_WIDTH, pixel_width)
                elif name == 'cell phone':
                    distance = distance_finder(focal_mobile, MOBILE_WIDTH, pixel_width)
                elif name == 'chair':
                    distance = distance_finder(focal_chair, CHAIR_WIDTH, pixel_width)
                else:
                    # No calibration for this class: skip it rather than
                    # reusing a stale distance or raising UnboundLocalError.
                    continue

                cv2.rectangle(annotated_frame, (x+2, y+4), (x + 150, y + 20), BLACK, -1)
                cv2.putText(annotated_frame, f'Distance: {round(distance, 2)} inches',
                            (x + 7, y + 17), FONTS, 0.58, WHITE, 1)

            # Write frame to output video
            out.write(annotated_frame)
    finally:
        # Release resources even when detection or writing fails part-way.
        cap.release()
        if out is not None:
            out.release()

    print(f"Video processing complete. Output saved to {output_path}")

## Example Video Processing

In [None]:
# Run the video pipeline on one sample clip and save the annotated copy
test_video_path = 'test_videos/test1.mp4'  # Replace with your test video path
output_video_path = 'output_videos/processed_test1.mp4'
process_video(video_path=test_video_path, output_path=output_video_path)