In [9]:
import cv2
import time
import torch
import socket
import struct
import numpy as np
from utils.general import non_max_suppression, scale_coords
from utils.torch_utils import select_device
from datetime import datetime

In [10]:
# Load pre-trained YOLOv7 model and set device
device = select_device('0' if torch.cuda.is_available() else 'cpu')
print(device)
model = torch.load('yolov7.pt', map_location=device)['model']
model.eval()

cuda:0


  model = torch.load('yolov7.pt', map_location=device)['model']


Model(
  (model): Sequential(
    (0): Conv(
      (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      (act): SiLU()
    )
    (1): Conv(
      (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      (act): SiLU()
    )
    (2): Conv(
      (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      (act): SiLU()
    )
    (3): Conv(
      (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      (act): SiLU()
    )
    (4): Conv(
      (conv): Conv2d(128, 64, kernel_size=(1, 1), 

In [11]:
def preprocess_image(img, img_size=640):
    # Get original image dimensions
    h, w = img.shape[:2]
    
    # Calculate the resize ratio while keeping aspect ratio
    scale = min(img_size / h, img_size / w)
    
    # Calculate new dimensions
    new_w, new_h = int(w * scale), int(h * scale)
    
    # Resize image
    img_resized = cv2.resize(img, (new_w, new_h))  # Resize keeping aspect ratio
    
    # Padding to fit img_size if needed (to make the image square)
    top_pad = (img_size - new_h) // 2
    bottom_pad = img_size - new_h - top_pad
    left_pad = (img_size - new_w) // 2
    right_pad = img_size - new_w - left_pad
    
    # Pad the resized image with zeros (black pixels)
    img_resized = cv2.copyMakeBorder(img_resized, top_pad, bottom_pad, left_pad, right_pad, cv2.BORDER_CONSTANT, value=(0, 0, 0))
    
    # Convert BGR to RGB and HWC to CHW format for PyTorch
    img_resized = img_resized[:, :, ::-1].transpose(2, 0, 1)
    img_resized = np.ascontiguousarray(img_resized)
    
    # Normalize to 0-1
    img_resized = torch.from_numpy(img_resized).float() / 255.0
    
    # Add batch dimension
    if img_resized.ndimension() == 3:
        img_resized = img_resized.unsqueeze(0)

    return img_resized
    

In [12]:
def detect_humans(image, model, device, img_size=640, conf_thresh=0.25, iou_thresh=0.45):
    # Preprocess image
    img_preprocessed = preprocess_image(image, img_size).to(device)
    
    # Check if the model is using half precision and adjust input
    if next(model.parameters()).dtype == torch.float16:
        img_preprocessed = img_preprocessed.half()  # Convert image to half precision

    with torch.no_grad():
        pred = model(img_preprocessed)[0]  # Run inference
    
    # Apply non-max suppression to filter out multiple detections
    pred = non_max_suppression(pred, conf_thresh, iou_thresh, classes=[0])  # Class 0 is humans
    
    return pred


In [13]:
def plot_detections(image, predictions, img_size=640):
    img_copy = image.copy()
    h, w = img_copy.shape[:2]  # Get original image dimensions

    for det in predictions:
        if len(det):
            # Convert from normalized coordinates back to original image coordinates
            det[:, :4] = scale_coords((img_size, img_size), det[:, :4], img_copy.shape).round()
            for *xyxy, conf, cls in det:
                # Draw bounding box
                cv2.rectangle(img_copy, (int(xyxy[0]), int(xyxy[1])), (int(xyxy[2]), int(xyxy[3])), (255, 0, 0), 2)
                cv2.putText(img_copy, f'Person {conf:.2f}', (int(xyxy[0]), int(xyxy[1]) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

    return img_copy

In [14]:
# Function to calculate the center of a bounding box
def calculate_bbox_center(x1, y1, x2, y2):
    center_x = (x1 + x2) // 2
    center_y = (y1 + y2) // 2
    return center_x, center_y

def calculate_direction(center_x, center_y, frame_center_x, frame_center_y, x1, y1, x2, y2):
    instructions = []

    # Check if the frame center lies inside the bounding box
    if x1 <= frame_center_x <= x2 and y1 <= frame_center_y <= y2:
        instructions.append("Centered")
    else:
        # Horizontal adjustment
        if frame_center_x < x1:
            instructions.append("Move Right")
        elif frame_center_x > x2:
            instructions.append("Move Left")

        # Forward/Backward adjustment
        if frame_center_y < y1:
            instructions.append("Move Forward")
        elif frame_center_y > y2:
            instructions.append("Move Backward")

    return ", ".join(instructions)


In [15]:
# Capture video feed from the webcam
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print("Error: Could not open video source.")
    exit()

In [16]:
# Process the video feed frame by frame
while True:
    ret, frame = cap.read()
    if not ret:
        print("Error: Failed to capture image.")
        break

    # Get the dimensions of the frame
    frame_height, frame_width = frame.shape[:2]
    frame_center_x = frame_width // 2
    frame_center_y = frame_height // 2

    # Run human detection on the current frame
    predictions = detect_humans(frame, model, device)

    num_people = 0
    closest_person = None
    closest_distance = float('inf')

    for det in predictions:
        for *xyxy, conf, cls in det:
            if conf.item() >= 0.5:
                num_people += 1

                # Extract bounding box coordinates
                x1, y1, x2, y2 = map(int, xyxy)

                # Calculate bounding box center
                bbox_center_x, bbox_center_y = calculate_bbox_center(x1, y1, x2, y2)

                # Calculate distance from frame center to bounding box center
                distance = ((bbox_center_x - frame_center_x) ** 2 + (bbox_center_y - frame_center_y) ** 2) ** 0.5

                # Update closest person if necessary
                if distance < closest_distance:
                    closest_distance = distance
                    closest_person = (bbox_center_x, bbox_center_y, x1, y1, x2, y2)
    print(num_people)
    if closest_person:
        bbox_center_x, bbox_center_y, x1, y1, x2, y2 = closest_person
        direction = calculate_direction(bbox_center_x, bbox_center_y, frame_center_x, frame_center_y,x1,y1,x2,y2)
        print(f"Closest Person at ({bbox_center_x}, {bbox_center_y}): {direction}")
    else:
        print("No humans detected.")

    # Plot detections on the frame
    frame_with_detections = plot_detections(frame, predictions)

    # Display the frame with detections
    cv2.imshow('YOLOv7 Human Detection', frame_with_detections)

    # Exit loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        print("Exiting live feed")
        break

# Release the video capture and close windows
cap.release()
cv2.destroyAllWindows()


1
Closest Person at (346, 390): Centered
1
Closest Person at (346, 390): Centered
1
Closest Person at (345, 390): Centered
1
Closest Person at (346, 391): Centered
1
Closest Person at (348, 390): Centered
1
Closest Person at (347, 390): Centered
1
Closest Person at (346, 390): Centered
1
Closest Person at (347, 391): Centered
1
Closest Person at (346, 391): Centered
1
Closest Person at (346, 391): Centered
1
Closest Person at (347, 391): Centered
1
Closest Person at (348, 390): Centered
1
Closest Person at (347, 390): Centered
1
Closest Person at (348, 390): Centered
1
Closest Person at (348, 390): Centered
1
Closest Person at (348, 390): Centered
1
Closest Person at (348, 390): Centered
1
Closest Person at (348, 390): Centered
1
Closest Person at (348, 390): Centered
1
Closest Person at (348, 391): Centered
1
Closest Person at (348, 391): Centered
1
Closest Person at (348, 391): Centered
1
Closest Person at (349, 391): Centered
1
Closest Person at (349, 391): Centered
1
Closest Person

In [17]:
import sys
print(sys.executable)


c:\Users\kolak\AppData\Local\Programs\Python\Python312\python.exe
