# Human-Detection-Using-Machine-Learning

## Train the model using YOLO

In [None]:
# Train the model using data sets
from ultralytics import YOLO

# Load model
model = YOLO('yolov11n.pt')  # Choose a model variant like 'yolov8n.pt', 'yolov8s.pt', etc.

# Train the model
train_results = model.train(
    data=r"data-sets/data.yaml",
    epochs=50,
    imgsz=416,  # Reduce size for faster CPU training
    device="cpu",
    workers=2  # Reduce workers for CPU efficiency
)

## Test trained model

### Image testing

In [None]:
from ultralytics import YOLO
# Load a model
model = YOLO("runs/detect/train/weights/best.pt")
# Perform object detection on an image
results = model("D:/Surya Files/Projects/Human-Detection-Using-Machine-Learning/Input/Pic", save=True)
# results[0].show()

### Video testing 

In [None]:
model = YOLO("runs/detect/train/weights/best.pt")
results = model("Input/Vid/In (9).mp4", save=True, stream=True)

## Output Rendering of Video's

In [None]:
from ultralytics import YOLO
import cv2
import os

# Load the trained YOLO model
model = YOLO("runs/detect/train/weights/best.pt")

# Input video path
video_path = "Input/Vid/In (2).mp4"

# Extract video name without extension
video_name = os.path.splitext(os.path.basename(video_path))[0]

# Define output path with modified filename
output_video_path = f"Output/{video_name}-Detected.mp4"

# Open the video file
cap = cv2.VideoCapture(video_path)

# Get video properties
frame_width = 1280  # Force resolution to 1080p width
frame_height = 720  # Force resolution to 720p height
fps = int(cap.get(cv2.CAP_PROP_FPS))  # Get FPS from original video

# Create a VideoWriter object to save output
out = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break  # Stop if the video ends

    # Resize the frame to 1080x720 to ensure consistency
    frame_resized = cv2.resize(frame, (frame_width, frame_height))  

    # Run YOLOv8 on the frame
    results = model(frame_resized)

    # Get the output frame with detections
    annotated_frame = results[0].plot()

    # Write frame to output video
    out.write(annotated_frame)

    # Show frame (optional)
    cv2.imshow("YOLOv8 Detection", annotated_frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):  # Press 'q' to exit
        break

# Release resources
cap.release()
out.release()
cv2.destroyAllWindows()

print(f"Detection completed! Output saved as: {output_video_path}")

## Live Webcam Detection

In [1]:
from ultralytics import YOLO
import cv2
import os

# Paths
model_path = r'runs/detect/train/weights/best.pt'  # Adjust based on your trained model path
output_folder = r'Output'

# Create output folder if it doesn't exist
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# Load the trained model
model = YOLO(model_path)

# **Use Live Webcam Feed**
video_source = 0  # 0 for default webcam, 1 for external webcam

# Open video capture
cap = cv2.VideoCapture(video_source)

# Check if the webcam opened successfully
if not cap.isOpened():
    print(f"Error: Could not access webcam")
    exit()

# Get video properties
fps = cap.get(cv2.CAP_PROP_FPS) or 30  # Default to 30 FPS if FPS is unavailable
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Set output video filename dynamically
output_video_name = "Webcam-Detected.mp4"
output_video_path = os.path.join(output_folder, output_video_name)

# Initialize VideoWriter
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for MP4
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

frame_number = 0
while True:
    ret, frame = cap.read()

    if not ret:
        print("Error: Failed to capture frame from webcam.")
        break

    # Perform inference
    results = model.predict(source=frame)

    # Process results
    annotated_frame = False  # Flag to track if any person was detected

    for result in results:
        boxes = result.boxes
        names = result.names

        if boxes is not None:
            for box in boxes:
                x1, y1, x2, y2 = box.xyxy[0].tolist()  # Get bounding box coordinates
                cls = int(box.cls[0])  # Get class index
                conf = box.conf[0]  # Get confidence score
                label = names[cls]

                if label == 'person' and conf >= 0.3:  # Check if detected object is a person
                    annotated_frame = True  # Set flag to True

                    # Draw bounding boxes and labels
                    cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
                    cv2.putText(frame, f'{label} {conf:.2f}', (int(x1), int(y1) - 10), 
                                cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

    if annotated_frame:
        # Write the annotated frame to the video file
        out.write(frame)

    # Display the frame
    cv2.imshow('YOLO Live Detection', frame)

    # Press 'q' to quit the video loop
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

    frame_number += 1

# Release resources
cap.release()
out.release()
cv2.destroyAllWindows()

print(f'✅ Annotated live webcam video saved at: {output_video_path}')


0: 256x416 (no detections), 244.9ms
Speed: 1.0ms preprocess, 244.9ms inference, 0.5ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 (no detections), 14.6ms
Speed: 1.1ms preprocess, 14.6ms inference, 0.0ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 (no detections), 16.1ms
Speed: 0.0ms preprocess, 16.1ms inference, 0.0ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 (no detections), 13.3ms
Speed: 0.5ms preprocess, 13.3ms inference, 0.0ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 (no detections), 14.1ms
Speed: 0.0ms preprocess, 14.1ms inference, 0.0ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 (no detections), 18.0ms
Speed: 0.0ms preprocess, 18.0ms inference, 1.0ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 (no detections), 13.0ms
Speed: 1.0ms preprocess, 13.0ms inference, 0.0ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 (no detections), 14.5ms
Speed: 1.0ms preprocess, 14.5ms

KeyboardInterrupt: 