In [None]:
import cv2

# Open the default camera (device index 0) and request a 640x480 frame size.
# CAP_PROP_FRAME_WIDTH / CAP_PROP_FRAME_HEIGHT are the named forms of the
# raw property ids 3 and 4.
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

# Show frames until 'q' is pressed. The try/finally guarantees the camera and
# the preview window are released even if something inside the loop raises.
try:
    while True:
        ret, img = cap.read()
        # read() signals failure through `ret`; in that case `img` is not a
        # usable frame and imshow would raise, so stop cleanly instead.
        if not ret:
            print("Failed to read a frame from the webcam; stopping.")
            break

        cv2.imshow('WebCam', img)

        # Keep only the low byte of the key code so the comparison with
        # ord('q') is not affected by any extra high bits in the return value.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()

In [None]:
pip install ultralytics

In [None]:
import cv2
from ultralytics import YOLO
import math
# Load the YOLO model from a weights path relative to the working directory.
model = YOLO("yolo-Weights/yolov8n.pt")


# start webcam
# Device index 0, requested frame size 640x480 (named forms of property ids 3 and 4).
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)


# Label strings, indexed by the integer class id reported for each detection.
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"
              ]

# Text-drawing settings are the same for every box, so define them once
# instead of re-assigning them for each detection.
font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 1
color = (255, 0, 0)
thickness = 2

# Detect and annotate frame by frame until 'q' is pressed. The try/finally
# guarantees the camera and window are released even if the loop raises.
try:
    while True:
        success, img = cap.read()
        # A failed read does not yield a usable frame; stop cleanly rather
        # than passing it to the model and to imshow.
        if not success:
            print("Failed to read a frame from the webcam; stopping.")
            break

        results = model(img, stream=True)

        # coordinates
        for r in results:
            boxes = r.boxes

            for box in boxes:
                # bounding box corners, converted to int pixel coordinates
                x1, y1, x2, y2 = box.xyxy[0]
                x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

                # put box in cam
                cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 255), 3)

                # confidence, rounded up to two decimal places
                confidence = math.ceil((box.conf[0]*100))/100
                print("Confidence --->",confidence)
                print("box: ",box)

                # class name
                cls = int(box.cls[0])
                print("Class name -->", classNames[cls])

                # object details: label anchored at the box's (x1, y1) corner
                org = (x1, y1)

                cv2.putText(img, classNames[cls], org, font, fontScale, color, thickness)

        cv2.imshow('Webcam', img)
        # Keep only the low byte of the key code before comparing with ord('q').
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()

# Running Fine

## Real-time Object detection with Person gender detection

In [1]:
import cv2
from ultralytics import YOLO
import math
import numpy as np
import time
from keras.models import load_model

# Load your gender classification model and face detection model here
# NOTE(review): these are absolute paths on one machine; adjust them (or move
# them to a configurable base directory) before running elsewhere.
model = load_model('D:\\My Python Stuff\\WebCamOpenCV\\Gender-Classification-ML-main\\training\\model-018.model')
face_clsfr = cv2.CascadeClassifier('D:\\My Python Stuff\\WebCamOpenCV\\Gender-Classification-ML-main\\haarcascade_frontalface_default.xml')

# The CascadeClassifier constructor does not raise when the XML cannot be
# loaded; it just yields an empty classifier that fails later inside
# detectMultiScale. Fail early, before the camera is opened.
if face_clsfr.empty():
    raise RuntimeError("Could not load the Haar cascade face detector XML file.")

model_yolo = YOLO("yolo-Weights/yolov8n.pt")

# start webcam
cap = cv2.VideoCapture(0)
# Maps the index of the largest classifier output to a display label.
labels_dict = {0: 'Male', 1: 'Female'}

# Requested frame size 640x480 (named forms of property ids 3 and 4).
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

# Label strings, indexed by the integer class id reported for each detection.
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"
              ]

# Text-drawing settings shared by the gender label and the class label;
# defined once instead of being re-assigned for every box and every face.
font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 1
color = (255, 0, 0)
thickness = 2

# Detect, classify and annotate frame by frame until 'q' is pressed. The
# try/finally guarantees the camera and window are released if the loop raises.
try:
    while True:
        success, img = cap.read()
        # A failed read does not yield a usable frame; stop cleanly rather
        # than passing it to the detector and to imshow.
        if not success:
            print("Failed to read a frame from the webcam; stopping.")
            break

        # Unannotated copy used for cropping, so rectangles and text drawn on
        # `img` for earlier boxes never end up inside a later person crop.
        clean_frame = img.copy()

        results = model_yolo(img, stream=True)

        # coordinates
        for r in results:
            boxes = r.boxes

            for box in boxes:
                x1, y1, x2, y2 = box.xyxy[0]
                x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

                cls = int(box.cls[0])
                class_name = classNames[cls]

                if class_name == "person":
                    # Crop the whole person box; faces are searched inside it.
                    person_face = clean_frame[y1:y2, x1:x2]

                    # A zero-area crop would make cvtColor raise, so skip the
                    # gender step for it (the box and label are still drawn).
                    if person_face.size > 0:
                        # Perform gender detection using the gender model and face classifier
                        gray = cv2.cvtColor(person_face, cv2.COLOR_BGR2GRAY)
                        faces = face_clsfr.detectMultiScale(gray, 1.3, 5)
                        for (fx, fy, fw, fh) in faces:
                            # Classifier input: 32x32 grayscale, scaled to
                            # [0, 1], as a batch of one with a channel axis.
                            face_img = gray[fy:fy + fh, fx:fx + fw]
                            resized = cv2.resize(face_img, (32, 32))
                            normalized = resized / 255.0
                            reshaped = np.reshape(normalized, (1, 32, 32, 1))

                            # Time only the classifier call. verbose=0 stops
                            # predict from printing a progress line per face.
                            start_time = time.time()
                            result = model.predict(reshaped, verbose=0)
                            end_time = time.time()

                            label = int(np.argmax(result, axis=1)[0])
                            gender = labels_dict[label]

                            # Add gender and timing text near the person box
                            cv2.putText(img, f'Gender: {gender}', (x1, y1 - 10), font, fontScale, color, thickness)
                            detection_time = end_time - start_time
                            time_text = f'Detection Time: {detection_time:.2f} seconds'
                            cv2.putText(img, time_text, (x1, y1 + 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

                # Draw bounding box
                cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 255), 3)

                # Add class name
                cv2.putText(img, class_name, (x1, y1 - 40), font, fontScale, color, thickness)

        cv2.imshow('Webcam', img)
        # Keep only the low byte of the key code before comparing with ord('q').
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()



0: 480x640 (no detections), 461.9ms
Speed: 12.4ms preprocess, 461.9ms inference, 13.5ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 311.2ms
Speed: 0.0ms preprocess, 311.2ms inference, 47.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 267.4ms
Speed: 0.0ms preprocess, 267.4ms inference, 15.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 dog, 267.5ms
Speed: 0.0ms preprocess, 267.5ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 dog, 281.9ms
Speed: 0.0ms preprocess, 281.9ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 dog, 273.8ms
Speed: 32.2ms preprocess, 273.8ms inference, 11.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 269.6ms
Speed: 0.0ms preprocess, 269.6ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 dog, 300.2ms
Speed: 0.0ms preprocess, 300.2ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 269.9ms
Speed: 0.0ms preprocess, 269.9ms inference, 5.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 dog, 270.2ms
Speed: 0.0ms preprocess, 270.2ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 279.0ms
Speed: 0.0ms preprocess, 279.0ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 158.1ms
Speed: 0.0ms preprocess, 158.1ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 dog, 149.9ms
Speed: 0.0ms preprocess, 149.9ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 139.3ms
Speed: 3.5ms preprocess, 139.3ms inference, 15.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 132.3ms
Speed: 0.0ms preprocess, 132.3ms inference, 0.0ms pos



0: 480x640 1 person, 126.2ms
Speed: 0.0ms preprocess, 126.2ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 123.3ms
Speed: 3.1ms preprocess, 123.3ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 131.7ms
Speed: 0.0ms preprocess, 131.7ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 142.1ms
Speed: 0.0ms preprocess, 142.1ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 dog, 164.9ms
Speed: 0.0ms preprocess, 164.9ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 141.0ms
Speed: 1.8ms preprocess, 141.0ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 237.0ms
Speed: 0.0ms preprocess, 237.0ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 dog, 163.2ms
Speed: 0.0ms preprocess, 163.2ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 dog, 296.7ms
Speed: 3.8ms preprocess, 296.7ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 dog, 270.8ms
Speed: 0.0ms preprocess, 270.8ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 155.2ms
Speed: 0.0ms preprocess, 155.2ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 125.7ms
Speed: 0.0ms preprocess, 125.7ms inference, 4.2ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 dog, 119.9ms
Speed: 0.0ms preprocess, 119.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 dog, 125.7ms
Speed: 0.0ms preprocess, 125.7ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 dogs, 110.1ms
Speed: 0.0ms preprocess, 110.1ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 dog, 125.4ms
Speed: 0.0ms preprocess, 125.4ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 dog, 124.1ms
Speed: 2.6ms preprocess, 124.1ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 109.3ms
Speed: 1.3ms preprocess, 109.3ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 dog, 126.7ms
Speed: 0.0ms preprocess, 126.7ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 109.7ms
Speed: 0.0ms preprocess, 109.7ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 dog, 123.4ms
Speed: 3.1ms preprocess, 123.4ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 142.4ms
Speed: 0.0ms preprocess, 142.4ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 dog, 128.3ms
Speed: 0.0ms preprocess, 128.3ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 dog, 110.4ms
Speed: 0.0ms preprocess, 110.4ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 125.8ms
Speed: 0.0ms preprocess, 125.8ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 125.8ms
Speed: 0.0ms preprocess, 125.8ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 110.2ms
Speed: 0.0ms preprocess, 110.2ms inference, 12.5ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 110.7ms
Speed: 0.0ms preprocess, 110.7ms inference, 15.7ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 110.1ms
Speed: 0.0ms preprocess, 110.1ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 dog, 126.3ms
Speed: 0.0ms preprocess, 126.3ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 123.8ms
Speed: 2.3ms preprocess, 123.8ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 109.0ms
Speed: 2.1ms preprocess, 109.0ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 125.7ms
Speed: 0.0ms preprocess, 125.7ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 142.6ms
Speed: 0.0ms preprocess, 142.6ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 109.8ms
Speed: 0.0ms preprocess, 109.8ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 111.2ms
Speed: 0.0ms preprocess, 111.2ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 107.2ms
Speed: 2.1ms preprocess, 107.2ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 108.6ms
Speed: 1.1ms preprocess, 108.6ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 110.4ms
Speed: 0.0ms preprocess, 110.4ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 110.4ms
Speed: 0.0ms preprocess, 110.4ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 110.4ms
Speed: 0.0ms preprocess, 110.4ms inference, 15.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 109.6ms
Speed: 0.0ms preprocess, 109.6ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 111.0ms
Speed: 0.0ms preprocess, 111.0ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 109.5ms
Speed: 0.0ms preprocess, 109.5ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 110.1ms
Speed: 0.0ms preprocess, 110.1ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 110.2ms
Speed: 0.0ms preprocess, 110.2ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 110.3ms
Speed: 0.0ms preprocess, 110.3ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 110.0ms
Speed: 0.0ms preprocess, 110.0ms inference, 0.0ms postprocess per image at



0: 480x640 1 person, 110.1ms
Speed: 0.0ms preprocess, 110.1ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 116.2ms
Speed: 0.0ms preprocess, 116.2ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 110.4ms
Speed: 0.0ms preprocess, 110.4ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 123.6ms
Speed: 0.0ms preprocess, 123.6ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 110.2ms
Speed: 0.0ms preprocess, 110.2ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 110.7ms
Speed: 0.0ms preprocess, 110.7ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 126.3ms
Speed: 0.0ms preprocess, 126.3ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 145.0ms
Speed: 0.0ms preprocess, 145.0ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 124.8ms
Speed: 3.0ms preprocess, 124.8ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 110.2ms
Speed: 0.0ms preprocess, 110.2ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 110.1ms
Speed: 0.0ms preprocess, 110.1ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 cat, 110.3ms
Speed: 0.0ms preprocess, 110.3ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 cat, 109.9ms
Speed: 0.0ms preprocess, 109.9ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 110.0ms
Speed: 0.0ms preprocess, 110.0ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 108.8ms
Speed: 0.0ms preprocess, 108.8ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 110.5ms
Speed: 0.0ms preprocess, 110.5ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 142.6ms
Speed: 0.0ms preprocess, 142.6ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 125.9ms
Speed: 0.0ms preprocess, 125.9ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 125.4ms
Speed: 0.0ms preprocess, 125.4ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 141.5ms
Speed: 0.0ms preprocess, 141.5ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 203.6ms
Speed: 0.0ms preprocess, 203.6ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 159.7ms
Speed: 0.0ms preprocess, 159.7ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 110.6ms
Speed: 0.0ms preprocess, 110.6ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 123.0ms
Speed: 2.2ms preprocess, 123.0ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 142.4ms
Speed: 0.0ms preprocess, 142.4ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)
