In [1]:
import cv2
from ultralytics import YOLO
import numpy as np
from tensorflow.keras.applications.vgg16 import preprocess_input, VGG16
from tensorflow.keras.models import load_model
import pandas as pd
from tensorflow.keras.preprocessing.image import load_img,img_to_array 

In [2]:
# Load the YOLO object detector from the "yolov8m.pt" weights file.
# The detection loop later in this notebook keeps only class index 0 (reported as "person" in the run log).
model = YOLO("yolov8m.pt")

In [9]:
# Video to analyse.
# NOTE(review): this value is an absolute path on one machine and names a
# directory rather than a video file — point it at the actual clip before running.
VIDEO_SOURCE = "/Users/kishore/Downloads/"

cap = cv2.VideoCapture(VIDEO_SOURCE)
# Fail fast: with an unopened capture the first cap.read() reports failure, so the
# processing loop would otherwise stop immediately without any explanation.
if not cap.isOpened():
    raise IOError(f"Could not open video source: {VIDEO_SOURCE!r}")

In [4]:
# Load the trained gender classifier from its HDF5 file.
# NOTE(review): the variable name suggests a VGG16-based network and the recorded
# outputs show two scores per image — confirm both against the training code.
vgg16 = load_model("GenderClassification.h5")



In [5]:
def crop_humans(image, x, y, x1, y1):
    """Cut one bounding box out of ``image`` and return it wrapped in a list.

    Args:
        image: Array indexed as ``image[row, col, ...]`` (e.g. an OpenCV frame).
        x, y: Column and row of the box's top-left corner.
        x1, y1: Column and row of the box's bottom-right corner (exclusive).

    Returns:
        A one-element list holding the cropped region (for NumPy arrays this is
        a view into ``image``, not a copy), or an empty list when the box has no
        area inside the image.
    """
    height, width = image.shape[:2]
    # Clamp to the image bounds: a negative start index would otherwise be read
    # by NumPy as "count from the end" and silently select the wrong region.
    left, right = max(0, int(x)), min(width, int(x1))
    top, bottom = max(0, int(y)), min(height, int(y1))
    if right <= left or bottom <= top:
        # Degenerate box: return nothing so callers can skip it instead of
        # passing a zero-sized array on to cv2.resize, which rejects empty input.
        return []
    return [image[top:bottom, left:right]]

In [6]:
def preprocess_for_vgg16(crops):
    """Convert image crops into one batch array for the classifier.

    Every crop is resized to 224x224 with ``cv2.resize``, converted with
    ``img_to_array`` and passed through ``preprocess_input``; the results are
    stacked with ``np.array`` in the same order as ``crops``.

    NOTE(review): frames read through OpenCV are BGR — confirm this matches the
    channel order the classifier was trained with.
    """
    def _prepare(region):
        # Resize first, then apply the Keras array conversion and preprocessing.
        scaled = cv2.resize(region, (224, 224))
        return preprocess_input(img_to_array(scaled))

    return np.array([_prepare(region) for region in crops])

In [7]:
def classify_gender(vgg16_model, preprocessed_images):
    """Run the classifier on a preprocessed batch and return its raw output.

    Thin wrapper: whatever ``vgg16_model.predict`` returns is handed back unchanged.
    """
    return vgg16_model.predict(preprocessed_images)

In [8]:
# Read "coco-classes.txt" into a DataFrame; header=None keeps the first line as data
# instead of treating it as column names.
# NOTE(review): df is not referenced by any other cell visible in this notebook.
df = pd.read_csv("coco-classes.txt", header = None)

In [None]:
# Detect people in each frame, classify every person crop, and preview the
# annotated frame until the video ends or the user presses Esc / "q".
QUIT_KEYS = (27, ord("q"))  # 27 is the Esc key code

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # End of stream or a failed read.
            break

        # NOTE(review): "mps" targets Apple-silicon GPUs; change it on other hardware.
        result = model(frame, device="mps")[0]
        bboxes = np.array(result.boxes.xyxy.cpu(), dtype="int")
        classes = np.array(result.boxes.cls.cpu(), dtype="int")

        # Draw on a copy so the overlay for one person never ends up inside the
        # crop taken for another, overlapping person.
        annotated = frame.copy()

        for bbox, cls in zip(bboxes, classes):
            if cls != 0:
                # Only class index 0 (person) is classified.
                continue
            x, y, x1, y1 = (int(v) for v in bbox)

            # Drop zero-sized crops here as well: cv2.resize rejects empty input.
            human_crops = [
                crop for crop in crop_humans(frame, x, y, x1, y1) if crop.size > 0
            ]
            if not human_crops:
                continue

            preprocessed_image = preprocess_for_vgg16(human_crops)
            gender_preds = classify_gender(vgg16, preprocessed_image)
            print(gender_preds)

            # Output index 1 is labelled "Female", anything else "Male".
            gender = "Female" if np.argmax(gender_preds) == 1 else "Male"
            # One box and one label per detection.
            cv2.rectangle(annotated, (x, y), (x1, y1), (0, 0, 255), 2)
            cv2.putText(annotated, gender, (x, y - 5), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 0), 2)

        cv2.imshow("img", annotated)
        # waitKey returns -1 when no key was pressed; mask to the low byte before comparing.
        key = cv2.waitKey(1) & 0xFF
        if key in QUIT_KEYS:
            break
finally:
    # Always free the capture and close the preview window, including when the
    # kernel is interrupted or a frame raises part-way through.
    cap.release()
    cv2.destroyAllWindows()


0: 384x640 1 person, 1 tie, 215.9ms
Speed: 11.7ms preprocess, 215.9ms inference, 1183.6ms postprocess per image at shape (1, 3, 384, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 240ms/step
[[ 1.4841e-14           1]]

0: 384x640 3 persons, 169.3ms
Speed: 11.5ms preprocess, 169.3ms inference, 49.4ms postprocess per image at shape (1, 3, 384, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[[    0.99974  0.00026399]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[[ 0.00021056     0.99979]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[[   0.042697      0.9573]]

0: 384x640 2 persons, 1 cup, 32.9ms
Speed: 2.3ms preprocess, 32.9ms inference, 45.3ms postprocess per image at shape (1, 3, 384, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[[  0.0014812     0.99852]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[[     0.9868      0.0132]