In [None]:
#import library
import cv2
from torchvision import transforms, models, datasets
import torch.nn as nn
import torch
import numpy as np
import matplotlib.pyplot as plt
# Build the Haar-cascade face detector.
# cv2.CascadeClassifier does not raise when the XML file cannot be read: it
# returns an empty classifier and the failure only surfaces later inside
# detectMultiScale. Check explicitly, and fall back to the cascade directory
# shipped with the OpenCV Python package when it is exposed as cv2.data.
CASCADE_FILE = "haarcascade_frontalface_alt.xml"
face_cascade = cv2.CascadeClassifier(CASCADE_FILE)
if face_cascade.empty() and hasattr(cv2, "data"):
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + CASCADE_FILE)
if face_cascade.empty():
    raise FileNotFoundError(f"Could not load Haar cascade '{CASCADE_FILE}'")

# Run the network on the GPU when PyTorch can see one, otherwise on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# Per-channel statistics used to standardise image tensors before they reach
# the network (the usual ImageNet values, listed in R, G, B order).
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
normalize = transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)

In [None]:
# Build the VGG19-based age/gender model
def get_model():
    """Return a pretrained VGG19 whose pooling stage and classifier are replaced.

    All parameters of the pretrained network are frozen first. The ``avgpool``
    stage and the ``classifier`` are then swapped for newly created (and
    therefore trainable) modules. The classifier returns a ``(gender, age)``
    tuple, each produced by a sigmoid unit on top of a shared 64-feature trunk.

    NOTE(review): recent torchvision releases deprecate ``pretrained=`` in
    favour of ``weights=`` -- confirm against the installed version.

    Returns:
        The assembled model, moved to the module-level ``device``.
    """

    class ageGenderClassifier(nn.Module):
        """Shared dense trunk followed by separate age and gender heads."""

        def __init__(self):
            super().__init__()
            # 2048 inputs = 512 channels x 2 x 2 left by the replaced avgpool
            # stage for a 224x224 image.
            self.intermediate = nn.Sequential(
                nn.Linear(2048, 512), nn.ReLU(), nn.Dropout(0.4),
                nn.Linear(512, 128), nn.ReLU(), nn.Dropout(0.4),
                nn.Linear(128, 64), nn.ReLU(),
            )
            self.age_classifier = nn.Sequential(nn.Linear(64, 1), nn.Sigmoid())
            self.gender_classifier = nn.Sequential(nn.Linear(64, 1), nn.Sigmoid())

        def forward(self, x):
            shared = self.intermediate(x)
            age_out = self.age_classifier(shared)
            gender_out = self.gender_classifier(shared)
            # Gender comes first in the returned pair.
            return gender_out, age_out

    backbone = models.vgg19(pretrained=True)

    # Freeze the pretrained weights so no gradients flow through them.
    for weight in backbone.parameters():
        weight.requires_grad = False

    # Modules assigned after the freeze keep requires_grad=True.
    backbone.avgpool = nn.Sequential(
        nn.Conv2d(512, 512, kernel_size=3),
        nn.MaxPool2d(2),
        nn.ReLU(),
        nn.Flatten(),
    )
    backbone.classifier = ageGenderClassifier()

    return backbone.to(device)


In [None]:
def model_prediction(pre_img, model):
    """Run the age/gender model on a preprocessed batch and return NumPy arrays.

    The forward pass is done in evaluation mode (so Dropout layers are
    disabled) and without gradient tracking. If the model was in training
    mode it is switched back afterwards, so the call leaves its mode untouched.

    Args:
        pre_img: tensor batch accepted by ``model``; it is moved to the
            module-level ``device`` and cast to float.
        model: callable returning a ``(gender, age)`` pair of tensors.

    Returns:
        ``(pred_gender, pred_age)`` as NumPy arrays on the CPU.

    Raises:
        ValueError: if ``pre_img`` is ``None`` (e.g. no face was detected).
    """
    if pre_img is None:
        raise ValueError(
            "model_prediction received pre_img=None; skip frames without a detected face"
        )

    # Only toggle the mode when the model is actually in training mode, which
    # also keeps plain callables (without .training) working.
    was_training = getattr(model, "training", False)
    if was_training:
        model.eval()
    try:
        with torch.no_grad():
            face = pre_img.to(device).float()
            gender, age = model(face)
    finally:
        if was_training:
            model.train()

    pred_gender = gender.to('cpu').detach().numpy()
    pred_age = age.to('cpu').detach().numpy()

    return pred_gender, pred_age

In [None]:
# Import necessary libraries
import torch


def extract_person_boxes(results):
    """Collect the bounding boxes of every detection labelled "person".

    Args:
        results: detector output exposing ``results.pandas().xyxy[0]``. That
            object is assumed to be a pandas DataFrame with ``name``, ``xmin``,
            ``ymin``, ``xmax`` and ``ymax`` columns, one row per detection
            (what the original cell expected from YOLOv5) -- TODO confirm.

    Returns:
        List of ``(x_min, y_min, x_max, y_max)`` float tuples; empty when no
        person was detected.
    """
    detections = results.pandas().xyxy[0]
    # Iterating a DataFrame directly yields its column labels, not its rows,
    # so filter with a boolean mask and walk the coordinate columns instead.
    people = detections[detections["name"] == "person"]
    corners = zip(people["xmin"], people["ymin"], people["xmax"], people["ymax"])
    return [tuple(float(value) for value in box) for box in corners]


# NOTE(review): the original cell instantiated an undefined `YOLOv5Model` and
# read an undefined `results`, so it raised NameError on a fresh kernel. Load a
# YOLOv5 detector, run it on a frame, and pass its output to
# extract_person_boxes() to fill this list.
# The boxes are whole-"person" detections in corner format, whereas
# preprocess_image unpacks each box as (x, y, w, h) -- convert before reuse.
human_faces = []

print("Human face coordinates:", human_faces)


In [None]:

 define detection function
# def detection(gray):

#     face = face_cascade.detectMultiScale(gray, 1.3, 5)

#     return face


In [None]:
# Turn the first detected face of a frame into a network-ready tensor
def preprocess_image(face, frame):
    """Crop, resize and normalise the first face box found in ``frame``.

    Only the first entry of ``face`` is used; any further boxes are ignored.

    Args:
        face: sized collection of ``(x, y, w, h)`` boxes; may be empty.
        frame: image convertible with ``np.array`` and indexable as
            ``[rows, cols]`` with a trailing channel axis.
            NOTE(review): ``normalize`` is applied per channel, so the channel
            order presumably has to match the one used in training -- confirm.

    Returns:
        A tensor with a leading batch axis of size 1 holding the 224x224 crop
        (channels first, scaled by 1/255 and passed through ``normalize``), or
        ``None`` when ``face`` is empty.
    """
    if not len(face) > 0:
        # Nothing was detected, so there is nothing to crop.
        return None

    left, top, box_w, box_h = next(iter(face))
    pixels = np.array(frame)
    patch = pixels[top:top + box_h, left:left + box_w]

    patch = cv2.resize(patch, (224, 224))
    # Height x width x channel -> channel x height x width.
    channels_first = torch.tensor(patch).permute(2, 0, 1)
    scaled = normalize(channels_first / 255.)

    return scaled.unsqueeze(0)

In [None]:
model = get_model()
# NOTE(review): the fine-tuned checkpoint is not loaded (the line below is
# commented out), so the replaced pooling stage and the age/gender heads keep
# their random initialisation. Re-enable it once the weight file is available.
# model.load_state_dict(torch.load("vgg19", map_location=torch.device('cpu')))

# A freshly built nn.Module starts in training mode; switch to evaluation mode
# so the Dropout layers in the classifier are disabled during inference.
model.eval()



In [None]:
# Open the input clip; the printed flag reports whether OpenCV could open it.
VIDEO_PATH = '/content/Faces.mp4'
cap = cv2.VideoCapture(VIDEO_PATH)

print(cap.isOpened())

True


In [None]:
# Annotate every frame of the opened video with per-face gender/age
# predictions and write the result to output.mp4.
fps = int(cap.get(cv2.CAP_PROP_FPS))
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
frame_size = (width, height)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')

# The model's age output is multiplied by this factor to obtain years;
# presumably the value ages were divided by during training -- TODO confirm.
AGE_SCALE = 116

# Initialize video writer.
video_output = cv2.VideoWriter('output.mp4', fourcc, fps, frame_size)

try:
    while cap.isOpened():
        # read() reports failure once the stream is exhausted; without this
        # check the loop never ends and cvtColor is called on a missing frame.
        ok, frame = cap.read()
        if not ok:
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # The frame is BGR (see the conversion above) while `normalize` uses
        # RGB-ordered statistics, so the network gets an RGB copy.
        # NOTE(review): drop this conversion if training used raw BGR crops.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        detect_face = detection(gray)

        # Predict for each face separately so every box gets its own label;
        # a frame without faces simply skips this loop.
        for box in detect_face:
            x, y, w, h = (int(v) for v in box)

            pre_img = preprocess_image([(x, y, w, h)], rgb)
            if pre_img is None:
                continue
            gender, age = model_prediction(pre_img, model)

            gender_label = 'Male' if gender[0][0] < 0.5 else 'Female'
            age_years = int(age[0][0] * AGE_SCALE)

            cv2.rectangle(frame, (x, y), (x + w, y + h), (100, 50, 200), 3)
            # Place the label just above the box, clamped so it stays visible
            # when the face touches the top edge.
            cv2.putText(frame, f'gender:{gender_label}, age: {age_years}', (x, max(y - 10, 20)),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 255, 255), 1, cv2.LINE_AA)

        video_output.write(frame)
finally:
    # Always release both handles so output.mp4 is finalised even on errors.
    cap.release()
    video_output.release()
