In [18]:
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image
import numpy as np
import urllib.request
import os
from collections import deque

In [19]:
# 1. OpenCV Face Detection Model (DNN-based face detector model files)
prototxt_url = "https://raw.githubusercontent.com/opencv/opencv/master/samples/dnn/face_detector/deploy.prototxt"
caffemodel_url = "https://raw.githubusercontent.com/opencv/opencv_3rdparty/dnn_samples_face_detector_20170830/res10_300x300_ssd_iter_140000.caffemodel"

In [20]:
# 2. Download the model files if they don't exist
if not os.path.exists("deploy.prototxt"):
    print("Downloading deploy.prototxt...")
    urllib.request.urlretrieve(prototxt_url, "deploy.prototxt")
if not os.path.exists("res10_300x300_ssd_iter_140000.caffemodel"):
    print("Downloading res10_300x300_ssd_iter_140000.caffemodel...")
    urllib.request.urlretrieve(caffemodel_url, "res10_300x300_ssd_iter_140000.caffemodel")

net = cv2.dnn.readNetFromCaffe("deploy.prototxt", "res10_300x300_ssd_iter_140000.caffemodel")

Downloading deploy.prototxt...
Downloading res10_300x300_ssd_iter_140000.caffemodel...


In [21]:
# 3. Device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cpu


In [22]:
# 4. Build Model
model = models.mobilenet_v2(weights=None) 
model.classifier[1] = nn.Linear(model.last_channel, 2)

In [23]:
# 5. Load Model Weights
try:
    model.load_state_dict(torch.load('men_women_mobilenet.pth', map_location=device))
    print("Model loaded successfully!")
except FileNotFoundError:
    print("Error: 'men_women_mobilenet.pth' Not Found. Please train the model first.")

model.to(device)
model.eval()

Model loaded successfully!


MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

In [24]:
# 6. Data Transformations
data_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [None]:
# 7. Classes Label
classes = ['Men', 'Women']

In [26]:
# 8. Smoothing Setup
history_length = 7
pred_history = deque(maxlen=history_length)

In [27]:
# 9. Capture
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print("Error: Can't Open Webcam")
else:
    print("press 'Q' to exit")
    try:
        while True:
            ret, frame = cap.read()
            if not ret: break

            frame = cv2.flip(frame, 1)
            h, w = frame.shape[:2]

            # OpenCV DNN detect faces
            blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 1.0, (300, 300), (104.0, 177.0, 123.0))
            net.setInput(blob)
            detections = net.forward()

            face_detected = False
            
            # Loop to process confidence more than 60%
            for i in range(0, detections.shape[2]):
                confidence = detections[0, 0, i, 2]

                if confidence > 0.6:
                    face_detected = True
                    box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
                    (startX, startY, endX, endY) = box.astype("int")

                    # Box Padding
                    box_w = endX - startX
                    box_h = endY - startY
                    pad_w = int(box_w * 0.1)
                    pad_h = int(box_h * 0.2)

                    startX = max(0, startX - pad_w)
                    startY = max(0, startY - pad_h)
                    endX = min(w, endX + pad_w)
                    endY = min(h, endY + pad_h)

                    face_roi = frame[startY:endY, startX:endX]
                    if face_roi.size == 0: continue

                    face_rgb = cv2.cvtColor(face_roi, cv2.COLOR_BGR2RGB)
                    pil_img = Image.fromarray(face_rgb)

                    input_tensor = data_transform(pil_img).unsqueeze(0).to(device)

                    with torch.no_grad():
                        outputs = model(input_tensor)
                        probs = F.softmax(outputs, dim=1)
                        men_prob = probs[0][0].item() 
                        pred_history.append(men_prob)

                    # Average
                    avg_men_prob = sum(pred_history) / len(pred_history)

                    if avg_men_prob > 0.5:
                        label = 'Men'
                        conf_val = avg_men_prob * 100
                        color = (255, 0, 0)
                    else:
                        label = 'Women'
                        conf_val = (1 - avg_men_prob) * 100
                        color = (147, 20, 255)
                
                    if conf_val < 60.0: 
                        label_text = f"Unknown ({conf_val:.0f}%)"
                        color = (100, 100, 100) 
                    else:
                        label_text = f"{label} {conf_val:.1f}%"

                    cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)
                    cv2.putText(frame, label_text, (startX, startY-10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)

            # Reset if not detected to avoid stale predictions
            if not face_detected:
                pred_history.clear()

            cv2.imshow('Webcam', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    except KeyboardInterrupt:
        print("\nUser stopped program (Ctrl+C)")
    except Exception as e:
        print(f"\nAn error occurred: {e}")

    finally:
        print("Cleaning up resources...")
        if cap.isOpened():
            cap.release() 
        cv2.destroyAllWindows() 
        print("Camera released. Program ended.")

press 'Q' to exit
Cleaning up resources...
Camera released. Program ended.
