In [8]:
import os
import cv2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### Train a Custom Face Detection Model 
Let's train your own face detector using a CNN-based model. Here I will use PyTorch:

In [9]:
def preprocess_images(image_dir, target_size=(224, 224)):
    """Load every image found under ``image_dir`` and resize it to ``target_size``.

    The directory tree is walked recursively. A file is treated as an image
    when its extension is ``.jpg``, ``.jpeg`` or ``.png`` (case-insensitive).
    Each match is read with ``cv2.imread`` and resized with ``cv2.resize``;
    files that ``cv2.imread`` cannot decode (it returns ``None``) are skipped.

    Parameters
    ----------
    image_dir : str or path-like
        Root folder to search for image files.
    target_size : tuple of int, default (224, 224)
        Passed unchanged to ``cv2.resize`` as the output size.

    Returns
    -------
    numpy.ndarray
        ``np.array`` built from the resized images, in sorted traversal order.
        The array is empty when no image could be loaded.
    """
    image_extensions = {'.jpg', '.jpeg', '.png'}
    images = []

    for root, dirs, files in os.walk(image_dir):  # Traverse through subfolders
        # Sorting in place makes os.walk visit sub-folders in a deterministic order,
        # so the returned array is reproducible across runs and machines.
        dirs.sort()
        for file in sorted(files):
            # Compare the real, lower-cased extension: a plain endswith('jpg') misses
            # 'PHOTO.JPG' and wrongly accepts names such as 'fakejpg'.
            if os.path.splitext(file)[1].lower() not in image_extensions:
                continue
            img = cv2.imread(os.path.join(root, file))
            if img is None:
                # Unreadable or corrupt file: skip it rather than abort the whole scan.
                continue
            images.append(cv2.resize(img, target_size))

    return np.array(images)

In [10]:
# Root folder of the LFW (deep-funneled) images. Set the LFW_DATA_DIR environment
# variable to point at your own copy of the dataset; the hard-coded path below is
# only a fallback and exists on the original author's machine alone.
wf_data_path = os.environ.get(
    "LFW_DATA_DIR",
    "C:/Users/galin/Downloads/LFW dataset/lfw-deepfunneled",
)
# Load and resize every image found under that folder.
lfw_images = preprocess_images(wf_data_path)

In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from torch.utils.data import DataLoader

# Define a basic CNN architecture
class FaceDetectorCNN(nn.Module):
    """Small two-convolution CNN that outputs two logits (face / no face).

    Each 3x3 convolution is followed by ReLU and 2x2 max-pooling. For a
    224x224 input the spatial size goes 224 -> 222 -> 111 -> 109 -> 54, which
    is exactly the ``64*54*54`` feature count ``fc1`` is sized for. Inputs of a
    different resolution will not match ``fc1``.

    Parameters
    ----------
    in_channels : int, default 1
        Number of channels of the input images (1 for grayscale, 3 for colour).
    """

    def __init__(self, in_channels=1):
        super(FaceDetectorCNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        # Parameter-free down-sampling; without it the flattened feature map of a
        # 224x224 input would be 64*220*220 and fc1 would fail with a shape error.
        self.pool = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(64*54*54, 128)
        self.fc2 = nn.Linear(128, 2)  # Binary classification: face or no face

    def forward(self, x):
        """Return raw class logits of shape (batch, 2) for a batch of images ``x``."""
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # keep the batch dimension, flatten the rest
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)  # logits; nn.CrossEntropyLoss applies the softmax itself
        return x

# Prepare your data.
# FaceDetectorCNN's first convolution takes a single input channel, so images are
# converted to one-channel grayscale before being resized and turned into tensors.
transform = transforms.Compose([transforms.Grayscale(num_output_channels=1),
                                transforms.Resize((224, 224)),
                                transforms.ToTensor()])

# ImageFolder reads images from a root directory on disk; it cannot take the
# in-memory `lfw_images` array, so it is pointed at the dataset folder instead.
# ImageFolder assigns one class index per sub-folder, which would exceed the model's
# two outputs, so every LFW sample is labelled 1 ("face").
# NOTE: LFW contains only faces. Add non-face images labelled 0 (e.g. through
# torch.utils.data.ConcatDataset) before expecting a useful face / no-face classifier.
train_dataset = datasets.ImageFolder(
    wf_data_path,
    transform=transform,
    target_transform=lambda _class_index: 1,
)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Initialize model, loss, and optimizer
model = FaceDetectorCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
model.train()
for epoch in range(10):  # Train for 10 epochs
    epoch_loss = 0.0
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    # Report the mean loss over the whole epoch rather than only the last batch's loss.
    print(f'Epoch {epoch+1}, Loss: {epoch_loss / len(train_loader)}')


### 1. Dataset Collection
You will need a dataset with images of faces and non-faces. Common datasets used for face detection are:

- Labeled Faces in the Wild (LFW)
- FDDB (Face Detection Dataset and Benchmark)

For simplicity, let's assume you are using OpenCV's pre-trained Haar Cascade classifier as the initial face detector.

In [1]:
# Root folder of the LFW dataset. Set the LFW_DATA_DIR environment variable to point
# at your own copy; the hard-coded path below is only a fallback and exists on the
# original author's machine alone.
wf_data_path = os.environ.get(
    "LFW_DATA_DIR",
    "C:/Users/ggeorgieva.HAEMIMONT/Downloads/LFW dataset/lfw-deepfunneled/",
)

### 2. Feature Extraction

To train a machine learning model, you need features that represent faces. Haar Features are a good choice for this task.

You can use HOG features, or you can directly use the Haar Cascade features (which OpenCV already provides as XML files) for detecting faces.

The HOG features are typically used with an SVM classifier, which could work well without using deep learning.

You can extract HOG features as follows:

In [4]:
import cv2
from skimage.feature import hog
from skimage import exposure

# Load image
image_path = f'{wf_data_path}/lfw-deepfunneled/Zico/Zico_0001.jpg'
image = cv2.imread(image_path)
# cv2.imread reports a missing or unreadable file by returning None instead of raising,
# so fail here with a clear message rather than inside cvtColor.
if image is None:
    raise FileNotFoundError(f'Could not read image: {image_path}')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Extract HOG features (fd is the feature vector, hog_image a visualisation of it)
fd, hog_image = hog(gray, visualize=True, block_norm='L2-Hys')

# Rescale the HOG image for display
hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 10))

# Show the result inline with matplotlib: cv2.imshow opens a native window and
# cv2.waitKey(0) blocks until a key is pressed, which stalls "Run All" and fails
# on machines without a display.
fig, ax = plt.subplots()
ax.imshow(hog_image_rescaled, cmap='gray')
ax.set_title('HOG Image')
ax.axis('off')
plt.show()


### 3. Train a Support Vector Machine (SVM) Classifier
You can use SVM to classify whether the extracted features correspond to a face or not.




In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import numpy as np

# Assuming X contains the extracted features (HOG or others) and y contains labels (1 for face, 0 for non-face)
# NOTE: the two arrays below are placeholders; replace `[...]` with real data before running this cell.
X = np.array([...])  # Features
y = np.array([...])  # Labels (1 for face, 0 for non-face)

# Split dataset into training and testing.
# random_state makes the split reproducible on every run; stratify=y keeps the
# face / non-face ratio the same in the training and test parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Initialize and train the SVM classifier
clf = SVC(kernel='linear')  # You can experiment with other kernels like 'rbf'
clf.fit(X_train, y_train)

# Test the classifier
y_pred = clf.predict(X_test)

# Evaluate performance
print(classification_report(y_test, y_pred))


You can experiment with different classifiers like Random Forest, k-NN, or Logistic Regression, but SVM tends to work well for image classification tasks.

### 4. Integrating the Model for Face Detection
Once you have a trained model, you can use OpenCV to detect faces in a live video feed or an image. You can use OpenCV's Haar Cascade or your SVM model for this task.

In [None]:
# Initialize the trained model (if using an SVM)
# clf = load_trained_model()  # Example of loading an SVM model

# Initialize OpenCV's Haar Cascade classifier.
# Prefer a cascade file in the working directory; otherwise fall back to the copy
# that ships with the opencv-python package.
cascade_file = 'haarcascade_frontalface_default.xml'
if not os.path.exists(cascade_file):
    cascade_file = os.path.join(cv2.data.haarcascades, cascade_file)
face_cascade = cv2.CascadeClassifier(cascade_file)
# CascadeClassifier does not raise on a missing file; it silently stays empty and
# only fails later inside detectMultiScale, so check explicitly.
if face_cascade.empty():
    raise IOError(f'Could not load Haar cascade: {cascade_file}')

# Read image
img = cv2.imread('test_image.jpg')
# cv2.imread returns None (no exception) when the file is missing or unreadable.
if img is None:
    raise FileNotFoundError('Could not read image: test_image.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Detect faces
faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)

# Draw rectangles around faces.
# Distinct loop names keep this cell from overwriting notebook-level variables
# such as the label array `y`.
for (face_x, face_y, face_w, face_h) in faces:
    cv2.rectangle(img, (face_x, face_y), (face_x + face_w, face_y + face_h), (255, 0, 0), 2)

# Show the image with detected faces inline. OpenCV stores pixels as BGR while
# matplotlib expects RGB, hence the conversion. (cv2.imshow / cv2.waitKey(0) would
# open a blocking native window and stall "Run All".)
fig, ax = plt.subplots()
ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
ax.set_title('Face Detection')
ax.axis('off')
plt.show()
