In [1]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
# Define paths to the positive and negative folders
dataset_path = os.path.abspath("Widerfaces2")  
positive_path = os.path.join(dataset_path, "Positive")
negative_path = os.path.join(dataset_path, "Negative")

# Verify paths
if not os.path.exists(positive_path):
    print(f"Error: Positive path '{positive_path}' does not exist!")
if not os.path.exists(negative_path):
    print(f"Error: Negative path '{negative_path}' does not exist!")

In [3]:
# Preprocessing function
def preprocess_images(folder_path, label, size=(24, 24)):
    images = []
    labels = []
    for file in os.listdir(folder_path):
        if file.endswith(('.jpg', '.png')):
            img_path = os.path.join(folder_path, file)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                continue  # Skip unreadable images
            img_resized = cv2.resize(img, size)
            images.append(img_resized)
            labels.append(label)
    return np.array(images), np.array(labels)

# Preprocess Positive and Negative samples
positive_images, positive_labels = preprocess_images(positive_path, label=1)
negative_images, negative_labels = preprocess_images(negative_path, label=0)

# Combine datasets
all_images = np.vstack((positive_images, negative_images))
all_labels = np.hstack((positive_labels, negative_labels))

print(f"Total images: {len(all_images)}")
print(f"Labels distribution: {np.unique(all_labels, return_counts=True)}")

Total images: 2330
Labels distribution: (array([0, 1]), array([ 150, 2180], dtype=int64))


In [4]:
# Compute integral image
def compute_integral_image(image):
    integral_img = np.zeros_like(image, dtype=np.int32)
    for i in range(image.shape[0]):
        for j in range(image.shape[1]):
            integral_img[i, j] = image[i, j] + \
                                 (integral_img[i-1, j] if i > 0 else 0) + \
                                 (integral_img[i, j-1] if j > 0 else 0) - \
                                 (integral_img[i-1, j-1] if i > 0 and j > 0 else 0)
    return integral_img

# Haar-like features
def haar_feature_sum(integral_image, x, y, width, height, feature_type):
    if feature_type == 'edge':
        mid = height // 2
        top_sum = integral_image[y + mid, x + width] - integral_image[y, x + width] - integral_image[y + mid, x] + integral_image[y, x]
        bottom_sum = integral_image[y + height, x + width] - integral_image[y + mid, x + width] - integral_image[y + height, x] + integral_image[y + mid, x]
        return bottom_sum - top_sum
    elif feature_type == 'line':
        mid = width // 2
        left_sum = integral_image[y + height, x + mid] - integral_image[y, x + mid] - integral_image[y + height, x] + integral_image[y, x]
        right_sum = integral_image[y + height, x + width] - integral_image[y, x + width] - integral_image[y + height, x + mid] + integral_image[y, x + mid]
        return right_sum - left_sum
    else:
        raise ValueError("Invalid feature type")

# Extract Haar features
def extract_haar_features(images):
    features = []
    for img in images:
        integral_img = compute_integral_image(img)
        feature_vec = []
        for x in range(0, img.shape[1] - 10, 10):
            for y in range(0, img.shape[0] - 10, 10):
                feature_vec.append(haar_feature_sum(integral_img, x, y, 10, 10, 'edge'))
        features.append(feature_vec)
    return np.array(features)

# Extract features
haar_features = extract_haar_features(all_images)
print(f"Extracted Haar features for {len(haar_features)} images.")

# Split data
X_train, X_test, y_train, y_test = train_test_split(haar_features, all_labels, test_size=0.2, random_state=42)
print(f"Training samples: {len(X_train)}, Testing samples: {len(X_test)}")

# Decision Stump class
class DecisionStump:
    def __init__(self):
        self.feature_idx = None  
        self.threshold = None  
        self.polarity = 1  
        self.alpha = 0 

    def predict(self, X):
        n_samples = X.shape[0]
        predictions = np.ones(n_samples)
        if self.polarity == 1:
            predictions[X[:, self.feature_idx] < self.threshold] = -1
        else:
            predictions[X[:, self.feature_idx] >= self.threshold] = -1
        return predictions

# Train decision stump
def train_decision_stump(X, y, sample_weights):
    n_samples, n_features = X.shape
    stump = DecisionStump()
    min_error = float('inf')

    for feature_idx in range(n_features):
        feature_values = X[:, feature_idx]
        thresholds = np.unique(feature_values)

        for threshold in thresholds:
            for polarity in [1, -1]:
                predictions = np.ones(n_samples)
                if polarity == 1:
                    predictions[feature_values < threshold] = -1
                else:
                    predictions[feature_values >= threshold] = -1

                error = np.sum(sample_weights[y != predictions])
                if error < min_error:
                    min_error = error
                    stump.feature_idx = feature_idx
                    stump.threshold = threshold
                    stump.polarity = polarity

    return stump, min_error

# Train AdaBoost
def train_adaboost(X, y, n_classifiers):
    n_samples, n_features = X.shape
    sample_weights = np.ones(n_samples) / n_samples
    classifiers = []

    for _ in range(n_classifiers):
        stump, error = train_decision_stump(X, y, sample_weights)
        epsilon = 1e-10
        stump.alpha = 0.5 * np.log((1 - error) / (error + epsilon))
        predictions = stump.predict(X)
        sample_weights *= np.exp(-stump.alpha * y * predictions)
        sample_weights /= np.sum(sample_weights)
        classifiers.append(stump)

    return classifiers

# Predict with AdaBoost
def adaboost_predict(classifiers, X):
    n_samples = X.shape[0]
    final_predictions = np.zeros(n_samples)

    for stump in classifiers:
        predictions = stump.predict(X)
        final_predictions += stump.alpha * predictions

    return np.sign(final_predictions)

Extracted Haar features for 2330 images.
Training samples: 1864, Testing samples: 466


In [5]:
# Train cascaded classifiers
def train_cascade(X, y, stage_features):
    cascade_classifiers = []
    for stage, num_features in enumerate(stage_features):
        X_stage = X[:, :num_features]
        classifiers = train_adaboost(X_stage, y, n_classifiers=100)
        cascade_classifiers.append(classifiers)
        print(f"Trained stage {stage+1} with {num_features} features.")
    return cascade_classifiers

# Predict with cascaded classifiers
def cascade_predict(cascade_classifiers, X):
    """
    Predict using cascaded classifiers with early rejection.
    Args:
        cascade_classifiers: List of classifiers for each stage.
        X: Feature matrix for prediction.
    Returns:
        Predictions for the input data.
    """
    for stage, classifiers in enumerate(cascade_classifiers):
        
        num_features = stage_features[stage]
        X_stage = X[:, :num_features]
        
        
        predictions = adaboost_predict(classifiers, X_stage)
        
        # Early rejection: If any sample is classified as negative, reject it
        if np.any(predictions == -1):
            return -1  
    return 1  


# Define cascade stages
stage_features = [5, 10, 20]

# Convert labels to {-1, 1}
y_train_boost = np.where(y_train == 1, 1, -1)
y_test_boost = np.where(y_test == 1, 1, -1)

# Train the cascade
cascade_classifiers = train_cascade(X_train, y_train_boost, stage_features)


Trained stage 1 with 5 features.
Trained stage 2 with 10 features.
Trained stage 3 with 20 features.


In [6]:
# Evaluate cascade
cascade_results = [cascade_predict(cascade_classifiers, sample.reshape(1, -1)) for sample in X_test]
cascade_accuracy = np.mean(np.array(cascade_results) == y_test_boost)
print(f"Cascade Test Accuracy: {cascade_accuracy * 100:.2f}%")

Cascade Test Accuracy: 97.00%
