In [1]:
import cv2
from skimage import io
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import math
import sys
import pickle
import random
from haar_like_features import *
from AdaBoost.AdaBoost import *
from utils import *
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.ensemble import AdaBoostClassifier as SklearnAdaBoost

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [18]:
utils = Utils()

In [166]:
class CascadeClassifier():
    def __init__(self, max_acceptable_false_positive_rate, min_acceptable_detection_rate, overall_false_positive_rate):
        """
        max_acceptable_false_positive_rate: maximum acceptable false positive rate for each layer
        min_acceptable_detection_rate: minimum acceptable detection rate for each layer
        overall_false_positive_rate: overall false positive rate for the cascade classifier
        clfs: list of strong classifiers
        """
        self.clfs = []
        self.f = max_acceptable_false_positive_rate
        self.d = min_acceptable_detection_rate
        self.F = overall_false_positive_rate
        self.utils = Utils()
    

    def _train_classifier(self, P, N, n):
        """
        train an AdaBoost classifier with n features
        """
        clf = AdaBoostClassifier(n_estimators=n)
        X, y = self.utils.merge_P_N(P, N)
        clf.fit(X, y)
        return clf
    
    def _get_clf_eval_data(self, clf, P, N, threshold):
        """
        get false positive rate of cascade classifier and detection rate
        """
        p_pred = clf.predict(P, threshold)
        n_pred = clf.predict(N, threshold)
        
        # Compute true positive, false positive, true negative, false negative
        true_positives = np.sum(p_pred == 1)
        false_positives = np.sum(n_pred == 1)
        true_negatives = np.sum(n_pred == -1)
        false_negatives = np.sum(p_pred == -1)

        # Calculate detection rate (D) and false positive rate (F)
        detection_rate = true_positives / (true_positives + false_negatives)
        false_positive_rate = false_positives / (false_positives + true_negatives)

        return false_positive_rate, detection_rate
    
    def _update_N(self, N, model):
        """
        update negative samples
        """
        false_N_pred = model.predict(N)
        false_N_pred = N[false_N_pred == 1]
        return false_N_pred
    def train(self, P_train, N_train, P_val, N_val, T):
        P = P_train
        N = N_train.copy()
        f1 = 1
        D1 = 1
        scs = []
        this.scs = []
        while i <= T:
            i = i + 1
            print(f"========= Training layer {i} ==========")
            F.append(1)
            D.append(1)
            n.append(0)
            
            clf = None
            while F[i] > self.f * F[i - 1]:
                # add one feature
                n[i] = n[i] + 1
                print(f"\t=== Start Training weak classifier with {n[i]} features ===")
                
                # 1. train classifier
                clf = self._train_classifier(P, N, n[i])
                # 2. evaluate classifier
                threshold = 0.5
                F[i], D[i] = self._get_clf_eval_data(clf, P_val, N_val, threshold)
                while D[i] < self.d * D[i - 1] and threshold > 0:
                    print(f"\tRetrain Classifier with {threshold} threshold ")
                    threshold = threshold - 0.1
                    F[i], D[i] = self._get_clf_eval_data(clf, P_val, N_val, threshold)
                print(f"\tFinished Training weak classifier with {n[i]} features with: ")
                print("\tFalse Postive rate = ", F[i])
                print("\tDetection rate = ", D[i])
                
            self.clfs.append(clf)
            N = self._update_N(N, clf)

In [172]:
class SimplifyCascadeClassifier():
    def __init__(self, layers):
        """
        max_acceptable_false_positive_rate: maximum acceptable false positive rate for each layer
        min_acceptable_detection_rate: minimum acceptable detection rate for each layer
        overall_false_positive_rate: overall false positive rate for the cascade classifier
        clfs: list of strong classifiers
        """
        self.clfs = []
        self.layers = layers
        self.T = len(layers)
        self.utils = Utils()
    

    def _train_classifier(self, P, N, n):
        """
        train an AdaBoost classifier with n features
        """
        clf = AdaBoostClassifier(n_estimators=n)
        X, y = self.utils.merge_P_N(P, N)
        clf.fit(X, y)
        return clf
    
    def _get_clf_eval_data(self, clf, P, N, threshold):
        """
        get false positive rate of cascade classifier and detection rate
        """
        p_pred = clf.predict(P, threshold)
        n_pred = clf.predict(N, threshold)
        
        # Compute true positive, false positive, true negative, false negative
        true_positives = np.sum(p_pred == 1)
        false_positives = np.sum(n_pred == 1)
        true_negatives = np.sum(n_pred == -1)
        false_negatives = np.sum(p_pred == -1)

        # Calculate detection rate (D) and false positive rate (F)
        detection_rate = true_positives / (true_positives + false_negatives)
        false_positive_rate = false_positives / (false_positives + true_negatives)

        return false_positive_rate, detection_rate
    
    def _update_N(self, N, model):
        """
        update negative samples
        """
        false_N_pred = model.predict(N)
        false_N_pred = N[false_N_pred == 1]
        return false_N_pred

    def train(self, P_train, N_train, P_val, N_val, T):
        i = 0
        while i < self.T and len(N_train) > 0:
            print(f"========= Training layer {i} ==========")
            clf = self._train_classifier(P_train, N_train, self.layers[i])
            self.clfs.append(clf)
            N_train = self._update_N(N_train, clf)
 
    def predict(self, X):
        """
        predict whether the image contains face
        """
        for clf in self.clfs:
            if clf.predict(X) == -1:
                return -1
        return 1

In [173]:
cascadeClassifier = CascadeClassifier(0.05, 0.05, 0.001)

In [174]:
simpl = SimplifyCascadeClassifier([10, 10, 10, 10, 10, 10])

In [175]:
P = utils.load_pickle("./dataset/pkls/train/faces_features.pkl")
N = utils.load_pickle("./dataset/pkls/train/non_faces_features.pkl")

P_train, P_val, N_train, N_val = utils.split_data(P, N, 0.1)
print("P_train: ", P_train.shape)
print("P_val: ", P_val.shape)
print("N_train: ", N_train.shape)
print("N_val: ", N_val.shape)

P_train:  (2186, 6066)
P_val:  (243, 6066)
N_train:  (2186, 6066)
N_val:  (243, 6066)


In [176]:
simpl.train(P_train, N_train, P_val, N_val, 6)



ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [None]:
cascadeClassifier.train(P_train, N_train, P_val, N_val, 4)

	=== Start Training weak classifier with 1 features ===
	Retrain Classifier with 0.5 threshold 
	Finished Training weak classifier with 1 features with: 
	False Postive rate =  1.0
	Detection rate =  1.0
	=== Start Training weak classifier with 2 features ===
	Retrain Classifier with 0.5 threshold 
	Finished Training weak classifier with 2 features with: 
	False Postive rate =  1.0
	Detection rate =  1.0
	=== Start Training weak classifier with 3 features ===
	Retrain Classifier with 0.5 threshold 
	Finished Training weak classifier with 3 features with: 
	False Postive rate =  1.0
	Detection rate =  1.0
	=== Start Training weak classifier with 4 features ===


KeyboardInterrupt: 

In [104]:
from sklearn.tree import DecisionTreeClassifier
model = DecisionTreeClassifier(max_depth=20)
X, y = utils.merge_P_N(P_train, N_train)
w = np.ones(X.shape[0])
w[y == 1] *= 1 / (2 * P_train.shape[0])
w[y == 0] *= 1 / (2 * N_train.shape[0])
model.fit(X, y, sample_weight=w)

In [105]:
model.predict_proba(P)

array([1., 1., 1., ..., 0., 0., 0.])