In [50]:
from utils import *
import numpy as np
import matplotlib.pyplot as plt
from haar_like_features import *
from AdaBoost.AdaBoost import *
from sklearn.metrics import confusion_matrix, classification_report
# Shared helper instance; the cells below call its load_pickle, save_pickle
# and merge_P_N methods.
utils = Utils()

# Notebook setup: render matplotlib figures inline and enable the autoreload
# extension in mode 2 so edited project modules are re-imported before each
# cell executes.
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [51]:
# Load the training and validation splits. Each pickle is unpacked into a
# (samples, labels) pair.
# NOTE(review): load_pickle presumably wraps pickle.load -- only point it at
# trusted files; confirm against the Utils implementation.
X_train, y_train = utils.load_pickle('./dataset/train_dataset.pkl')
X_val, y_val = utils.load_pickle('./dataset/val_dataset.pkl')

In [52]:
# Sanity check: show the array shapes of both splits.
print("Train size: ", X_train.shape)
print("Validation size: ", X_val.shape)

Train size:  (6279, 6066)
Validation size:  (698, 6066)


In [71]:
def _train_classifier(P, N, n):
    """Build and fit one AdaBoost classifier with ``n`` features.

    The positive samples ``P`` and negative samples ``N`` are combined into a
    single ``(X, y)`` training set by ``utils.merge_P_N``; ``n`` is handed
    straight to the ``AdaBoostClassifier`` constructor.

    Returns the fitted classifier.
    """
    booster = AdaBoostClassifier(n)
    samples, labels = utils.merge_P_N(P, N)
    booster.fit(samples, labels)
    return booster

In [72]:
# Train one classifier with a single feature, using every training sample
# labelled 1 as positives and every sample labelled 0 as negatives.
clf = _train_classifier(X_train[y_train == 1], X_train[y_train == 0], 1)

In [108]:
def _eval(clf, X_val, y_val, thresh=0.5, verboase=False):
    ypred = clf.predict_th(X_val, thresh)
    true_pos = np.sum((ypred == 1) & (y_val == 1))
    true_neg = np.sum((ypred == 0) & (y_val == 0))
    false_pos = np.sum((ypred == 1) & (y_val == 0))
    false_neg = np.sum((ypred == 0) & (y_val == 1))

    false_postive_rate = false_pos / (false_pos+true_neg)
    detection_rate = true_pos / (false_neg+true_pos)
    if verboase:
        print("True Positive: ", true_pos)
        print("True Negative: ", true_neg)
        print("False Positive: ", false_pos)
        print("False Negative: ", false_neg)
        print("FPR: ", false_postive_rate)
        print("DR: ", detection_rate)
    return false_postive_rate, detection_rate

In [109]:
def _update_N(N, clf, threshold):
    """
    update negative samples
    """
    false_N_pred = clf.predict_th(N, threshold)
    return N[false_N_pred == 1]

In [110]:
# Module-level accumulators: train() appends every classifier it fits, and the
# threshold chosen for it, to these two lists (same index = same classifier).
clfs = []
thresholds = []
# train() keeps adding layers while the current false-positive rate F1 > Ftarget.
Ftarget = 0.07
# Within a layer, features keep being added while F1 > f * F0
# (F0 = false-positive rate at the start of the layer).
f = 0.60
# A classifier's threshold is lowered while its detection rate D1 < d * D0
# (D0 = detection rate at the start of the layer).
d = 0.94
# Amount subtracted from the threshold on each lowering step.
step = 0.05

In [111]:
def train(P_train, N_train, X_val, y_val):
    """Train a cascade of AdaBoost classifiers, one layer at a time.

    Parameters
    ----------
    P_train : array-like
        Positive training samples; used unchanged for every layer.
    N_train : array-like
        Negative training samples. A copy is taken and, after each layer,
        reduced by ``_update_N`` to the negatives the layer still accepts.
    X_val, y_val : array-like
        Validation samples and labels passed to ``_eval``.

    Side effects
    ------------
    * Appends every fitted classifier to the module-level ``clfs`` list and
      its threshold to ``thresholds``.
    * Saves ``(clf.models, clf.alphas)`` for every fitted classifier to
      ``./models/layer_{i}_classifier_{n}.pkl``.
    * Prints progress.

    Reads the module-level settings ``Ftarget``, ``f``, ``d`` and ``step``.
    Returns ``None``.

    NOTE(review): classifiers are recorded inside the feature-growing loop, so
    intermediate (n = 1, 2, ...) classifiers of a layer end up in ``clfs`` too,
    and F1/D1 are measured for the newest classifier alone rather than for the
    whole cascade. Both differ from the textbook Viola-Jones procedure; left
    as-is because later cells may rely on the current contents of ``clfs`` --
    confirm the intent.
    """
    min_threshold = -1.0  # lowest threshold the search is allowed to reach

    P = P_train
    N = N_train.copy()
    F1 = 1
    D1 = 1
    i = 0
    while F1 > Ftarget and len(N) > 0:
        i = i + 1
        F0 = F1
        D0 = D1
        n = 0
        print(f"=================== Training layer {i} FPR = {F1}====================")
        # Grow the layer one feature at a time until its false-positive rate
        # drops to f * F0.
        # NOTE(review): there is no upper bound on n here; if the target is
        # unreachable this keeps training ever larger classifiers.
        while F1 > f * F0:
            print(f"\t=== Start Training classifier with {n + 1} features ===")
            n = n + 1
            clf = _train_classifier(P, N, n)
            threshold = 1
            F1, D1 = _eval(clf, X_val, y_val, threshold)

            # Lower the threshold until the detection-rate target is met.
            while D1 < d * D0:
                if threshold <= min_threshold:
                    # The floor has already been evaluated and the target is
                    # still unmet. Without this exit the loop would re-evaluate
                    # the same threshold forever.
                    print(f"\t\tWarning: detection target {d * D0} not reached at minimum threshold {min_threshold} (D = {D1})")
                    break
                print(f"\t\t\nRevaluate Classifier with {threshold} threshold \n\t D ={D1} target_D = {d * D0} \n\t F = {F1} target_F = {f * F0}")
                threshold = max(threshold - step, min_threshold)
                F1, D1 = _eval(clf, X_val, y_val, threshold)

            print(f"\tFinished Training weak classifier with {n} features with: ")
            print("\tFalse Postive rate = ", F1)
            print("\tDetection rate = ", D1)
            print("\tThreshold = ", threshold)
            print("\t=====================")
            clfs.append(clf)
            utils.save_pickle((clf.models, clf.alphas), f"./models/layer_{i}_classifier_{n}.pkl")
            thresholds.append(threshold)

        # Another layer is needed: keep only the negatives this layer still
        # classifies as positive so the next layer trains on the hard cases.
        if F1 > Ftarget:
            N = _update_N(N, clf, threshold)

In [112]:
# Run cascade training: positives are the training samples labelled 1,
# negatives those labelled 0; the validation split is used for evaluation.
train(X_train[y_train == 1], X_train[y_train == 0], X_val, y_val)

	=== Start Training classifier with 1 features ===
	
Revaluate Classifier with 1 threshold 
	 D =0.0 target_D = 0.94 
	 F = 0.0 target_F = 0.6
	
Revaluate Classifier with 0.95 threshold 
	 D =0.0 target_D = 0.94 
	 F = 0.0 target_F = 0.6
	
Revaluate Classifier with 0.8999999999999999 threshold 
	 D =0.0 target_D = 0.94 
	 F = 0.0 target_F = 0.6
	Finished Training weak classifier with 1 features with: 
	False Postive rate =  0.25054945054945055
	Detection rate =  0.9629629629629629
	Threshold =  0.8499999999999999
	=== Start Training classifier with 1 features ===
	
Revaluate Classifier with 1 threshold 
	 D =0.0 target_D = 0.9051851851851851 
	 F = 0.0 target_F = 0.15032967032967032
	
Revaluate Classifier with 0.95 threshold 
	 D =0.0 target_D = 0.9051851851851851 
	 F = 0.0 target_F = 0.15032967032967032
	
Revaluate Classifier with 0.8999999999999999 threshold 
	 D =0.0 target_D = 0.9051851851851851 
	 F = 0.0 target_F = 0.15032967032967032
	
Revaluate Classifier with 0.84999999999999