In [None]:
import pickle
import sklearn
import numpy as np
from sklearn.tree import DecisionTreeClassifier
import math

class AdaBoostClassifier:
    '''A simple AdaBoost classifier for binary labels in {-1, +1}.

    Every boosting round trains a fresh copy of the weak classifier on the
    re-weighted training set and stores it together with its vote weight
    (alpha). Predictions are the sign of the alpha-weighted sum of the votes
    of all stored weak classifiers.

    Attributes:
        weak_classifier: Template of the weak learner (a class or an unfitted instance).
        n_weakers_limit: The maximum number of weak classifiers to train.
        classifiers: The fitted weak classifiers, in training order.
        alphas: The vote weight of each entry in ``classifiers``.
    '''

    # Lower bound for the weighted error so that log((1 - e) / e) stays finite.
    _ERROR_EPS = 10 ** (-8)

    def __init__(self, weak_classifier, n_weakers_limit):
        '''Initialize AdaBoostClassifier

        Args:
            weak_classifier: The weak classifier to boost, either a class (called with no
                arguments to build each learner) or an unfitted instance (deep-copied for
                each learner). It must provide fit(X, y, sample_weight=...) and predict(X);
                sklearn.tree.DecisionTreeClassifier is recommended.
            n_weakers_limit: The maximum number of weak classifiers the model can use.
        '''
        self.weak_classifier = weak_classifier
        self.n_weakers_limit = n_weakers_limit
        self.classifiers = []
        self.alphas = []

    def is_good_enough(self):
        '''Optional hook; not implemented, always returns None.'''
        pass

    def _new_weak_classifier(self):
        '''Return a fresh, unfitted weak classifier built from the template.'''
        import copy

        template = self.weak_classifier
        if isinstance(template, type):
            return template()
        # Copy so that the template handed in by the caller is never fitted in place.
        return copy.deepcopy(template)

    @staticmethod
    def _weak_votes(classifier, X):
        '''Return the votes of one weak classifier as a 1-D array of -1 / +1.'''
        raw = np.asarray(classifier.predict(X)).reshape(-1)
        return np.where(raw > 0, 1, -1)

    def fit(self, X, y):
        '''Build a boosted classifier from the training set (X, y).

        Args:
            X: An ndarray indicating the samples to be trained, which shape should be (n_samples,n_features).
            y: An ndarray indicating the ground-truth labels (-1 or +1) corresponding to X,
                which shape should be (n_samples,) or (n_samples,1).

        Returns:
            The fitted classifier itself.

        Raises:
            ValueError: If X is empty, X and y disagree in length, or y holds labels other than -1 / +1.
        '''
        X = np.asarray(X)
        y = np.asarray(y).reshape(-1)
        n_samples = X.shape[0]
        if n_samples == 0:
            raise ValueError('Cannot fit AdaBoostClassifier on an empty training set.')
        if y.shape[0] != n_samples:
            raise ValueError('X and y must contain the same number of samples.')
        if not np.all(np.isin(y, (-1, 1))):
            raise ValueError('Labels must be -1 or +1.')

        self.classifiers = []
        self.alphas = []
        w = np.full(n_samples, 1.0 / n_samples)

        for _ in range(self.n_weakers_limit):
            classifier = self._new_weak_classifier()
            classifier.fit(X, y, sample_weight=w)
            votes = self._weak_votes(classifier, X)

            missed = votes != y
            error = float(np.sum(w[missed]))
            if error >= 0.5 and self.classifiers:
                # No better than chance on the current weights: more rounds cannot help.
                break

            # Clamp on both sides so neither a perfect nor an always-wrong learner breaks the log.
            error = min(max(error, self._ERROR_EPS), 1.0 - self._ERROR_EPS)
            alpha = 0.5 * math.log((1.0 - error) / error)
            self.classifiers.append(classifier)
            self.alphas.append(alpha)

            if not missed.any():
                # A perfect learner leaves the weights unchanged; further rounds would repeat it.
                break

            # Raise the weight of misclassified samples, lower the others, then renormalise.
            w = w * np.exp(-alpha * y * votes)
            w = w / np.sum(w)

        return self

    def predict_scores(self, X):
        '''Calculate the weighted sum score of the whole base classifiers for given samples.

        Args:
            X: An ndarray indicating the samples to be predicted, which shape should be (n_samples,n_features).

        Returns:
            A one-dimensional ndarray with the score of each sample, which shape is (n_samples,).

        Raises:
            RuntimeError: If the model has not been fitted yet.
        '''
        # getattr: models pickled before these attributes existed do not have them.
        classifiers = getattr(self, 'classifiers', None)
        if not classifiers:
            raise RuntimeError('AdaBoostClassifier must be fitted before it can predict.')

        X = np.asarray(X)
        scores = np.zeros(X.shape[0], dtype=float)
        for alpha, classifier in zip(self.alphas, classifiers):
            scores += alpha * self._weak_votes(classifier, X)
        return scores

    def predict(self, X, threshold=0):
        '''Predict the categories for given samples.

        Args:
            X: An ndarray indicating the samples to be predicted, which shape should be (n_samples,n_features).
            threshold: The demarcation number dividing the scores into the two classes.

        Returns:
            A one-dimensional ndarray of predicted labels: +1 where the score exceeds threshold, -1 otherwise.
        '''
        return np.where(self.predict_scores(X) > threshold, 1, -1)

    @staticmethod
    def save(model, filename):
        '''Pickle model to the file named filename.'''
        with open(filename, "wb") as f:
            pickle.dump(model, f)

    @staticmethod
    def load(filename):
        '''Load and return a pickled model from the file named filename.

        NOTE: unpickling can execute arbitrary code; only load files from trusted sources.
        '''
        with open(filename, "rb") as f:
            return pickle.load(f)


In [3]:
import cv2
import os
import numpy as np
import sys
import sklearn
sys.path.append('/Users/taozizhuo/PycharmProjects/experiment/venv/ML2018-lab-03')
from ensemble import AdaBoostClassifier
import feature
import pickle
from sklearn.model_selection import train_test_split


if __name__ == "__main__":

    def get_features(path):
        '''Extract NPD features from every readable image in a directory.

        Each image is converted to grayscale, resized to 25x25 and handed to
        feature.NPDFeature. As a side effect the most recently extracted
        feature vector is pickled to "save1.p".

        Args:
            path: Directory that contains the image files.

        Returns:
            A list with one extracted feature vector per readable image.
        '''
        features = []

        # os.listdir order is arbitrary; sort so repeated runs see the same order.
        for file_name in sorted(os.listdir(path)):
            image_path = os.path.join(path, file_name)
            img = cv2.imread(image_path)
            if img is None:
                # cv2.imread returns None (it does not raise) for files it cannot decode.
                print('Skipping unreadable file: %s' % image_path)
                continue
            gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            img_reshape = cv2.resize(gray_img, (25, 25), interpolation=cv2.INTER_CUBIC)
            image = feature.NPDFeature(img_reshape)
            pre_features = feature.NPDFeature.extract(image)
            # Keep writing the snapshot file, but close the handle; reading it
            # straight back only reproduced pre_features, so that step is gone.
            with open("save1.p", "wb") as snapshot:
                pickle.dump(pre_features, snapshot)
            features.append(pre_features)

        return features


    Faces = np.array(get_features('./datasets/original/face'))
    Non_faces = np.array(get_features('./datasets/original/nonface'))
    n_samples, n_features = Faces.shape
    # Size each label column from its own sample set; the two folders need not match in size.
    y_faces = np.ones((Faces.shape[0], 1), dtype=int)
    y_non_faces = -np.ones((Non_faces.shape[0], 1), dtype=int)
    Faces_train, Faces_val, y_faces_train, y_faces_val = train_test_split(Faces, y_faces, test_size=0.5)
    Non_faces_train, Non_faces_val, y_non_faces_train, y_non_faces_val = train_test_split(Non_faces, y_non_faces,
                                                                                          test_size=0.5)
    X_train = np.concatenate((Faces_train, Non_faces_train), axis=0)
    X_val = np.concatenate((Faces_val, Non_faces_val), axis=0)
    y_train = np.concatenate((y_faces_train, y_non_faces_train), axis=0)
    y_val = np.concatenate((y_faces_val, y_non_faces_val), axis=0)

    # Shuffle samples and labels with one shared permutation; this avoids gluing the
    # labels onto the feature matrix (two full copies and a label dtype change).
    order = np.random.permutation(X_train.shape[0])
    X_train = X_train[order]
    y_train = y_train[order].ravel()
    


Calculating the NPD table...


In [4]:
    # Boost at most ten decision trees on the shuffled training set.
    base_learner = sklearn.tree.DecisionTreeClassifier()
    classifier = AdaBoostClassifier(weak_classifier=base_learner, n_weakers_limit=10)
    classifier.fit(X=X_train, y=y_train)

In [5]:
    # 'import sklearn' alone does not load the metrics submodule; import it
    # explicitly instead of relying on another import having pulled it in.
    import sklearn.metrics

    predicts = classifier.predict(X_val)
    # y_val is an (n, 1) column while the predictions are 1-D; flatten it to match.
    report = sklearn.metrics.classification_report(y_true=np.ravel(y_val), y_pred=predicts, digits=2)


In [28]:
    # Persist the plain-text classification report.
    with open('classifier_report.txt', mode='w') as report_file:
        report_file.write(report)

In [29]:
    # Bare expression: the notebook shows the string repr of the report (newlines escaped).
    # NOTE(review): the saved output of this cell and of the print(report) cell list
    # different per-class figures, so they presumably come from different runs -
    # re-run the notebook top to bottom before relying on either.
    report

'              precision    recall  f1-score   support\n\n          -1       0.83      0.89      0.86       250\n           1       0.88      0.82      0.85       250\n\n   micro avg       0.86      0.86      0.86       500\n   macro avg       0.86      0.86      0.86       500\nweighted avg       0.86      0.86      0.86       500\n'

In [6]:
    # Print the report so its newlines render as a readable table.
    print(report)

              precision    recall  f1-score   support

          -1       0.88      0.83      0.85       250
           1       0.84      0.88      0.86       250

   micro avg       0.86      0.86      0.86       500
   macro avg       0.86      0.86      0.86       500
weighted avg       0.86      0.86      0.86       500



In [8]:
    # Write the report through a context manager so the file is closed even if
    # the write raises.
    with open('classifier_report.txt', 'w') as f:
        f.write(report)