# AdaBoost算法实现
> [ensemble.py](./ensemble.py)为核心算法实现代码
>
> 数据集[下载地址](https://github.com/LiZhenLiangLee/ML2019-lab-03)（dataset文件夹）

In [None]:
import numpy as np
import pickle
from PIL import Image
import os
import feature
from sklearn.utils import Bunch
from sklearn.model_selection import train_test_split
from ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report

## 预处理

In [6]:
def _extract_npd_features(directory):
    """Return the extracted feature vector of every image file in *directory*.

    Each file is opened with PIL, converted to 8-bit grayscale (mode 'L'),
    resized to 24x24 and passed through ``feature.NPDFeature(...).extract()``.
    File names are visited in sorted order so the result does not depend on
    the order in which the operating system lists the directory.
    """
    features = []
    for name in sorted(os.listdir(directory)):
        # The context manager closes the underlying file as soon as the
        # pixels have been converted, so handles are not leaked in the loop.
        with Image.open(os.path.join(directory, name)) as img:
            gray = img.convert('L')
        # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
        # filter under its current name and also exists in older releases.
        gray = gray.resize((24, 24), Image.LANCZOS)
        features.append(feature.NPDFeature(np.array(gray)).extract())
    return features


def image_preprocess(path):
    """Build the face / non-face feature dataset and cache it on disk.

    Every image under ``<path>/face`` and ``<path>/nonface`` is converted to
    a 24x24 grayscale picture and turned into a feature vector by
    ``feature.NPDFeature``.

    Args:
        path: Directory that contains the ``face`` and ``nonface`` subfolders.

    Returns:
        A ``Bunch`` with two attributes:
        ``data``   - array stacking the face features followed by the
                     non-face features;
        ``target`` - 1-D float array holding 1 for every face sample and 0
                     for every non-face sample, in the same order as ``data``.

    Side effects:
        Pickles the returned ``Bunch`` to ``./face_data.pkl`` (relative to the
        current working directory), overwriting any existing file.
    """
    face_features = _extract_npd_features(os.path.join(path, 'face'))
    nonface_features = _extract_npd_features(os.path.join(path, 'nonface'))

    bun = Bunch()
    bun.data = np.array(face_features + nonface_features)
    # Labels follow the same face-then-nonface order used for bun.data.
    bun.target = np.concatenate(
        (np.ones(len(face_features)), np.zeros(len(nonface_features)))
    )

    # `with` guarantees the cache file is flushed and closed before returning.
    with open('./face_data.pkl', 'wb') as cache_file:
        pickle.dump(bun, cache_file)
    return bun




## 划分训练集与验证集
保存为pickle

In [7]:
# Root directory of the raw images; image_preprocess reads its 'face' and
# 'nonface' subfolders.
path = './datasets/original'

finish preprocess


((200, 165600), (800, 165600))

第一次运行先跑这一行代码，将预处理后的特征数据保存到缓存中，后面可以将其注释掉

In [None]:
data = image_preprocess(path=path)  # Run this line on the first run only; it can be commented out afterwards

In [None]:
# Load the cached feature Bunch written by image_preprocess. The `with` block
# closes the file handle right after loading instead of leaving it open.
# NOTE: pickle executes arbitrary code on load; only use a cache file that was
# generated locally by this notebook.
with open('./face_data.pkl', 'rb') as cache_file:
    data = pickle.load(cache_file)
print('finish preprocess')

# Hold out 20% of the shuffled samples for validation.
X_train, X_val, y_train, y_val = train_test_split(data.data, data.target, test_size=0.2, shuffle=True)

# Last expression of the cell: displays the validation and training shapes.
X_val.shape, X_train.shape

## 开始训练与校验

In [None]:
# AdaBoostClassifier is the project's own implementation imported from
# ensemble.py (not sklearn's). It receives the DecisionTreeClassifier class
# itself rather than an instance.
# NOTE(review): the second argument (10) is presumably the number of weak
# learners - confirm against ensemble.py.
clf = AdaBoostClassifier(DecisionTreeClassifier, 10)
clf.fit(X_train, y_train)

# Predict on the held-out validation split and print the
# classification report comparing predictions with the true labels.
pred = clf.predict(X_val)
print(classification_report(y_val, pred))

