In [1]:
# Data wrangling, processing, and analysis libraries
import numpy as np
# NOTE: the name `random` is bound to numpy.random here, not the standard-library module
import numpy.random as random
import scipy as sp
from pandas import Series, DataFrame
import pandas as pd

# Visualization libraries
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
%matplotlib inline

# Machine learning library
import sklearn

# Display floating-point output to three decimal places
%precision 3

'%.3f'

<span style="font-size: 200%">アンサンブル学習</span>

**バギング**

In [3]:
# インポート
from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer

# Load the breast cancer dataset
cancer = load_breast_cancer()

# Split into training and test sets, stratified on the target labels
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    stratify=cancer.target,
    random_state=66,
)

# A single k-NN classifier versus a bagged ensemble of 100 k-NN classifiers
models = {
    'kNN': KNeighborsClassifier(),
    'bagging': BaggingClassifier(
        KNeighborsClassifier(),
        n_estimators=100,
        random_state=0,
    ),
}

# Fit each model, then record its accuracy on the training and test sets
# under (model name, score name) keys.
scores = {}
for model_name, model in models.items():
    model.fit(X_train, y_train)
    scores.update(
        ((model_name, score_name), model.score(X_part, y_part))
        for score_name, X_part, y_part in (
            ('train_score', X_train, y_train),
            ('test_score', X_test, y_test),
        )
    )

# Display: one row per model, one column per score
pd.Series(scores).unstack()


Unnamed: 0,test_score,train_score
bagging,0.937063,0.950704
kNN,0.923077,0.948357


**ブースティング**

In [5]:
# Imports
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor

# Load the Boston Housing dataset.
# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2 (the import
# itself raises ImportError there), so fall back to rebuilding the same arrays from
# the original source, as the scikit-learn removal notice describes.
# For new work scikit-learn recommends a different dataset (e.g. fetch_california_housing).
try:
    from sklearn.datasets import load_boston
    boston = load_boston()
except ImportError:
    from sklearn.utils import Bunch
    raw_df = pd.read_csv("http://lib.stat.cmu.edu/datasets/boston",
                         sep=r"\s+", skiprows=22, header=None)
    # Each record occupies two physical rows of the raw file (11 values, then 3):
    # the first 13 values are the features and the last one is the target.
    boston = Bunch(
        data=np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
        target=raw_df.values[1::2, 2],
    )

X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target, random_state = 66)

# A single decision tree versus AdaBoost built on decision trees
models = {
    'tree':DecisionTreeRegressor(random_state=0), 
    'AdaBoostRegressor':AdaBoostRegressor(DecisionTreeRegressor(), random_state=0)    
}

# Fit each model and record its R^2 score on the training and test sets
scores ={}
for model_name, model in models.items():
    model.fit(X_train, y_train)
    scores[(model_name, 'train_score')] = model.score(X_train, y_train)
    scores[(model_name, 'test_score')] = model.score(X_test, y_test)
    
# Display: one row per model, one column per score
pd.Series(scores).unstack()


Unnamed: 0,test_score,train_score
AdaBoostRegressor,0.923301,0.99944
tree,0.687582,1.0


**ランダムフォレストと勾配ブースティング**

In [8]:
# Imports
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

# Load the Boston Housing dataset.
# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2 (the import
# itself raises ImportError there), so fall back to rebuilding the same arrays from
# the original source, as the scikit-learn removal notice describes.
# For new work scikit-learn recommends a different dataset (e.g. fetch_california_housing).
try:
    from sklearn.datasets import load_boston
    boston = load_boston()
except ImportError:
    from sklearn.utils import Bunch
    raw_df = pd.read_csv("http://lib.stat.cmu.edu/datasets/boston",
                         sep=r"\s+", skiprows=22, header=None)
    # Each record occupies two physical rows of the raw file (11 values, then 3):
    # the first 13 values are the features and the last one is the target.
    boston = Bunch(
        data=np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
        target=raw_df.values[1::2, 2],
    )

X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target, random_state = 66)

# Random forest (10 trees) versus gradient boosting
models = {
    'RandomForestRegressor':RandomForestRegressor(n_estimators=10, random_state=0), 
    'GradientBoostingRegressor':GradientBoostingRegressor(random_state=0)    
}

# Fit each model and record its R^2 score on the training and test sets
scores ={}
for model_name, model in models.items():
    model.fit(X_train, y_train)
    scores[(model_name, 'train_score')] = model.score(X_train, y_train)
    scores[(model_name, 'test_score')] = model.score(X_test, y_test)
    
# Display: one row per model, one column per score
pd.Series(scores).unstack()


Unnamed: 0,test_score,train_score
GradientBoostingRegressor,0.926076,0.977138
RandomForestRegressor,0.859954,0.967546
