In [114]:
#실습전 설정

import sys
assert sys.version_info >= (3,5)

import sklearn 
assert sklearn.__version__ >= "0.20"
import numpy as np
import os
 
np.random.seed(42)

%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

PROJECT_ROOT_DIR ="."
CHAPTER_ID="ensembles"
IMAGES_PATH=os.path.join(PROJECT_ROOT_DIR, 'images', CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    path=os.path.join(IMAGES_PATH, fig_id+"."+fig_extension)
    print("그림저장: ", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)

## PG#4 랜덤 포레스트
###  Noise=0.4인 1000개의 데이터셋을 생성

In [115]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons

X, y = make_moons(n_samples=1000, noise=0.40, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

#### 규제 1
#### n_estimators = 500 / max_leaf_node = 30 / random_state = 42

In [116]:
#랜덤 포레스트
from sklearn.ensemble import RandomForestClassifier

rnd_clf = RandomForestClassifier(n_estimators=500,max_leaf_nodes=30, random_state=42)
rnd_clf.fit(X_train, y_train)

y_pred_rf = rnd_clf.predict(X_test)

from sklearn.metrics import accuracy_score
print('RandomForest accuracy_score:',accuracy_score(y_test, y_pred))

RandomForest accuracy_score: 0.856


In [117]:
#엑스트라 트리
from sklearn.ensemble import ExtraTreesClassifier

et_clf = ExtraTreesClassifier(n_estimators=500,max_leaf_nodes=30, random_state=42)
et_clf.fit(X_train, y_train)

y_pred_et = et_clf.predict(X_test)

from sklearn.metrics import accuracy_score
print('ExtraTrees accuracy_score:',accuracy_score(y_test, y_pred_et))

ExtraTrees accuracy_score: 0.872


#### 규제 2
#### n_estimators = 500 / max_leaf_node = 30 / random_state = 42 / max_depth = 25

In [118]:
#랜덤 포레스트
from sklearn.ensemble import RandomForestClassifier

rnd_clf = RandomForestClassifier(n_estimators =500,max_leaf_nodes=30, random_state=42,max_depth=25)
rnd_clf.fit(X_train, y_train)

y_pred_rf = rnd_clf.predict(X_test)

from sklearn.metrics import accuracy_score
print('RandomForest accuracy_score:',accuracy_score(y_test, y_pred))

RandomForest accuracy_score: 0.856


In [119]:
#엑스트라 트리
from sklearn.ensemble import ExtraTreesClassifier

et_clf = ExtraTreesClassifier(n_estimators =500,max_leaf_nodes=30, random_state=42,max_depth=25)
et_clf.fit(X_train, y_train)

y_pred_et = et_clf.predict(X_test)

from sklearn.metrics import accuracy_score
print('ExtraTrees accuracy_score:',accuracy_score(y_test, y_pred_et))

ExtraTrees accuracy_score: 0.872


#### 규제 3
#### n_estimators = 500 / max_leaf_node = 30 / random_state = 42 / max_depth = 25 / min_samples_leaf = 5

In [120]:
#랜덤 포레스트
from sklearn.ensemble import RandomForestClassifier

rnd_clf = RandomForestClassifier(n_estimators =500,max_leaf_nodes=30,random_state=42,max_depth=25,min_samples_leaf=5)
rnd_clf.fit(X_train, y_train)

y_pred_rf = rnd_clf.predict(X_test)

from sklearn.metrics import accuracy_score
print('RandomForest accuracy_score:',accuracy_score(y_test, y_pred))

RandomForest accuracy_score: 0.856


In [121]:
#엑스트라 트리
from sklearn.ensemble import ExtraTreesClassifier

et_clf = ExtraTreesClassifier(n_estimators =500,max_leaf_nodes=30,random_state=42,max_depth=25,min_samples_leaf=5)
et_clf.fit(X_train, y_train)

y_pred_et = et_clf.predict(X_test)

from sklearn.metrics import accuracy_score
print('ExtraTrees accuracy_score:',accuracy_score(y_test, y_pred_et))

ExtraTrees accuracy_score: 0.864


### 동일 조건에서 랜덤포레스트와 엑스트라 트리를 비교할 경우 
### 엑스트라 트리가 더 좋은 정확도를 보였다



엑스트라 트리


from sklearn.ensemble import ExtraTreesClassifier

et_clf = ExtraTreesClassifier(n_estimators =500,max_leaf_nodes=30, random_state=42,max_depth=25)

et_clf.fit(X_train, y_train)

y_pred_et = et_clf.predict(X_test)

from sklearn.metrics import accuracy_score

print('ExtraTrees accuracy_score:',accuracy_score(y_test, y_pred_et))