In [None]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import VotingClassifier, BaggingClassifier, RandomForestClassifier, AdaBoostClassifier
from sklearn.model_selection import cross_val_score, GridSearchCV

# Load the two training arrays (X and y) from .npy files in the working directory
X, y = (
    np.load(path)
    for path in ('./Ensemble_titanic_X_train.npy', './Ensemble_titanic_y_train.npy')
)


In [None]:
# Individual base classifiers that will vote in the ensemble
clf1, clf2, clf3 = (
    LogisticRegression(random_state=1),
    DecisionTreeClassifier(max_depth=4, random_state=1),
    GaussianNB(),
)

# Voting ensemble: each classifier is registered under a short name
# ('lr', 'dt', 'gnb') and combined with voting='hard'
eclf = VotingClassifier(
    voting='hard',
    estimators=list(zip(('lr', 'dt', 'gnb'), (clf1, clf2, clf3))),
)

# Report the mean score over 5-fold cross-validation
print(
    "Voting Classifier CV Score:",
    np.mean(cross_val_score(eclf, X, y, cv=5)),
)


Voting Classifier CV Score: 0.8222941661905668


In [None]:
# Hyperparameter grid for the voting ensemble. Keys are prefixed with the
# names the sub-estimators were registered under in `eclf` ('lr', 'dt').
params = {
    'lr__solver': ['liblinear'],
    'lr__penalty': ['l2'],
    'lr__C': [0.1, 5.0, 7.0, 10.0, 15.0, 20.0, 100.0],
    'dt__criterion': ['gini', 'entropy'],
    'dt__max_depth': [10,8,7,6,5,4,3,2],
    'dt__min_samples_leaf': [1,2,3,4,5,6,7,8,9]
}
# 7 * 2 * 8 * 9 = 1008 candidates, each fitted on 5 folds: run the search on
# all available cores (n_jobs=-1), as the AdaBoost search in this notebook
# does. The sub-estimators are seeded, so parallelism does not change results.
grid = GridSearchCV(estimator=eclf, param_grid=params, cv=5, n_jobs=-1)
grid.fit(X, y)
print("Voting Classifier Best Score:", grid.best_score_)
print("Voting Classifier Best Params:", grid.best_params_)


Voting Classifier Best Score: 0.8256839966990415
Voting Classifier Best Params: {'dt__criterion': 'entropy', 'dt__max_depth': 5, 'dt__min_samples_leaf': 7, 'lr__C': 100.0, 'lr__penalty': 'l2', 'lr__solver': 'liblinear'}


In [None]:
# Bagging ensemble of 100 logistic-regression models, seeded for repeatability
clf = LogisticRegression(random_state=1)
bagging = BaggingClassifier(clf, random_state=1, n_estimators=100)

# Mean score over 5-fold cross-validation
print(
    "Bagging Classifier CV Score:",
    np.mean(cross_val_score(bagging, X, y, cv=5)),
)


Bagging Classifier CV Score: 0.8312892782327175


In [13]:
# Tune the bagging ensemble: number of base models and the share of the
# training set drawn for each base model.
params = {
    'n_estimators': [10,20,30,40,50,55],
    # For max_samples a float is a fraction of the training set, whereas an
    # int is an absolute number of samples. The upper end of this range must
    # therefore be 1.0 (all samples), not 1 (a single sample per estimator).
    'max_samples': [0.5,0.6,0.7,0.8,0.9,1.0]
}
grid = GridSearchCV(estimator=bagging, param_grid=params, cv=5)
grid.fit(X, y)
print("Bagging Classifier Best Score:", grid.best_score_)
print("Bagging Classifier Best Params:", grid.best_params_)


Bagging Classifier Best Score: 0.8290484352186885
Bagging Classifier Best Params: {'max_samples': 0.9, 'n_estimators': 20}


In [15]:
# Baseline random forest: 100 trees, max_features=2, trained on all cores.
# random_state is fixed (same seed as the other models in this notebook) so
# the cross-validation score is reproducible on Restart & Run All.
rf = RandomForestClassifier(n_estimators=100, max_features=2, n_jobs=-1, random_state=1)
print("Random Forest CV Score:", cross_val_score(rf, X, y, cv=5).mean())


Random Forest CV Score: 0.80769377261474


In [16]:
# Grid search over forest size and max_features; the last max_features
# candidate is the full feature count of X.
params = {
    'n_estimators': [10, 20, 30, 50, 100],
    'max_features': [1,2,3,4,5,6,7,10,15,20,25, X.shape[1]]
}
grid = GridSearchCV(estimator=rf, param_grid=params, cv=5)
grid.fit(X, y)
print("Random Forest Best Score:", grid.best_score_)
print("Random Forest Best Params:", grid.best_params_)

# OOB Score
# Build a separate forest with the best estimator's parameters plus
# oob_score=True instead of calling set_params()/fit() on grid.best_estimator_
# itself: mutating and refitting that object would silently replace the model
# the search produced.
# NOTE(review): the recorded run shows a warning after this cell - presumably
# scikit-learn's notice that too few trees leave some samples without an OOB
# estimate; confirm.
best_rf = RandomForestClassifier(
    **{**grid.best_estimator_.get_params(deep=False), 'oob_score': True}
)
best_rf.fit(X, y)
print("Random Forest OOB Score:", best_rf.oob_score_)


Random Forest Best Score: 0.8256776487018346
Random Forest Best Params: {'max_features': 20, 'n_estimators': 10}
Random Forest OOB Score: 0.8053993250843644


  warn(


In [17]:
# Baseline AdaBoost: 500 boosting rounds over depth-2 decision trees.
# random_state is fixed (same seed as the other models in this notebook) so
# the cross-validation score is reproducible on Restart & Run All.
ada = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=2),
    n_estimators=500,
    random_state=1
)
print("AdaBoost CV Score:", cross_val_score(ada, X, y, cv=5).mean())


AdaBoost CV Score: 0.794172538564083


In [18]:
# Search space for AdaBoost: 'estimator__*' keys are forwarded to the base
# decision tree, the remaining keys configure the booster itself.
params = {
    'estimator__criterion': [
        'gini',
        'entropy',
    ],
    'estimator__max_features': [7, 8],
    'estimator__max_depth': [1, 2],
    'n_estimators': [23, 24, 25, 26, 27],
    'learning_rate': [0.4, 0.45, 0.5, 0.55, 0.6],
}

# 5-fold grid search, run on all available cores
grid = GridSearchCV(ada, params, cv=5, n_jobs=-1)
grid.fit(X, y)

print(
    "AdaBoost Best Score:",
    grid.best_score_,
)
print(
    "AdaBoost Best Params:",
    grid.best_params_,
)

# Feature importances of the refitted best model
print(
    "AdaBoost Feature Importances:",
    grid.best_estimator_.feature_importances_,
)


AdaBoost Best Score: 0.838056243255253
AdaBoost Best Params: {'estimator__criterion': 'gini', 'estimator__max_depth': 2, 'estimator__max_features': 7, 'learning_rate': 0.45, 'n_estimators': 26}
AdaBoost Feature Importances: [0.1885751  0.22924978 0.02321966 0.0707448  0.04704746 0.07434986
 0.03863318 0.02130352 0.01761432 0.01630942 0.00795058 0.
 0.         0.05444326 0.         0.08050383 0.01482421 0.00944737
 0.         0.         0.01278048 0.02416768 0.02054383 0.0226259
 0.         0.02566578 0.        ]
