In [None]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import VotingClassifier, BaggingClassifier, RandomForestClassifier, AdaBoostClassifier
from sklearn.model_selection import cross_val_score, GridSearchCV

# Load the feature matrix (X) and the label vector (y) from .npy files that are
# expected to sit in the notebook's working directory.
X, y = [
    np.load(npy_file)
    for npy_file in ('Ensemble_titanic_X_train.npy', 'Ensemble_titanic_y_train.npy')
]


In [None]:
# Individual base classifiers that will vote together.
clf1 = LogisticRegression(random_state=1)
clf2 = DecisionTreeClassifier(max_depth=4, random_state=1)
clf3 = GaussianNB()

# Hard-voting ensemble: each (name, estimator) pair is registered under a short
# name so its hyperparameters can later be addressed as "<name>__<param>".
named_learners = list(zip(('lr', 'dt', 'gnb'), (clf1, clf2, clf3)))
eclf = VotingClassifier(estimators=named_learners, voting='hard')

# Evaluate with 5-fold cross-validation and report the mean fold score.
voting_cv_scores = cross_val_score(eclf, X, y, cv=5)
print("Voting Classifier CV Score:", voting_cv_scores.mean())


In [None]:
# Hyperparameter grid for the voting ensemble, grouped by the member it tunes.
# Keys use the "<estimator name>__<parameter>" form expected by GridSearchCV.
lr_grid = {
    'lr__solver': ['liblinear'],
    'lr__penalty': ['l2'],
    'lr__C': [0.1, 5.0, 7.0, 10.0, 15.0, 20.0, 100.0],
}
dt_grid = {
    'dt__criterion': ['gini', 'entropy'],
    'dt__max_depth': [10, 8, 7, 6, 5, 4, 3, 2],
    'dt__min_samples_leaf': list(range(1, 10)),
}
params = {**lr_grid, **dt_grid}

# Exhaustive 5-fold search over every combination; fit() returns the fitted
# search object, so construction and fitting are chained.
grid = GridSearchCV(eclf, params, cv=5).fit(X, y)
print("Voting Classifier Best Score:", grid.best_score_)
print("Voting Classifier Best Params:", grid.best_params_)


In [None]:
# Bagging baseline: an ensemble of 100 logistic-regression models, seeded so the
# resampling is repeatable, scored by the mean of 5-fold cross-validation.
clf = LogisticRegression(random_state=1)
bagging = BaggingClassifier(clf, random_state=1, n_estimators=100)
bagging_cv_scores = cross_val_score(bagging, X, y, cv=5)
print("Bagging Classifier CV Score:", bagging_cv_scores.mean())


In [None]:
# Grid over the ensemble size and the fraction of the training set drawn for
# each base estimator.
# NOTE: every max_samples entry must be a float. BaggingClassifier interprets an
# int as an absolute sample count, so the integer 1 would train each base model
# on a single row (which fails for LogisticRegression); 1.0 means "100% of rows".
params = {
    'n_estimators': [10, 20, 30, 40, 50, 55],
    'max_samples': [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
}

# Exhaustive 5-fold search over the bagging ensemble defined earlier.
grid = GridSearchCV(estimator=bagging, param_grid=params, cv=5)
grid.fit(X, y)
print("Bagging Classifier Best Score:", grid.best_score_)
print("Bagging Classifier Best Params:", grid.best_params_)


In [None]:
# Random forest baseline: 100 trees, 2 candidate features per split, all CPU cores.
# random_state is fixed (matching the other estimators in this notebook) so the
# cross-validation score is reproducible across kernel restarts.
rf = RandomForestClassifier(n_estimators=100, max_features=2, n_jobs=-1, random_state=1)
print("Random Forest CV Score:", cross_val_score(rf, X, y, cv=5).mean())


In [None]:
# Candidate values for max_features must not exceed the number of columns in X.
# Keep the hand-picked values that are strictly below the feature count, then
# append the feature count itself ("use every feature"). This preserves the
# original ordering while dropping out-of-range and duplicate candidates.
n_features = X.shape[1]
max_features_candidates = [
    m for m in (1, 2, 3, 4, 5, 6, 7, 10, 15, 20, 25) if m < n_features
]
max_features_candidates.append(n_features)

params = {
    'n_estimators': [10, 20, 30, 50, 100],
    'max_features': max_features_candidates
}

# Exhaustive 5-fold search over the random forest defined earlier.
grid = GridSearchCV(estimator=rf, param_grid=params, cv=5)
grid.fit(X, y)
print("Random Forest Best Score:", grid.best_score_)
print("Random Forest Best Params:", grid.best_params_)

# OOB Score
# Out-of-bag scoring is only computed during fit, so the best estimator is
# reconfigured with oob_score=True and refit on the full data.
# Note: this modifies and refits grid.best_estimator_ in place.
best_rf = grid.best_estimator_
best_rf.set_params(oob_score=True)
best_rf.fit(X, y)
print("Random Forest OOB Score:", best_rf.oob_score_)


In [None]:
# AdaBoost baseline: 500 boosting rounds over depth-2 decision trees.
# random_state is fixed (matching the other estimators in this notebook) so the
# score is reproducible; the ensemble's seed is also used to seed the base trees,
# which matters once max_features restricts the features tried per split.
ada = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=2),
    n_estimators=500,
    random_state=1
)
print("AdaBoost CV Score:", cross_val_score(ada, X, y, cv=5).mean())


In [None]:
# Hyperparameter grid for AdaBoost, split into settings of the base decision
# tree ("estimator__<param>") and settings of the boosting procedure itself.
# NOTE(review): estimator__max_features of 7 and 8 presumes X has at least
# 8 columns -- confirm against the data.
tree_grid = {
    'estimator__criterion': ['gini', 'entropy'],
    'estimator__max_features': [7, 8],
    'estimator__max_depth': [1, 2],
}
boost_grid = {
    'n_estimators': list(range(23, 28)),
    'learning_rate': [0.4, 0.45, 0.5, 0.55, 0.6],
}
params = {**tree_grid, **boost_grid}

# Exhaustive 5-fold search using all CPU cores; fit() returns the fitted search.
grid = GridSearchCV(ada, params, cv=5, n_jobs=-1).fit(X, y)
print("AdaBoost Best Score:", grid.best_score_)
print("AdaBoost Best Params:", grid.best_params_)

# Feature importances of the best model refit on the full data.
best_ada = grid.best_estimator_
print("AdaBoost Feature Importances:", best_ada.feature_importances_)
