목적 : listed data Modeling from standard scaled 
===================

import modules

In [114]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

read csv files

In [115]:
train = pd.read_csv('../datasets/Train_Test/listed_resampled_train_data.csv')
test = pd.read_csv('../datasets/Train_Test/listed_test_data.csv')

In [116]:
# train_life_cycle = train[['도입기', '성장기', '성숙기', '쇠퇴기']]
# test_life_cycle = test[['도입기', '성장기', '성숙기', '쇠퇴기']]

selected features

In [117]:
x_train = train[['CASH FLOW 대 부채비율', '당좌비율', '순운전자본비율', '자기자본구성비율','경영자본순이익률',
                 '총자본영업이익률', '매출액영업이익률', '금융비용부담률', '이윤분배율', '유형자산회전율',
                 '상장년수', 'PCR', '쭈피처', '도입기', '성장기', '성숙기', '쇠퇴기']]

x_test = test[['CASH FLOW 대 부채비율', '당좌비율', '순운전자본비율', '자기자본구성비율','경영자본순이익률',
                 '총자본영업이익률', '매출액영업이익률', '금융비용부담률', '이윤분배율', '유형자산회전율',
                 '상장년수', 'PCR', '쭈피처', '도입기', '성장기', '성숙기', '쇠퇴기']]

y_train = train['부실판단']
y_test = test['부실판단']

# standard scaling

In [118]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

labels = x_train.columns

scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

x_train = pd.DataFrame(data=x_train, columns = labels)
x_test = pd.DataFrame(data=x_test, columns = labels)

# modeling

In [119]:
default = pd.DataFrame()
best = pd.DataFrame()

LogisticRegression

In [120]:
model = LogisticRegression()
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred)


print(f'Accuracy: {accuracy:.3f}')
print(f'Precision: {precision:.3f}')
print(f'Recall: {recall:.3f}')
print(f'F1 스코어: {f1:.2f}')
print(f'ROC AUC 스코어: {roc_auc:.2f}')


Accuracy: 0.766
Precision: 0.772
Recall: 0.772
F1 스코어: 0.77
ROC AUC 스코어: 0.77


In [121]:

result_logit = {
    'Accuracy' : round(accuracy, 3),
    'Precision' : round(precision, 3),
    'Recall' : round(recall, 3),
    'F1' : round(f1, 3),
    'ROC AUC' : round(roc_auc, 3)
}
default['LogisticRegression'] = result_logit
default

Unnamed: 0,LogisticRegression
Accuracy,0.766
Precision,0.772
Recall,0.772
F1,0.772
ROC AUC,0.766


In [122]:
# 하이퍼파라미터 범위 지정

param_grid = {
    'C': np.arange(0, 9.0, 0.1),  # 규제 강도
    'penalty': ['l1', 'l2', 'elasticnet'],  # 규제 유형
    'solver': ['liblinear', 'saga']  # 최적화 알고리즘
}

model = LogisticRegression()
random_logit = RandomizedSearchCV(estimator=model, param_distributions=param_grid, n_iter=99, 
                                   scoring='accuracy', cv=7, verbose=1, random_state=42)

random_logit.fit(x_train, y_train)

best_model = random_logit.best_estimator_

y_pred = best_model.predict(x_test)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred)

print("Logistic 하이퍼파라미터 조정 평가 지표")
print("Best Parameters:", random_logit.best_params_)
print(f'Accuracy: {accuracy:.3f}')
print(f'Precision: {precision:.3f}')
print(f'Recall: {recall:.3f}')
print(f'F1 Score: {f1:.3f}')
print(f'ROC AUC Score: {roc_auc:.3f}')


Fitting 7 folds for each of 99 candidates, totalling 693 fits


Logistic 하이퍼파라미터 조정 평가 지표
Best Parameters: {'solver': 'saga', 'penalty': 'l2', 'C': 0.1}
Accuracy: 0.782
Precision: 0.796
Recall: 0.772
F1 Score: 0.784
ROC AUC Score: 0.782


In [123]:
result0_logit = {
    'Accuracy' : round(accuracy, 3),
    'Precision' : round(precision, 3),
    'Recall' : round(recall, 3),
    'ROC AUC' : round(roc_auc, 3),
    'Best Parameters' : random_logit.best_params_
}
best['LogisticRegression'] = result0_logit
best

Unnamed: 0,LogisticRegression
Accuracy,0.782
Precision,0.796
Recall,0.772
ROC AUC,0.782
Best Parameters,"{'solver': 'saga', 'penalty': 'l2', 'C': 0.1}"


RandomForestClassifier

In [124]:
# Random Forest 모델 생성 및 학습
rf_model = RandomForestClassifier()
rf_model.fit(x_train, y_train)

# 예측 및 성능 평가
y_pred_rf = rf_model.predict(x_test)

accuracy_rf = accuracy_score(y_test, y_pred_rf)
precision_rf = precision_score(y_test, y_pred_rf)
recall_rf = recall_score(y_test, y_pred_rf)
f1_rf = f1_score(y_test, y_pred_rf)
roc_auc_rf = roc_auc_score(y_test, y_pred_rf)

print("Random Forest 모델 평가 지표")
print(f'Accuracy: {accuracy_rf:.3f}')
print(f'Precision: {precision_rf:.3f}')
print(f'Recall: {recall_rf:.3f}')
print(f'F1 스코어: {f1_rf:.2f}')
print(f'ROC AUC 스코어: {roc_auc_rf:.2f}')


Random Forest 모델 평가 지표
Accuracy: 0.797
Precision: 0.802
Recall: 0.802
F1 스코어: 0.80
ROC AUC 스코어: 0.80


In [125]:

result_rf = {
    'Accuracy' : round(accuracy_rf, 3),
    'Precision' : round(precision_rf, 3),
    'Recall' : round(recall_rf, 3),
    'F1' : round(f1_rf, 3),
    'ROC AUC' : round(roc_auc_rf, 3)
}
default['RandomForestClassifier'] = result_rf
default

Unnamed: 0,LogisticRegression,RandomForestClassifier
Accuracy,0.766,0.797
Precision,0.772,0.802
Recall,0.772,0.802
F1,0.772,0.802
ROC AUC,0.766,0.797


In [126]:
random_search = {'n_estimators': np.arange(0, 200, 10),
                 'max_depth': np.arange(0, 33, 1),
                 'min_samples_split': np.arange(1, 9, 1),
                 'min_samples_leaf': np.arange(1, 9, 1)}

clf = RandomForestClassifier()
random_rf = RandomizedSearchCV(estimator = clf, param_distributions = random_search, n_iter = 99, 
                               cv = 4, verbose= 1, random_state= 101, n_jobs = -1)
random_rf.fit(x_train,y_train)
random_pf = random_rf.best_estimator_.predict(x_test)

accuracy_rdrf = accuracy_score(y_test, random_pf)
precision_rdrf = precision_score(y_test, random_pf)
recall_rdrf = recall_score(y_test, random_pf)
f1_rdrf = f1_score(y_test, random_pf)
roc_auc_rdrf = roc_auc_score(y_test, random_pf)

print("Random Forest 모델 하이퍼파라미터 조정 평가 지표")
print(random_rf.best_params_)
print(f'Accuracy: {accuracy_rdrf:.3f}')
print(f'Precision: {precision_rdrf:.3f}')
print(f'Recall: {recall_rdrf:.3f}')
print(f'F1 스코어: {f1_rdrf:.3f}')
print(f'ROC AUC 스코어: {roc_auc_rdrf:.3f}')

Fitting 4 folds for each of 99 candidates, totalling 396 fits


Random Forest 모델 하이퍼파라미터 조정 평가 지표
{'n_estimators': 170, 'min_samples_split': 8, 'min_samples_leaf': 1, 'max_depth': 27}
Accuracy: 0.812
Precision: 0.808
Recall: 0.832
F1 스코어: 0.82
ROC AUC 스코어: 0.81


In [127]:
result0_rf = {
    'Accuracy' : round(accuracy_rdrf, 3),
    'Precision' : round(precision_rdrf, 3),
    'Recall' : round(recall_rdrf, 3),
    'F1' : round(f1_rdrf, 3),
    'ROC AUC' : round(roc_auc_rdrf, 3),
    'Best Parameters' : random_rf.best_params_
}
best['RandomForestClassifier'] = result0_rf
best

Unnamed: 0,LogisticRegression
Accuracy,0.812
Precision,0.808
Recall,0.832
ROC AUC,0.812
Best Parameters,"{'n_estimators': 170, 'min_samples_split': 8, ..."


AdaBoostClassifier

In [128]:
# AdaBoost 모델 생성 및 학습
adaboost_model = AdaBoostClassifier()
adaboost_model.fit(x_train, y_train)

# 예측 및 성능 평가
y_pred_adaboost = adaboost_model.predict(x_test)

accuracy_adaboost = accuracy_score(y_test, y_pred_adaboost)
precision_adaboost = precision_score(y_test, y_pred_adaboost)
recall_adaboost = recall_score(y_test, y_pred_adaboost)
f1_adaboost = f1_score(y_test, y_pred_adaboost)
roc_auc_adaboost = roc_auc_score(y_test, y_pred_adaboost)

print("AdaBoost 모델 평가 지표:")
print(f'Accuracy: {accuracy_adaboost:.3f}')
print(f'Precision: {precision_adaboost:.3f}')
print(f'Recall: {recall_adaboost:.3f}')
print(f'F1 스코어: {f1_adaboost:.3f}')
print(f'ROC AUC 스코어: {roc_auc_adaboost:.3f}')

AdaBoost 모델 평가 지표:
Accuracy: 0.777
Precision: 0.813
Recall: 0.733
F1 스코어: 0.77
ROC AUC 스코어: 0.78


In [129]:
result_adaboost = {
    'Accuracy' : round(accuracy_adaboost, 3),
    'Precision' : round(precision_adaboost, 3),
    'Recall' : round(recall_adaboost, 3),
    'F1' : round(f1_adaboost, 3),
    'ROC AUC' : round(roc_auc_adaboost, 3)
}
default['AdaBoostClassifier'] = result_adaboost
default

Unnamed: 0,LogisticRegression,RandomForestClassifier,AdaBoostClassifier
Accuracy,0.766,0.797,0.777
Precision,0.772,0.802,0.813
Recall,0.772,0.802,0.733
F1,0.772,0.802,0.771
ROC AUC,0.766,0.797,0.778


In [130]:
adaboost_model = AdaBoostClassifier()

param_grid = {
    'n_estimators': np.arange(10, 300, 10),  # 트리 개수
    'learning_rate': np.arange(0.001, 1, 0.001)  # 학습률
}

random_adaboost = RandomizedSearchCV(estimator=adaboost_model, param_distributions=param_grid, n_iter=99, 
                                   scoring='accuracy', cv=7, verbose=1, random_state=42)

random_adaboost.fit(x_train, y_train)

best_model = random_adaboost.best_estimator_

y_pred_adaboost = best_model.predict(x_test)

# 평가 메트릭 계산
accuracy_adaboost = accuracy_score(y_test, y_pred_adaboost)
precision_adaboost = precision_score(y_test, y_pred_adaboost)
recall_adaboost = recall_score(y_test, y_pred_adaboost)
f1_adaboost = f1_score(y_test, y_pred_adaboost)
roc_auc_adaboost = roc_auc_score(y_test, y_pred_adaboost)

print("Best Parameters:", random_adaboost.best_params_)
print("AdaBoost 모델 하이퍼파라미터 조정 평가 지표:")
print(f'Accuracy: {accuracy_adaboost:.3f}')
print(f'Precision: {precision_adaboost:.3f}')
print(f'Recall: {recall_adaboost:.3f}')
print(f'F1 Score: {f1_adaboost:.3f}')
print(f'ROC AUC Score: {roc_auc_adaboost:.3f}')

Fitting 7 folds for each of 99 candidates, totalling 693 fits


Best Parameters: {'n_estimators': 30, 'learning_rate': 0.501}
AdaBoost 모델 하이퍼파라미터 조정 평가 지표:
Accuracy: 0.817
Precision: 0.849
Recall: 0.782
F1 Score: 0.814
ROC AUC Score: 0.818


In [None]:
result0_adaboost = {
    'Accuracy' : round(accuracy_adaboost, 3),
    'Precision' : round(precision_adaboost, 3),
    'Recall' : round(recall_adaboost, 3),
    'F1' : round(f1_adaboost, 3),
    'ROC AUC' : round(roc_auc_adaboost, 3),
    'Best Parameters' : random_adaboost.best_params_
}
best['AdaBoostClassifier'] = result0_adaboost
best

BaggingClassifier

In [131]:
# Bagging 모델 생성 및 학습
bagging_model = BaggingClassifier()
bagging_model.fit(x_train, y_train)

# 예측 및 성능 평가
y_pred_bagging = bagging_model.predict(x_test)

accuracy_bagging = accuracy_score(y_test, y_pred_bagging)
precision_bagging = precision_score(y_test, y_pred_bagging)
recall_bagging = recall_score(y_test, y_pred_bagging)
f1_bagging = f1_score(y_test, y_pred_bagging)
roc_auc_bagging = roc_auc_score(y_test, y_pred_bagging)

print("Bagging 모델 평가 지표:")
print(f'Accuracy: {accuracy_bagging:.3f}')
print(f'Precision: {precision_bagging:.3f}')
print(f'Recall: {recall_bagging:.3f}')
print(f'F1 스코어: {f1_bagging:.3f}')
print(f'ROC AUC 스코어: {roc_auc_bagging:.3f}')

Bagging 모델 평가 지표:
Accuracy: 0.741
Precision: 0.772
Recall: 0.703
F1 스코어: 0.736
ROC AUC 스코어: 0.742


In [132]:
result_bagging = {
    'Accuracy' : round(accuracy_bagging, 3),
    'Precision' : round(precision_bagging, 3),
    'Recall' : round(recall_bagging, 3),
    'F1' : round(f1_bagging, 3),
    'ROC AUC' : round(roc_auc_bagging, 3)
}
default['BaggingClassifier'] = result_bagging
default

Unnamed: 0,LogisticRegression,RandomForestClassifier,AdaBoostClassifier,BaggingClassifier
Accuracy,0.766,0.797,0.777,0.741
Precision,0.772,0.802,0.813,0.772
Recall,0.772,0.802,0.733,0.703
F1,0.772,0.802,0.771,0.736
ROC AUC,0.766,0.797,0.778,0.742


In [133]:
bagging_model = BaggingClassifier()

param_grid = {
    'n_estimators': np.arange(10, 100, 10),
    'max_samples': np.arange(0.1, 1.0, 0.1),
    'max_features': np.arange(0.1, 1, 0.1)
}

random_bagging = RandomizedSearchCV(estimator=bagging_model, param_distributions=param_grid, n_iter=99, 
                                   scoring='accuracy', cv=7, verbose=1, random_state=42)

random_bagging.fit(x_train, y_train)

# 최적의 모델 저장
best_model = random_bagging.best_estimator_

# 최적의 모델로 예측 수행
y_pred_bagging = best_model.predict(x_test)

# 평가 메트릭 계산
accuracy_bagging = accuracy_score(y_test, y_pred_bagging)
precision_bagging = precision_score(y_test, y_pred_bagging)
recall_bagging = recall_score(y_test, y_pred_bagging)
f1_bagging = f1_score(y_test, y_pred_bagging)
roc_auc_bagging = roc_auc_score(y_test, y_pred_bagging)

print("Best Parameters:", random_bagging.best_params_)
print("Bagging 모델 하이퍼파라미터 평가 지표:")
print(f'Accuracy: {accuracy_bagging:.3f}')
print(f'Precision: {precision_bagging:.3f}')
print(f'Recall: {recall_bagging:.3f}')
print(f'F1 Score: {f1_bagging:.3f}')
print(f'ROC AUC Score: {roc_auc_bagging:.3f}')

Fitting 7 folds for each of 99 candidates, totalling 693 fits


Best Parameters: {'n_estimators': 40, 'max_samples': 0.5, 'max_features': 0.9}
Bagging 모델 하이퍼파라미터 평가 지표:
Accuracy: 0.822
Precision: 0.851
Recall: 0.792
F1 Score: 0.821
ROC AUC Score: 0.823


In [134]:
result0_bagging = {
    'Accuracy' : round(accuracy_bagging, 3),
    'Precision' : round(precision_bagging, 3),
    'Recall' : round(recall_bagging, 3),
    'F1' : round(f1_bagging, 3),
    'ROC AUC' : round(roc_auc_bagging, 3),
    'Best Parameters' : random_bagging.best_params_
}
best['BaggingClassifier'] = result0_bagging
best

Unnamed: 0,LogisticRegression,BaggingClassifier
Accuracy,0.812,0.822
Precision,0.808,0.851
Recall,0.832,0.792
ROC AUC,0.812,0.823
Best Parameters,"{'n_estimators': 170, 'min_samples_split': 8, ...","{'n_estimators': 40, 'max_samples': 0.5, 'max_..."


SVC

In [135]:
from sklearn.svm import SVC

# SVM 모델 생성 및 학습
svm_model = SVC(kernel='linear')
svm_model.fit(x_train, y_train)

# 예측 및 성능 평가
y_pred_svm = svm_model.predict(x_test)

accuracy_svm = accuracy_score(y_test, y_pred_svm)
precision_svm = precision_score(y_test, y_pred_svm)
recall_svm = recall_score(y_test, y_pred_svm)
f1_svm = f1_score(y_test, y_pred_svm)
roc_auc_svm = roc_auc_score(y_test, y_pred_svm)

print("SVM 모델 평가 지표:")
print(f'Accuracy: {accuracy_svm:.3f}')
print(f'Precision: {precision_svm:.3f}')
print(f'Recall: {recall_svm:.3f}')
print(f'F1 스코어: {f1_svm:.3f}')
print(f'ROC AUC 스코어: {roc_auc_svm:.3f}')


SVM 모델 평가 지표:
Accuracy: 0.782
Precision: 0.796
Recall: 0.772
F1 스코어: 0.784
ROC AUC 스코어: 0.782


In [136]:
result_svc_linear = {
    'Accuracy' : round(accuracy_svm, 3),
    'Precision' : round(precision_svm, 3),
    'Recall' : round(recall_svm, 3),
    'F1' : round(f1_svm, 3),
    'ROC AUC' : round(roc_auc_svm, 3)
}
default['SVC_linear'] = result_svc_linear
default

Unnamed: 0,LogisticRegression,RandomForestClassifier,AdaBoostClassifier,BaggingClassifier,SVC_linear
Accuracy,0.766,0.797,0.777,0.741,0.782
Precision,0.772,0.802,0.813,0.772,0.796
Recall,0.772,0.802,0.733,0.703,0.772
F1,0.772,0.802,0.771,0.736,0.784
ROC AUC,0.766,0.797,0.778,0.742,0.782


In [137]:
from sklearn.svm import SVC

# SVM 모델 생성 및 학습
svm_model = SVC(kernel='rbf')
svm_model.fit(x_train, y_train)

# 예측 및 성능 평가
y_pred_svm = svm_model.predict(x_test)

accuracy_svm = accuracy_score(y_test, y_pred_svm)
precision_svm = precision_score(y_test, y_pred_svm)
recall_svm = recall_score(y_test, y_pred_svm)
f1_svm = f1_score(y_test, y_pred_svm)
roc_auc_svm = roc_auc_score(y_test, y_pred_svm)

print("SVM 모델 평가 지표:")
print(f'Accuracy: {accuracy_svm:.3f}')
print(f'Precision: {precision_svm:.3f}')
print(f'Recall: {recall_svm:.3f}')
print(f'F1 스코어: {f1_svm:.3f}')
print(f'ROC AUC 스코어: {roc_auc_svm:.3f}')


SVM 모델 평가 지표:
Accuracy: 0.792
Precision: 0.812
Recall: 0.772
F1 스코어: 0.792
ROC AUC 스코어: 0.792


In [138]:
result_svc_rbf = {
    'Accuracy' : round(accuracy_svm, 3),
    'Precision' : round(precision_svm, 3),
    'Recall' : round(recall_svm, 3),
    'F1' : round(f1_svm, 3),
    'ROC AUC' : round(roc_auc_svm, 3)
}
default['SVC_rbf'] = result_svc_rbf
default

Unnamed: 0,LogisticRegression,RandomForestClassifier,AdaBoostClassifier,BaggingClassifier,SVC_linear,SVC_rbf
Accuracy,0.766,0.797,0.777,0.741,0.782,0.792
Precision,0.772,0.802,0.813,0.772,0.796,0.812
Recall,0.772,0.802,0.733,0.703,0.772,0.772
F1,0.772,0.802,0.771,0.736,0.784,0.792
ROC AUC,0.766,0.797,0.778,0.742,0.782,0.792


In [139]:
svm_model = SVC()

param_grid = {
    'C': np.arange(0.1, 100, 0.1),  # 규제 매개변수
    'gamma': np.arange(0.0001, 1, 0.0001),  # 커널 계수
    'kernel': ['linear', 'rbf', 'poly']  # 커널 타입
}

random_svc = RandomizedSearchCV(estimator=svm_model, param_distributions=param_grid, n_iter=99, 
                                   scoring='accuracy', cv=7, verbose=1, random_state=42)

random_svc.fit(x_train, y_train)

print("Best Parameters:", random_svc.best_params_)
best_model = random_svc.best_estimator_

y_pred_svm = best_model.predict(x_test)

# 평가 메트릭 계산
accuracy_svm = accuracy_score(y_test, y_pred_svm)
precision_svm = precision_score(y_test, y_pred_svm)
recall_svm = recall_score(y_test, y_pred_svm)
f1_svm = f1_score(y_test, y_pred_svm)
roc_auc_svm = roc_auc_score(y_test, y_pred_svm)

# 평가 메트릭 출력
print("SVM 모델 평가 지표:")
print(f'Accuracy: {accuracy_svm:.3f}')
print(f'Precision: {precision_svm:.3f}')
print(f'Recall: {recall_svm:.3f}')
print(f'F1 Score: {f1_svm:.3f}')
print(f'ROC AUC Score: {roc_auc_svm:.3f}')

Fitting 7 folds for each of 99 candidates, totalling 693 fits


Best Parameters: {'kernel': 'rbf', 'gamma': 0.0088, 'C': 9.0}
SVM 모델 평가 지표:
Accuracy: 0.746
Precision: 0.787
Recall: 0.693
F1 Score: 0.737
ROC AUC Score: 0.748


In [140]:
result0_svc = {
    'Accuracy' : round(accuracy_svm, 3),
    'Precision' : round(precision_svm, 3),
    'Recall' : round(recall_svm, 3),
    'F1' : round(f1_svm, 3),
    'ROC AUC' : round(roc_auc_svm, 3),
    'Best Parameters' : random_search.best_params_
}
best['AdaBoostClassifier'] = result0_svc
best

Unnamed: 0,LogisticRegression,BaggingClassifier,AdaBoostClassifier
Accuracy,0.812,0.822,0.746
Precision,0.808,0.851,0.787
Recall,0.832,0.792,0.693
ROC AUC,0.812,0.823,0.748
Best Parameters,"{'n_estimators': 170, 'min_samples_split': 8, ...","{'n_estimators': 40, 'max_samples': 0.5, 'max_...","{'kernel': 'rbf', 'gamma': 0.0088, 'C': 9.0}"


LGBMClassifier

In [141]:
from lightgbm import LGBMClassifier

# LGBM 모델 생성 및 학습
lgbm_model = LGBMClassifier()
lgbm_model.fit(x_train, y_train)

# 예측 및 성능 평가
y_pred_lgbm = lgbm_model.predict(x_test)

accuracy_lgbm = accuracy_score(y_test, y_pred_lgbm)
precision_lgbm = precision_score(y_test, y_pred_lgbm)
recall_lgbm = recall_score(y_test, y_pred_lgbm)
f1_lgbm = f1_score(y_test, y_pred_lgbm)
roc_auc_lgbm = roc_auc_score(y_test, y_pred_lgbm)

print("LightGBM 모델 평가 지표:")
print(f'Accuracy: {accuracy_lgbm:.3f}')
print(f'Precision: {precision_lgbm:.3f}')
print(f'Recall: {recall_lgbm:.3f}')
print(f'F1 스코어: {f1_lgbm:.3f}')
print(f'ROC AUC 스코어: {roc_auc_lgbm:.3f}')


[LightGBM] [Info] Number of positive: 128, number of negative: 128
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.311000 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 885
[LightGBM] [Info] Number of data points in the train set: 256, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
LightGBM 모델 평가 지표:
Accuracy: 0.802
Precision: 0.844
Recall: 0.752
F1 스코어: 0.796
ROC AUC 스코어: 0.803


In [142]:
result_lgbm = {
    'Accuracy' : round(accuracy_lgbm, 3),
    'Precision' : round(precision_lgbm, 3),
    'Recall' : round(recall_lgbm, 3),
    'F1' : round(f1_lgbm, 3),
    'ROC AUC' : round(roc_auc_lgbm, 3)
}
default['LGBMClassifier'] = result_lgbm
default

Unnamed: 0,LogisticRegression,RandomForestClassifier,AdaBoostClassifier,BaggingClassifier,SVC_linear,SVC_rbf,LGBMClassifier
Accuracy,0.766,0.797,0.777,0.741,0.782,0.792,0.802
Precision,0.772,0.802,0.813,0.772,0.796,0.812,0.844
Recall,0.772,0.802,0.733,0.703,0.772,0.772,0.752
F1,0.772,0.802,0.771,0.736,0.784,0.792,0.796
ROC AUC,0.766,0.797,0.778,0.742,0.782,0.792,0.803


In [143]:
param_grid = {
        'learning_rate': np.arange(0.01, 1, 0.01),
        'n_estimators': np.arange(10, 300, 10),
        'max_depth': np.arange(1, 9, 1),
        'num_leaves': np.arange(1, 160, 1),
        'min_child_samples': np.arange(1, 22, 1)
    }
# param_grid = {
#     'n_estimators': [100, 200, 300],
#     'learning_rate': [0.01, 0.1, 0.2],
# }

lgbm = LGBMClassifier()
random_lgbm = RandomizedSearchCV(lgbm, param_grid, cv = 7, n_jobs = -1, n_iter=99)
random_lgbm.fit(x_train,y_train)
random_pf = random_lgbm.best_estimator_.predict(x_test)

accuracy_rdrf = accuracy_score(y_test, random_pf)
precision_rdrf = precision_score(y_test, random_pf)
recall_rdrf = recall_score(y_test, random_pf)
f1_rdrf = f1_score(y_test, random_pf)
roc_auc_rdrf = roc_auc_score(y_test, random_pf)

print("LGBM 모델 하이퍼파라미터 조정 평가 지표")
print(random_lgbm.best_params_)
print(f'Accuracy: {accuracy_rdrf:.3f}')
print(f'Precision: {precision_rdrf:.3f}')
print(f'Recall: {recall_rdrf:.3f}')
print(f'F1 스코어: {f1_rdrf:.3f}')
print(f'ROC AUC 스코어: {roc_auc_rdrf:.3f}')

[LightGBM] [Info] Number of positive: 128, number of negative: 128
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000228 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 885
[LightGBM] [Info] Number of data points in the train set: 256, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
LGBM 모델 하이퍼파라미터 조정 평가 지표
{'num_leaves': 53, 'n_estimators': 80, 'min_child_samples': 1, 'max_depth': 3, 'learning_rate': 0.91}
Accuracy: 0.751
Precision: 0.802
Recall: 0.683
F1 스코어: 0.738
ROC AUC 스코어: 0.753


In [144]:
result0_lgbm = {
    'Accuracy' : round(accuracy_rdrf, 3),
    'Precision' : round(precision_rdrf, 3),
    'Recall' : round(recall_rdrf, 3),
    'F1' : round(f1_rdrf, 3),
    'ROC AUC' : round(roc_auc_rdrf, 3),
    'Best Parameters' : random_lgbm.best_params_
}
best['LGBMClassifier'] = result0_lgbm
best

Unnamed: 0,LogisticRegression,BaggingClassifier,AdaBoostClassifier,LGBMClassifier
Accuracy,0.812,0.822,0.746,0.751
Precision,0.808,0.851,0.787,0.802
Recall,0.832,0.792,0.693,0.683
ROC AUC,0.812,0.823,0.748,0.753
Best Parameters,"{'n_estimators': 170, 'min_samples_split': 8, ...","{'n_estimators': 40, 'max_samples': 0.5, 'max_...","{'kernel': 'rbf', 'gamma': 0.0088, 'C': 9.0}","{'num_leaves': 53, 'n_estimators': 80, 'min_ch..."


#### 딥러닝

In [145]:
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Dense, Dropout
# import tensorflow as tf

In [146]:
# model = Sequential()
# model.add(Dense(256, input_dim = 12, activation = 'relu'))
# model.add(Dropout(0.5))
# model.add(Dense(64, activation = 'relu'))
# model.add(Dropout(0.5))
# model.add(Dense(1, activation = 'sigmoid'))

In [147]:
# model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# history = model.fit(x_train, y_train, epochs=200, batch_size=5)

# print("\n Accuracy: %.4f" % (model.evaluate(x_test, y_test)[1]))

In [148]:
# # 모델을 사용하여 테스트 데이터에 대한 예측 수행
# y_pred = model.predict(x_test)
# y_pred = binarize(y_pred, threshold=0.5)  # 예측값을 0.5 임계값을 기준으로 이진 분류로 변환

# # 정확도(accuracy) 계산
# accuracy = accuracy_score(y_test, y_pred)
# print(f"accuracy: {accuracy:.4f}")

# # F1 점수(f1 score) 계산
# f1 = f1_score(y_test, y_pred)
# print(f"F1-Score: {f1:.4f}")

# # 재현율(recall) 계산
# recall = recall_score(y_test, y_pred)
# print(f"recall: {recall:.4f}")

# # 정밀도(precision) 계산
# precision = precision_score(y_test, y_pred)
# print(f"precision: {precision:.4f}")

# 결과

In [149]:
default

Unnamed: 0,LogisticRegression,RandomForestClassifier,AdaBoostClassifier,BaggingClassifier,SVC_linear,SVC_rbf,LGBMClassifier
Accuracy,0.766,0.797,0.777,0.741,0.782,0.792,0.802
Precision,0.772,0.802,0.813,0.772,0.796,0.812,0.844
Recall,0.772,0.802,0.733,0.703,0.772,0.772,0.752
F1,0.772,0.802,0.771,0.736,0.784,0.792,0.796
ROC AUC,0.766,0.797,0.778,0.742,0.782,0.792,0.803


In [150]:
best

Unnamed: 0,LogisticRegression,BaggingClassifier,AdaBoostClassifier,LGBMClassifier
Accuracy,0.812,0.822,0.746,0.751
Precision,0.808,0.851,0.787,0.802
Recall,0.832,0.792,0.693,0.683
ROC AUC,0.812,0.823,0.748,0.753
Best Parameters,"{'n_estimators': 170, 'min_samples_split': 8, ...","{'n_estimators': 40, 'max_samples': 0.5, 'max_...","{'kernel': 'rbf', 'gamma': 0.0088, 'C': 9.0}","{'num_leaves': 53, 'n_estimators': 80, 'min_ch..."
