In [1]:
# 모든 머신러닝 모델에 대하여 성능 평가가 가능함
from model_test import Model_Evaluation 

In [2]:
# 모델 평가 객체
class Model_Evaluation():

    def __init__(self, result, call_model):
        self.catboost_model = call_model # 평가할 모델 호출
        print(f'훈련용 평가지표: {self.catboost_model.score(result.train_final, result.y_train)} / 테스트용 평가지표: {self.catboost_model.score(result.test_final, result.y_test)}')

        # test.csv에 대한 성능평가
        print(f'test.csv 평가지표: {self.catboost_model.score(result.test_df_final, result.test_df_y)}')
        # train.csv에 대한 roc_curve
        print(f"train.csv에 대한 로컬PC Score: {self.do_roc_curve(self.catboost_model, result.test_final, result.y_test)}")
        # test.csv에 대한 roc_curve
        print(f"test.csv에 대한 로컬PC Score: {self.do_roc_curve(self.catboost_model, result.test_df_final, result.test_df_y)}")

        # train.csv에 대한 confusion matrix
        print("=" * 30)
        print("train.csv confusion matrix")
        print(self.do_confusion_matrix(self.catboost_model, result.test_final, result.y_test))

        # test.csv에 대한 confusion matrix
        print("=" * 30)
        print("test.csv confusion matrix")
        print(self.do_confusion_matrix(self.catboost_model, result.test_df_final, result.test_df_y))

        # 훈련 데이터 f1 evaluation 결과
        self.val_acc, self.val_f1, self.val_report = self.get_f1_evaluation(self.catboost_model, result.test_final, result.y_test)
        print("=" * 30)
        print("validation Accuracy:", self.val_acc)
        print("validation Macro-F1:", self.val_f1)
        print(self.val_report)

        # 테스트 데이터 f1 evaluation 결과
        self.val_acc, self.val_f1, self.val_report = self.get_f1_evaluation(self.catboost_model, result.test_df_final, result.test_df_y)
        print("=" * 30)
        print("validation Accuracy:", self.val_acc)
        print("validation Macro-F1:", self.val_f1)
        print(self.val_report)
    
    # train.csv에 대한 roc_curve
    def do_roc_curve(self, model, test, predict):
        from sklearn.metrics import roc_curve, auc

        self.y_predict = model.predict(test)
        self.fpr, self.tpr, self.thresholds = roc_curve(predict, self.y_predict)

        self.score_auc = auc(self.fpr, self.tpr)

        return self.score_auc
    
    # confusion matrix
    def do_confusion_matrix(self, model, test_final, y_test):
        from sklearn.metrics import confusion_matrix
        self.pred_tree = model.predict(test_final)
        self.conf_mx = confusion_matrix(y_test, self.pred_tree, normalize='true')

        return self.conf_mx
    
    # f1-score 지수 평가
    def get_f1_evaluation(self, model, df_final, y_final):
        from sklearn.metrics import accuracy_score, f1_score, classification_report

        # 훈련 데이터 평가
        self.y_pred = model.predict(df_final)
        self.val_acc = accuracy_score(y_final, self.y_pred)
        self.val_f1 = f1_score(y_final, self.y_pred, average="macro")
        self.val_report = classification_report(y_final, self.y_pred)

        return self.val_acc, self.val_f1, self.val_report

In [3]:
# catboost 객체
class Catboost():

    def CallCatboost(result):
        from catboost import CatBoostClassifier

        model = CatBoostClassifier(verbose=False, random_state=result.SEED)
        model.fit(result.train_final, result.y_train)

        return model

In [4]:
from eda import DataProcessing

In [5]:
result = DataProcessing("./data/train.csv", "./data/test.csv")
call_model = Catboost.CallCatboost(result) # 평가할 모델 설정
print("<catboost>")
Model_Evaluation(result, call_model)

<catboost>
훈련용 평가지표: 1.0 / 테스트용 평가지표: 0.9928571428571429
test.csv 평가지표: 0.9953333333333333
train.csv에 대한 로컬PC Score: 0.9928650731452455
test.csv에 대한 로컬PC Score: 0.9953580901856764
train.csv confusion matrix
[[0.99425287 0.00574713]
 [0.00852273 0.99147727]]
test.csv confusion matrix
[[1.         0.        ]
 [0.00928382 0.99071618]]
validation Accuracy: 0.9928571428571429
validation Macro-F1: 0.9928570116593978
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       348
           1       0.99      0.99      0.99       352

    accuracy                           0.99       700
   macro avg       0.99      0.99      0.99       700
weighted avg       0.99      0.99      0.99       700

validation Accuracy: 0.9953333333333333
validation Macro-F1: 0.9953333312592583
              precision    recall  f1-score   support

           0       0.99      1.00      1.00       746
           1       1.00      0.99      1.00       754

    accuracy  

<__main__.Model_Evaluation at 0x2042989d010>

In [6]:
class RandomForest():

    def CallRandomForest(result):
        from sklearn.ensemble import RandomForestClassifier

        model_rf = RandomForestClassifier(random_state=result.SEED)
        model_rf = model_rf.fit(result.train_final, result.y_train)

        return model_rf

In [7]:
print("<RandomForest>")
Model_Evaluation(result, RandomForest.CallRandomForest(result))

<RandomForest>
훈련용 평가지표: 1.0 / 테스트용 평가지표: 0.9714285714285714
test.csv 평가지표: 0.9753333333333334
train.csv에 대한 로컬PC Score: 0.9714276384535006
test.csv에 대한 로컬PC Score: 0.9753290760270514
train.csv confusion matrix
[[0.97126437 0.02873563]
 [0.02840909 0.97159091]]
test.csv confusion matrix
[[0.97453083 0.02546917]
 [0.02387268 0.97612732]]
validation Accuracy: 0.9714285714285714
validation Macro-F1: 0.9714276384535006
              precision    recall  f1-score   support

           0       0.97      0.97      0.97       348
           1       0.97      0.97      0.97       352

    accuracy                           0.97       700
   macro avg       0.97      0.97      0.97       700
weighted avg       0.97      0.97      0.97       700

validation Accuracy: 0.9753333333333334
validation Macro-F1: 0.9753324453013642
              precision    recall  f1-score   support

           0       0.98      0.97      0.98       746
           1       0.97      0.98      0.98       754

    accura

<__main__.Model_Evaluation at 0x2042ae53c50>

In [8]:
class XGBoost():

    def CallXGBoost(result):
        from xgboost import XGBClassifier

        model_xgb = XGBClassifier(
        n_estimators=500,
        max_depth=6,
        learning_rate=0.05,
        random_state=result.SEED,
        n_jobs=-1
        )
        model_xgb = model_xgb.fit(result.train_final, result.y_train)

        return model_xgb

In [9]:
print("<XGBoost>")
Model_Evaluation(result, XGBoost.CallXGBoost(result))

<XGBoost>
훈련용 평가지표: 1.0 / 테스트용 평가지표: 0.99
test.csv 평가지표: 0.9913333333333333
train.csv에 대한 로컬PC Score: 0.9899751828631138
test.csv에 대한 로컬PC Score: 0.9913366424644967
train.csv confusion matrix
[[0.98563218 0.01436782]
 [0.00568182 0.99431818]]
test.csv confusion matrix
[[0.9919571  0.0080429 ]
 [0.00928382 0.99071618]]
validation Accuracy: 0.99
validation Macro-F1: 0.9899989998999901
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       348
           1       0.99      0.99      0.99       352

    accuracy                           0.99       700
   macro avg       0.99      0.99      0.99       700
weighted avg       0.99      0.99      0.99       700

validation Accuracy: 0.9913333333333333
validation Macro-F1: 0.9913331445884821
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       746
           1       0.99      0.99      0.99       754

    accuracy                           0.99

<__main__.Model_Evaluation at 0x20429883c50>

In [10]:
class LightGBM():

    def CallLightGBM(result):
        import lightgbm as lgb

        model_lgbm = lgb.LGBMClassifier(random_state=result.SEED)
        model_lgbm.fit(result.train_final, result.y_train)

        return model_lgbm

In [11]:
print("<LightGBM>")
Model_Evaluation(result, LightGBM.CallLightGBM(result))

<LightGBM>
[LightGBM] [Info] Number of positive: 1409, number of negative: 1391
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000887 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 723
[LightGBM] [Info] Number of data points in the train set: 2800, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.503214 -> initscore=0.012857
[LightGBM] [Info] Start training from score 0.012857
훈련용 평가지표: 1.0 / 테스트용 평가지표: 0.9871428571428571
test.csv 평가지표: 0.9886666666666667
train.csv에 대한 로컬PC Score: 0.9871016196447231
test.csv에 대한 로컬PC Score: 0.9886841225705976
train.csv confusion matrix
[[0.97988506 0.02011494]
 [0.00568182 0.99431818]]
test.csv confusion matrix
[[0.9919571  0.0080429 ]
 [0.01458886 0.98541114]]
validation Accuracy: 0.9871428571428571
validation Macro-F1: 0.9871407314270318
              precision    recall  f1-score   support

           0       0.99      0.98      0.9

<__main__.Model_Evaluation at 0x2042c632780>