In [1]:
import pandas as pd

In [2]:
base_df = pd.read_csv("../data/final_dataset.csv")

In [7]:
base_df["num_of_prev_attempts"].value_counts()

num_of_prev_attempts
0    28381
1     3293
2      674
3      142
4       38
5       13
6        4
Name: count, dtype: int64

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
import joblib
from pathlib import Path
# 상위 폴더를 Python 경로에 추가
sys.path.append(str(Path.cwd().parent))
from util.model_io import load_models
import warnings
warnings.filterwarnings('ignore')


# ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
MODEL_PATH = os.path.join("../", "model", "all_models.pkl")

# 모델 + 평가 지표 불러오기 (전체 페이지에서 사용)
model_bundle = load_models(MODEL_PATH)

[✔] 전체 모델과 지표가 로드되었습니다: ../model\all_models.pkl


In [4]:
model_names = list(model_bundle.keys())

In [5]:
model_names

['Decision Tree',
 'Random Forest',
 'KNN',
 'SVM',
 'XGBoost',
 'Logistic Regression',
 'SGDClassifier',
 'AdaBoost',
 'CatBoost',
 'Naive Bayes',
 'LightGBM',
 'MLPClassifier']

In [None]:
# # ------------------ 입력 폼 ------------------
# with st.form("dropout_form"):
#     st.markdown("### 📋 학습자 정보 입력")

#     # 🔹 줄 1: ID, 성별, 나이대, 장애 여부
#     col1, col2, col3, col4 = st.columns(4)
#     with col1:
#         gender = st.selectbox("성별", ["M", "F"])
#     with col2:
#         age_band = st.selectbox("연령대", ['55<=', '35-55', '0-35'])
#     with col3:
#         disability = st.selectbox("장애 등록 여부", ["N", "Y"])
#     with col4:
#         region = st.selectbox("지역", ['East Anglian Region', 'Scotland', 'North Western Region',
#        'South East Region', 'West Midlands Region', 'Wales',
#        'North Region', 'South Region', 'Ireland', 'South West Region',
#        'East Midlands Region', 'Yorkshire Region', 'London Region'])
#     # 🔹 줄 2: 과목 코드, 학기, 지역, 학력
#     col5, col6 = st.columns(2)
#     with col5:
#         highest_education = st.selectbox("최종 학력", ['HE Qualification', 'A Level or Equivalent', 'Lower Than A Level',
#        'Post Graduate Qualification', 'No Formal quals'])
#     with col6:
#         imd_band = st.selectbox("소득구간", ['90-100%', '20-30%', '30-40%', '50-60%', '80-90%', '70-80%',
#        '60-70%', '40-50%', '10-20', '0-10%'])

# 🔹 모델 선택 추가
model_names = list(model_bundle.keys())

#     st.markdown("### 🔍 예측 모델 선택")
#     selected_model = st.selectbox("사용할 모델", model_names)

#     submitted = st.form_submit_button("📊 예측하기")


# ------------------ 결과 출력 ------------------

# if submitted:

In [6]:
import pickle
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import joblib
import numpy as np

# ✅ 모델 로드 및 컬럼 정렬
model_info = model_bundle[input("모델 이름")]
model = model_info["model"]


In [7]:
X_test = model_info["metrics"]["X_test"]

cat_columns = ['code_module', 'code_presentation', 'gender', 'region', 'highest_education',
            'imd_band', 'age_band', 'num_of_prev_attempts', 'disability']
num_columns = ['studied_credits', 'date_registration', 'date_unregistration', 'sum_click', 'avg_score']


# 2. 전처리기 다시 정의 (학습 때 사용한 구조와 동일하게)
fe_transformer = ColumnTransformer([
    ("category_ohe", OneHotEncoder(handle_unknown='ignore'), cat_columns),
    ("number_scaler", StandardScaler(), num_columns)
])

# 3. 기존 데이터 불러오기 (유사도 비교용)
base_df = pd.read_csv("../data/final_dataset.csv")
X_base = base_df.drop(columns="target")

In [8]:

# 4. 새로운 입력 데이터 예시 (범주형만 입력)
# new_input = pd.DataFrame([{
#     'gender': "F",
#     'region': "BB",
#     "highest_education": "2",
#     "imd_band": "4.0",
#     "age_band": "45",
#     "disability": "Y", 
#     'code_module': "AAA",
#     'code_presentation': "2013J",
#     "num_of_prev_attempts": "0", 
#     "is_dropout": "1"
# }], dtype=object)

imd_order = {
    "0-10%": 1,
    "10-20": 2,
    "20-30%": 3,
    "30-40%": 4,
    "40-50%": 5,
    "50-60%": 6,
    "60-70%": 7,
    "70-80%": 8,
    "80-90%": 9,
    "90-100%": 10
}

education_order = {
    "No Formal Quals": 0,
    "Lower Than A Level": 1,
    "A Level Or Equivalent": 2,
    "He Qualification": 3,
    "Post Graduate Qualification": 4
}

age_map = {
    "0-35": 30,
    "35-55": 45,
    "55<=": 60
}

new_input = pd.DataFrame([{
    "gender": input("gender"),
    "region": input("region"),
    "highest_education": input("highest_education"),
    "imd_band": input("imd_band"),
    "age_band": input("age_band"),
    "disability": input("disability"),
    'code_module': "BBB",
    'code_presentation': "2014J",
    "num_of_prev_attempts": "0"
}])

# 문자열 표준화 + 매핑 적용
new_input["imd_band"] = new_input["imd_band"].str.strip().str.title().replace(imd_order).astype("Int64")
new_input["highest_education"] = new_input["highest_education"].str.strip().str.title().replace(education_order).astype("Int64")
new_input["age_band"] = new_input["age_band"].str.strip().replace(age_map).astype("Int64")


# ✅ 입력값을 DataFrame 형태로 생성


new_input = new_input.astype({
    'code_module': 'object',
    'code_presentation': 'object',
    'gender': 'object',
    'region': 'object',
    'highest_education': 'object',
    'imd_band': 'float64',
    'age_band': 'int64',
    'num_of_prev_attempts': 'int64',
    'disability': 'object'
})

In [9]:
X_base_cat = X_base[cat_columns]
new_cat_input = new_input[cat_columns]

In [10]:
X_base_cat

Unnamed: 0,code_module,code_presentation,gender,region,highest_education,imd_band,age_band,num_of_prev_attempts,disability
0,AAA,2013J,M,East Anglian Region,3,10.0,60,0,N
1,AAA,2013J,F,Scotland,3,3.0,45,0,N
2,AAA,2013J,F,North Western Region,2,4.0,45,0,Y
3,AAA,2013J,F,South East Region,2,6.0,45,0,N
4,AAA,2013J,F,West Midlands Region,1,6.0,30,0,N
...,...,...,...,...,...,...,...,...,...
32540,GGG,2014J,F,Wales,1,2.0,30,0,N
32541,GGG,2014J,F,East Anglian Region,1,5.0,45,0,N
32542,GGG,2014J,F,South Region,2,3.0,30,0,Y
32543,GGG,2014J,F,South East Region,1,10.0,45,0,N


In [11]:
new_cat_input

Unnamed: 0,code_module,code_presentation,gender,region,highest_education,imd_band,age_band,num_of_prev_attempts,disability
0,BBB,2014J,M,North Western Region,3,6.0,45,0,Y


In [12]:
from scipy.sparse import issparse
# 원핫 인코딩만 따로 적용해서 비교용 데이터 생성
cat_encoder = OneHotEncoder(handle_unknown='ignore')
X_base_cat_encoded = cat_encoder.fit_transform(X_base_cat).toarray()
new_cat_encoded = cat_encoder.transform(new_cat_input).toarray()

# 6. 유사한 기존 데이터 1개 선택
similarities = cosine_similarity(new_cat_encoded, X_base_cat_encoded)
most_similar_index = np.argmax(similarities)

# 7. 해당 인덱스의 수치형 컬럼을 사용해 입력 데이터 보완
new_complete_input = new_input.copy()
for col in num_columns:
    new_complete_input[col] = X_base.loc[most_similar_index, col]

# 수치형까지 포함된 최종 입력 데이터를 원래 순서로 정렬
new_complete_input = new_complete_input.reindex(columns=X_base.columns)

# 8. 전처리 후 예측
fe_transformer.fit(X_base)  # DataFrame 그대로
X_transformed = fe_transformer.transform(new_complete_input)
if issparse(X_transformed):
    X_transformed = X_transformed.toarray()

In [13]:

# 예측
prediction = model.predict(X_transformed)
print("✅ 예측 결과:", prediction)

# 확률 예측
proba = model.predict_proba(X_transformed)
# 실제 예측 클래스
pred_class = model.predict(X_transformed)

# 이탈률 계산
if pred_class[0] == 0:
    dropout_prob = 1 - proba[0][0]
else:
    dropout_prob = proba[0][1]

print(f"📊 예측 클래스: {pred_class[0]}")
print(f"🔥 이탈률 (확률): {dropout_prob:.4f}")

✅ 예측 결과: [0]
📊 예측 클래스: 0
🔥 이탈률 (확률): 0.2178


In [21]:
# explainer = shap.Explainer(model, X_transformed)
# shap_values = explainer(X_transformed)

# top_idx = np.argsort(np.abs(shap_values.values[0]))[::-1]
# print("\n🔍 예측에 가장 영향을 준 주요 피처:")
# for i in top_idx[:5]:
#     print(f"{shap_values.feature_names[i]}: {shap_values.values[0][i]:.4f}")

In [18]:
from sklearn.inspection import permutation_importance

def get_most_important_feature(model, X, y):
    result = permutation_importance(model, X, y, n_repeats=10, random_state=42, n_jobs=-1)
    importances = result.importances_mean
    most_important_idx = importances.argmax()
    return X.columns[most_important_idx], importances[most_important_idx]

In [19]:
import joblib

model = joblib.load("../model/all_models.pkl")  # 또는 pickle.load(open("model.pkl", "rb"))

In [33]:
from sklearn.model_selection import train_test_split

# 예측 대상 컬럼 이름
target_column = 'target'  # 실제 컬럼명으로 바꿔주세요

# X, y 분리
X = base_df.drop(columns=[target_column])
y = base_df[target_column]

# train/test 분리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 예시로 Decision Tree 모델만 꺼내기
dt_model = model_bundle['SVM']['model']

# 그 모델로 permutation_importance 사용
result = permutation_importance(dt_model, X_test, y_test, n_repeats=10, random_state=42, n_jobs=-1)

ValueError: could not convert string to float: 'CCC'

In [None]:
model_bundle

{'model': SVC(C=0.1, gamma=0.1, kernel='linear', probability=True, random_state=42),
 'metrics': {'best_params': {'C': 0.1, 'gamma': 0.1, 'kernel': 'linear'},
  'cv_score': np.float64(0.8835222079007623),
  'test_accuracy': 0.8826348777190611,
  'classification_report': {'0': {'precision': 0.8367394879022786,
    'recall': 0.9319727891156463,
    'f1-score': 0.8817923010273548,
    'support': 3822.0},
   '1': {'precision': 0.9329896907216495,
    'recall': 0.8389339513325609,
    'f1-score': 0.8834655277608298,
    'support': 4315.0},
   'accuracy': 0.8826348777190611,
   'macro avg': {'precision': 0.884864589311964,
    'recall': 0.8854533702241036,
    'f1-score': 0.8826289143940923,
    'support': 8137.0},
   'weighted avg': {'precision': 0.8877803660103756,
    'recall': 0.8826348777190611,
    'f1-score': 0.8826796026563267,
    'support': 8137.0}},
  'confusion_matrix': array([[3562,  260],
         [ 695, 3620]]),
  'X_test': array([[ 0.        ,  0.        ,  1.        , ...,  

In [24]:
# 예시: 모델 리스트 중 하나 돌렸다고 가정
# model.fit(X_train, y_train)
feature, score = get_most_important_feature(model, X_train, y_train)
print(f"가장 영향을 많이 준 변수: {feature} (중요도: {score:.4f})")

InvalidParameterError: The 'estimator' parameter of permutation_importance must be an object implementing 'fit'. Got {'Decision Tree': {'model': DecisionTreeClassifier(max_depth=5, max_features=3, max_leaf_nodes=5,
                       min_samples_leaf=2, random_state=42), 'metrics': {'best_params': {'max_depth': 5, 'max_features': 3, 'max_leaf_nodes': 5, 'min_samples_leaf': 2}, 'cv_score': np.float64(0.8025240292916518), 'test_accuracy': 0.795379132358363, 'classification_report': {'0': {'precision': 0.7649226234340457, 'recall': 0.814756671899529, 'f1-score': 0.7890535917901939, 'support': 3822.0}, '1': {'precision': 0.8258730939498279, 'recall': 0.778215527230591, 'f1-score': 0.8013363560434316, 'support': 4315.0}, 'accuracy': 0.795379132358363, 'macro avg': {'precision': 0.7953978586919368, 'recall': 0.79648609956506, 'f1-score': 0.7951949739168127, 'support': 8137.0}, 'weighted avg': {'precision': 0.7972442751823068, 'recall': 0.795379132358363, 'f1-score': 0.7955670645384698, 'support': 8137.0}}, 'confusion_matrix': array([[3114,  708],
       [ 957, 3358]]), 'X_test': array([[ 0.        ,  0.        ,  1.        , ...,  0.6671221 ,
        -0.26152503,  1.1707608 ],
       [ 0.        ,  0.        ,  1.        , ..., -1.5071683 ,
        -0.71863645, -1.9110389 ],
       [ 0.        ,  0.        ,  0.        , ...,  0.6671221 ,
        -0.41684842,  0.35137102],
       ...,
       [ 0.        ,  0.        ,  1.        , ..., -1.5152236 ,
        -0.71863645, -1.9110389 ],
       [ 0.        ,  0.        ,  0.        , ..., -1.498242  ,
        -0.61764675,  0.32650936],
       [ 0.        ,  1.        ,  0.        , ...,  0.6671221 ,
        -0.23317704,  1.0733274 ]], shape=(8137, 58), dtype=float32), 'y_test': array([0, 1, 0, ..., 1, 1, 0], shape=(8137,))}}, 'Random Forest': {'model': RandomForestClassifier(max_depth=10, min_samples_leaf=2, n_estimators=50,
                       random_state=42), 'metrics': {'best_params': {'max_depth': 10, 'min_samples_leaf': 2, 'n_estimators': 50}, 'cv_score': np.float64(0.8871273633241321), 'test_accuracy': 0.8922207201671377, 'classification_report': {'0': {'precision': 0.8472058476774346, 'recall': 0.9400837257980115, 'f1-score': 0.891231551531688, 'support': 3822.0}, '1': {'precision': 0.9412217659137577, 'recall': 0.8498261877172654, 'f1-score': 0.8931920594324686, 'support': 4315.0}, 'accuracy': 0.8922207201671377, 'macro avg': {'precision': 0.8942138067955961, 'recall': 0.8949549567576385, 'f1-score': 0.8922118054820782, 'support': 8137.0}, 'weighted avg': {'precision': 0.8970618987023496, 'recall': 0.8922207201671377, 'f1-score': 0.8922711965595691, 'support': 8137.0}}, 'confusion_matrix': array([[3593,  229],
       [ 648, 3667]]), 'X_test': array([[ 0.        ,  0.        ,  1.        , ...,  0.6671221 ,
        -0.26152503,  1.1707608 ],
       [ 0.        ,  0.        ,  1.        , ..., -1.5071683 ,
        -0.71863645, -1.9110389 ],
       [ 0.        ,  0.        ,  0.        , ...,  0.6671221 ,
        -0.41684842,  0.35137102],
       ...,
       [ 0.        ,  0.        ,  1.        , ..., -1.5152236 ,
        -0.71863645, -1.9110389 ],
       [ 0.        ,  0.        ,  0.        , ..., -1.498242  ,
        -0.61764675,  0.32650936],
       [ 0.        ,  1.        ,  0.        , ...,  0.6671221 ,
        -0.23317704,  1.0733274 ]], shape=(8137, 58), dtype=float32), 'y_test': array([0, 1, 0, ..., 1, 1, 0], shape=(8137,))}}, 'KNN': {'model': KNeighborsClassifier(), 'metrics': {'best_params': {'n_neighbors': 5, 'weights': 'uniform'}, 'cv_score': np.float64(0.8450102610083947), 'test_accuracy': 0.8515423374708123, 'classification_report': {'0': {'precision': 0.7957013574660633, 'recall': 0.9201988487702774, 'f1-score': 0.8534336326134433, 'support': 3822.0}, '1': {'precision': 0.9179445789615281, 'recall': 0.7907300115874856, 'f1-score': 0.849601593625498, 'support': 4315.0}, 'accuracy': 0.8515423374708123, 'macro avg': {'precision': 0.8568229682137958, 'recall': 0.8554644301788814, 'f1-score': 0.8515176131194706, 'support': 8137.0}, 'weighted avg': {'precision': 0.8605261701430856, 'recall': 0.8515423374708123, 'f1-score': 0.8514015264031712, 'support': 8137.0}}, 'confusion_matrix': array([[3517,  305],
       [ 903, 3412]]), 'X_test': array([[ 0.        ,  0.        ,  1.        , ...,  0.6671221 ,
        -0.26152503,  1.1707608 ],
       [ 0.        ,  0.        ,  1.        , ..., -1.5071683 ,
        -0.71863645, -1.9110389 ],
       [ 0.        ,  0.        ,  0.        , ...,  0.6671221 ,
        -0.41684842,  0.35137102],
       ...,
       [ 0.        ,  0.        ,  1.        , ..., -1.5152236 ,
        -0.71863645, -1.9110389 ],
       [ 0.        ,  0.        ,  0.        , ..., -1.498242  ,
        -0.61764675,  0.32650936],
       [ 0.        ,  1.        ,  0.        , ...,  0.6671221 ,
        -0.23317704,  1.0733274 ]], shape=(8137, 58), dtype=float32), 'y_test': array([0, 1, 0, ..., 1, 1, 0], shape=(8137,))}}, 'SVM': {'model': SVC(C=0.1, gamma=0.1, kernel='linear', probability=True, random_state=42), 'metrics': {'best_params': {'C': 0.1, 'gamma': 0.1, 'kernel': 'linear'}, 'cv_score': np.float64(0.8835222079007623), 'test_accuracy': 0.8826348777190611, 'classification_report': {'0': {'precision': 0.8367394879022786, 'recall': 0.9319727891156463, 'f1-score': 0.8817923010273548, 'support': 3822.0}, '1': {'precision': 0.9329896907216495, 'recall': 0.8389339513325609, 'f1-score': 0.8834655277608298, 'support': 4315.0}, 'accuracy': 0.8826348777190611, 'macro avg': {'precision': 0.884864589311964, 'recall': 0.8854533702241036, 'f1-score': 0.8826289143940923, 'support': 8137.0}, 'weighted avg': {'precision': 0.8877803660103756, 'recall': 0.8826348777190611, 'f1-score': 0.8826796026563267, 'support': 8137.0}}, 'confusion_matrix': array([[3562,  260],
       [ 695, 3620]]), 'X_test': array([[ 0.        ,  0.        ,  1.        , ...,  0.6671221 ,
        -0.26152503,  1.1707608 ],
       [ 0.        ,  0.        ,  1.        , ..., -1.5071683 ,
        -0.71863645, -1.9110389 ],
       [ 0.        ,  0.        ,  0.        , ...,  0.6671221 ,
        -0.41684842,  0.35137102],
       ...,
       [ 0.        ,  0.        ,  1.        , ..., -1.5152236 ,
        -0.71863645, -1.9110389 ],
       [ 0.        ,  0.        ,  0.        , ..., -1.498242  ,
        -0.61764675,  0.32650936],
       [ 0.        ,  1.        ,  0.        , ...,  0.6671221 ,
        -0.23317704,  1.0733274 ]], shape=(8137, 58), dtype=float32), 'y_test': array([0, 1, 0, ..., 1, 1, 0], shape=(8137,))}}, 'XGBoost': {'model': XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              feature_weights=None, gamma=None, grow_policy=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.01, max_bin=None, max_cat_threshold=None,
              max_cat_to_onehot=None, max_delta_step=None, max_depth=5,
              max_leaves=None, min_child_weight=None, missing=nan,
              monotone_constraints=None, multi_strategy=None, n_estimators=1000,
              n_jobs=None, num_parallel_tree=None, ...), 'metrics': {'best_params': {'learning_rate': 0.01, 'max_depth': 5, 'n_estimators': 1000}, 'cv_score': np.float64(0.8996231321427022), 'test_accuracy': 0.897751013887182, 'classification_report': {'0': {'precision': 0.859720885466795, 'recall': 0.934850863422292, 'f1-score': 0.8957132113311607, 'support': 3822.0}, '1': {'precision': 0.9374529012810852, 'recall': 0.8648899188876014, 'f1-score': 0.8997107039537127, 'support': 4315.0}, 'accuracy': 0.897751013887182, 'macro avg': {'precision': 0.89858689337394, 'recall': 0.8998703911549466, 'f1-score': 0.8977119576424366, 'support': 8137.0}, 'weighted avg': {'precision': 0.9009416852994928, 'recall': 0.897751013887182, 'f1-score': 0.8978330565648232, 'support': 8137.0}}, 'confusion_matrix': array([[3573,  249],
       [ 583, 3732]]), 'X_test': array([[ 0.        ,  0.        ,  1.        , ...,  0.6671221 ,
        -0.26152503,  1.1707608 ],
       [ 0.        ,  0.        ,  1.        , ..., -1.5071683 ,
        -0.71863645, -1.9110389 ],
       [ 0.        ,  0.        ,  0.        , ...,  0.6671221 ,
        -0.41684842,  0.35137102],
       ...,
       [ 0.        ,  0.        ,  1.        , ..., -1.5152236 ,
        -0.71863645, -1.9110389 ],
       [ 0.        ,  0.        ,  0.        , ..., -1.498242  ,
        -0.61764675,  0.32650936],
       [ 0.        ,  1.        ,  0.        , ...,  0.6671221 ,
        -0.23317704,  1.0733274 ]], shape=(8137, 58), dtype=float32), 'y_test': array([0, 1, 0, ..., 1, 1, 0], shape=(8137,))}}, 'Logistic Regression': {'model': LogisticRegression(C=0.1, max_iter=5000, random_state=42, solver='saga'), 'metrics': {'best_params': {'C': 0.1, 'penalty': 'l2', 'solver': 'saga'}, 'cv_score': np.float64(0.8825798703951253), 'test_accuracy': 0.8828806685510631, 'classification_report': {'0': {'precision': 0.8408172962698979, 'recall': 0.9259549973835688, 'f1-score': 0.8813348275432699, 'support': 3822.0}, '1': {'precision': 0.9279531568228105, 'recall': 0.8447276940903824, 'f1-score': 0.8843867523959723, 'support': 4315.0}, 'accuracy': 0.8828806685510631, 'macro avg': {'precision': 0.8843852265463542, 'recall': 0.8853413457369756, 'f1-score': 0.882860789969621, 'support': 8137.0}, 'weighted avg': {'precision': 0.8870248959117584, 'recall': 0.8828806685510631, 'f1-score': 0.882953244126705, 'support': 8137.0}}, 'confusion_matrix': array([[3539,  283],
       [ 670, 3645]]), 'X_test': array([[ 0.        ,  0.        ,  1.        , ...,  0.6671221 ,
        -0.26152503,  1.1707608 ],
       [ 0.        ,  0.        ,  1.        , ..., -1.5071683 ,
        -0.71863645, -1.9110389 ],
       [ 0.        ,  0.        ,  0.        , ...,  0.6671221 ,
        -0.41684842,  0.35137102],
       ...,
       [ 0.        ,  0.        ,  1.        , ..., -1.5152236 ,
        -0.71863645, -1.9110389 ],
       [ 0.        ,  0.        ,  0.        , ..., -1.498242  ,
        -0.61764675,  0.32650936],
       [ 0.        ,  1.        ,  0.        , ...,  0.6671221 ,
        -0.23317704,  1.0733274 ]], shape=(8137, 58), dtype=float32), 'y_test': array([0, 1, 0, ..., 1, 1, 0], shape=(8137,))}}, 'SGDClassifier': {'model': SGDClassifier(alpha=0.01, eta0=0.1, learning_rate='adaptive', loss='log_loss',
              random_state=42), 'metrics': {'best_params': {'alpha': 0.01, 'eta0': 0.1, 'learning_rate': 'adaptive', 'loss': 'log_loss'}, 'cv_score': np.float64(0.8793022144994331), 'test_accuracy': 0.8793167014870346, 'classification_report': {'0': {'precision': 0.8236098450319052, 'recall': 0.9455782312925171, 'f1-score': 0.8803897685749087, 'support': 3822.0}, '1': {'precision': 0.9445185382768738, 'recall': 0.8206257242178447, 'f1-score': 0.8782242063492064, 'support': 4315.0}, 'accuracy': 0.8793167014870346, 'macro avg': {'precision': 0.8840641916543894, 'recall': 0.883101977755181, 'f1-score': 0.8793069874620576, 'support': 8137.0}, 'weighted avg': {'precision': 0.8877269657584678, 'recall': 0.8793167014870346, 'f1-score': 0.8792413845262538, 'support': 8137.0}}, 'confusion_matrix': array([[3614,  208],
       [ 774, 3541]]), 'X_test': array([[ 0.        ,  0.        ,  1.        , ...,  0.6671221 ,
        -0.26152503,  1.1707608 ],
       [ 0.        ,  0.        ,  1.        , ..., -1.5071683 ,
        -0.71863645, -1.9110389 ],
       [ 0.        ,  0.        ,  0.        , ...,  0.6671221 ,
        -0.41684842,  0.35137102],
       ...,
       [ 0.        ,  0.        ,  1.        , ..., -1.5152236 ,
        -0.71863645, -1.9110389 ],
       [ 0.        ,  0.        ,  0.        , ..., -1.498242  ,
        -0.61764675,  0.32650936],
       [ 0.        ,  1.        ,  0.        , ...,  0.6671221 ,
        -0.23317704,  1.0733274 ]], shape=(8137, 58), dtype=float32), 'y_test': array([0, 1, 0, ..., 1, 1, 0], shape=(8137,))}}, 'AdaBoost': {'model': AdaBoostClassifier(learning_rate=0.5, n_estimators=100, random_state=42), 'metrics': {'best_params': {'learning_rate': 0.5, 'n_estimators': 100}, 'cv_score': np.float64(0.8779088811039906), 'test_accuracy': 0.8746466756789971, 'classification_report': {'0': {'precision': 0.8182644252612449, 'recall': 0.9424385138670853, 'f1-score': 0.8759727626459144, 'support': 3822.0}, '1': {'precision': 0.9410977242302544, 'recall': 0.8146002317497103, 'f1-score': 0.8732919254658386, 'support': 4315.0}, 'accuracy': 0.8746466756789971, 'macro avg': {'precision': 0.8796810747457496, 'recall': 0.8785193728083978, 'f1-score': 0.8746323440558765, 'support': 8137.0}, 'weighted avg': {'precision': 0.8834021523168275, 'recall': 0.8746466756789971, 'f1-score': 0.8745511315248591, 'support': 8137.0}}, 'confusion_matrix': array([[3602,  220],
       [ 800, 3515]]), 'X_test': array([[ 0.        ,  0.        ,  1.        , ...,  0.6671221 ,
        -0.26152503,  1.1707608 ],
       [ 0.        ,  0.        ,  1.        , ..., -1.5071683 ,
        -0.71863645, -1.9110389 ],
       [ 0.        ,  0.        ,  0.        , ...,  0.6671221 ,
        -0.41684842,  0.35137102],
       ...,
       [ 0.        ,  0.        ,  1.        , ..., -1.5152236 ,
        -0.71863645, -1.9110389 ],
       [ 0.        ,  0.        ,  0.        , ..., -1.498242  ,
        -0.61764675,  0.32650936],
       [ 0.        ,  1.        ,  0.        , ...,  0.6671221 ,
        -0.23317704,  1.0733274 ]], shape=(8137, 58), dtype=float32), 'y_test': array([0, 1, 0, ..., 1, 1, 0], shape=(8137,))}}, 'CatBoost': {'model': <catboost.core.CatBoostClassifier object at 0x0000017A2830D6D0>, 'metrics': {'best_params': {'depth': 5, 'iterations': 500, 'l2_leaf_reg': 3, 'learning_rate': 0.03}, 'cv_score': np.float64(0.900688395278333), 'test_accuracy': 0.8983654909671869, 'classification_report': {'0': {'precision': 0.8626786146766772, 'recall': 0.9319727891156463, 'f1-score': 0.8959879260470381, 'support': 3822.0}, '1': {'precision': 0.935129740518962, 'recall': 0.8685979142526072, 'f1-score': 0.9006367896191277, 'support': 4315.0}, 'accuracy': 0.8983654909671869, 'macro avg': {'precision': 0.8989041775978196, 'recall': 0.9002853516841267, 'f1-score': 0.8983123578330829, 'support': 8137.0}, 'weighted avg': {'precision': 0.901098991720976, 'recall': 0.8983654909671869, 'f1-score': 0.8984531892046597, 'support': 8137.0}}, 'confusion_matrix': array([[3562,  260],
       [ 567, 3748]]), 'X_test': array([[ 0.        ,  0.        ,  1.        , ...,  0.6671221 ,
        -0.26152503,  1.1707608 ],
       [ 0.        ,  0.        ,  1.        , ..., -1.5071683 ,
        -0.71863645, -1.9110389 ],
       [ 0.        ,  0.        ,  0.        , ...,  0.6671221 ,
        -0.41684842,  0.35137102],
       ...,
       [ 0.        ,  0.        ,  1.        , ..., -1.5152236 ,
        -0.71863645, -1.9110389 ],
       [ 0.        ,  0.        ,  0.        , ..., -1.498242  ,
        -0.61764675,  0.32650936],
       [ 0.        ,  1.        ,  0.        , ...,  0.6671221 ,
        -0.23317704,  1.0733274 ]], shape=(8137, 58), dtype=float32), 'y_test': array([0, 1, 0, ..., 1, 1, 0], shape=(8137,))}}, 'Naive Bayes': {'model': GaussianNB(var_smoothing=1e-07), 'metrics': {'best_params': {'var_smoothing': 1e-07}, 'cv_score': np.float64(0.829031213256496), 'test_accuracy': 0.8280693130146246, 'classification_report': {'0': {'precision': 0.7404246874379837, 'recall': 0.9761904761904762, 'f1-score': 0.8421171425347026, 'support': 3822.0}, '1': {'precision': 0.9706262104583603, 'recall': 0.6968713789107763, 'f1-score': 0.8112774854984487, 'support': 4315.0}, 'accuracy': 0.8280693130146246, 'macro avg': {'precision': 0.8555254489481721, 'recall': 0.8365309275506263, 'f1-score': 0.8266973140165756, 'support': 8137.0}, 'weighted avg': {'precision': 0.8624991094403096, 'recall': 0.8280693130146246, 'f1-score': 0.8257630660800589, 'support': 8137.0}}, 'confusion_matrix': array([[3731,   91],
       [1308, 3007]]), 'X_test': array([[ 0.        ,  0.        ,  1.        , ...,  0.6671221 ,
        -0.26152503,  1.1707608 ],
       [ 0.        ,  0.        ,  1.        , ..., -1.5071683 ,
        -0.71863645, -1.9110389 ],
       [ 0.        ,  0.        ,  0.        , ...,  0.6671221 ,
        -0.41684842,  0.35137102],
       ...,
       [ 0.        ,  0.        ,  1.        , ..., -1.5152236 ,
        -0.71863645, -1.9110389 ],
       [ 0.        ,  0.        ,  0.        , ..., -1.498242  ,
        -0.61764675,  0.32650936],
       [ 0.        ,  1.        ,  0.        , ...,  0.6671221 ,
        -0.23317704,  1.0733274 ]], shape=(8137, 58), dtype=float32), 'y_test': array([0, 1, 0, ..., 1, 1, 0], shape=(8137,))}}, 'LightGBM': {'model': LGBMClassifier(num_leaves=30, random_state=42), 'metrics': {'best_params': {'learning_rate': 0.1, 'n_estimators': 100, 'num_leaves': 30}, 'cv_score': np.float64(0.8986809457132183), 'test_accuracy': 0.8988570726311909, 'classification_report': {'0': {'precision': 0.8624607203287407, 'recall': 0.9335426478283622, 'f1-score': 0.8965950496293504, 'support': 3822.0}, '1': {'precision': 0.9365, 'recall': 0.8681344148319815, 'f1-score': 0.9010222489476849, 'support': 4315.0}, 'accuracy': 0.8988570726311909, 'macro avg': {'precision': 0.8994803601643704, 'recall': 0.9008385313301719, 'f1-score': 0.8988086492885177, 'support': 8137.0}, 'weighted avg': {'precision': 0.9017232853750089, 'recall': 0.8988570726311909, 'f1-score': 0.8989427656252473, 'support': 8137.0}}, 'confusion_matrix': array([[3568,  254],
       [ 569, 3746]]), 'X_test': array([[ 0.        ,  0.        ,  1.        , ...,  0.6671221 ,
        -0.26152503,  1.1707608 ],
       [ 0.        ,  0.        ,  1.        , ..., -1.5071683 ,
        -0.71863645, -1.9110389 ],
       [ 0.        ,  0.        ,  0.        , ...,  0.6671221 ,
        -0.41684842,  0.35137102],
       ...,
       [ 0.        ,  0.        ,  1.        , ..., -1.5152236 ,
        -0.71863645, -1.9110389 ],
       [ 0.        ,  0.        ,  0.        , ..., -1.498242  ,
        -0.61764675,  0.32650936],
       [ 0.        ,  1.        ,  0.        , ...,  0.6671221 ,
        -0.23317704,  1.0733274 ]], shape=(8137, 58), dtype=float32), 'y_test': array([0, 1, 0, ..., 1, 1, 0], shape=(8137,))}}, 'MLPClassifier': {'model': MLPClassifier(activation='tanh', alpha=0.001, batch_size=64,
              early_stopping=True, max_iter=1000, random_state=42), 'metrics': {'best_params': {'activation': 'tanh', 'alpha': 0.001, 'batch_size': 64, 'hidden_layer_sizes': (100,), 'learning_rate_init': 0.001}, 'cv_score': np.float64(0.8956081239019177), 'test_accuracy': 0.8948015239031584, 'classification_report': {'0': {'precision': 0.8629466470876163, 'recall': 0.9225536368393511, 'f1-score': 0.8917551846231664, 'support': 3822.0}, '1': {'precision': 0.9269316218217724, 'recall': 0.8702201622247973, 'f1-score': 0.8976810901267033, 'support': 4315.0}, 'accuracy': 0.8948015239031584, 'macro avg': {'precision': 0.8949391344546943, 'recall': 0.8963868995320742, 'f1-score': 0.8947181373749349, 'support': 8137.0}, 'weighted avg': {'precision': 0.8968774773663288, 'recall': 0.8948015239031584, 'f1-score': 0.8948976550972677, 'support': 8137.0}}, 'confusion_matrix': array([[3526,  296],
       [ 560, 3755]]), 'X_test': array([[ 0.        ,  0.        ,  1.        , ...,  0.6671221 ,
        -0.26152503,  1.1707608 ],
       [ 0.        ,  0.        ,  1.        , ..., -1.5071683 ,
        -0.71863645, -1.9110389 ],
       [ 0.        ,  0.        ,  0.        , ...,  0.6671221 ,
        -0.41684842,  0.35137102],
       ...,
       [ 0.        ,  0.        ,  1.        , ..., -1.5152236 ,
        -0.71863645, -1.9110389 ],
       [ 0.        ,  0.        ,  0.        , ..., -1.498242  ,
        -0.61764675,  0.32650936],
       [ 0.        ,  1.        ,  0.        , ...,  0.6671221 ,
        -0.23317704,  1.0733274 ]], shape=(8137, 58), dtype=float32), 'y_test': array([0, 1, 0, ..., 1, 1, 0], shape=(8137,))}}} instead.