In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the dataset into a DataFrame; later cells read its 'Label' column as the target.
mon = pd.read_csv('../../../mon.csv')

# 배깅 - Random Forest

- 기본 모델 학습

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score

# Split the frame into the target column and the remaining feature columns.
y = mon['Label']
X = mon.drop(columns=['Label'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Train a random forest with default hyperparameters (fit returns the estimator itself).
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Predict labels for the held-out rows.
y_pred = model.predict(X_test)

# Score the predictions against the true test labels.
conf_matrix = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')  # support-weighted mean of per-class F1
accuracy = accuracy_score(y_test, y_pred)

# Report headline numbers first, then the per-class breakdown.
print(f"Accuracy: {accuracy}")
print(f"F1 Score (weighted): {f1}")
print("\nClassification Report:\n", classification_rep)
print("\nConfusion Matrix:\n", conf_matrix)

Accuracy: 0.7739473684210526
F1 Score (weighted): 0.772821378878717

Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.67      0.73        42
           1       0.84      0.64      0.73        42
           2       0.82      0.94      0.88        35
           3       0.66      0.86      0.75        29
           4       0.83      0.90      0.86        39
           5       0.89      0.93      0.91        45
           6       0.78      0.89      0.83        44
           7       0.78      0.89      0.83        36
           8       0.75      0.71      0.73        34
           9       0.56      0.61      0.58        31
          10       0.95      0.74      0.83        47
          11       0.67      0.80      0.73        35
          12       0.86      0.86      0.86        42
          13       0.66      0.53      0.58        40
          14       0.83      0.56      0.67        36
          15       0.78      0.80      0.7

# 부스팅 - XGBoost, LightGBM, CatBoost 및 소프트 보팅 앙상블

In [None]:
from sklearn.ensemble import VotingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, f1_score
import warnings

warnings.filterwarnings("ignore")  # 불필요한 경고 메시지 무시

def _fit_and_report(title, estimator, X_fit, y_fit, X_eval, y_eval, report_label=None):
    """Fit a classifier, predict the evaluation rows and print its metrics.

    Args:
        title: Prefix for the accuracy and F1 lines (e.g. "XGBoost").
        estimator: Classifier exposing ``fit`` and ``predict``; it is fitted in place.
        X_fit, y_fit: Training features and labels.
        X_eval, y_eval: Held-out features and labels used for scoring.
        report_label: Name shown in the classification-report heading;
            defaults to ``title``.

    Returns:
        Tuple ``(predictions, weighted_f1)`` computed on the evaluation rows.
    """
    if report_label is None:
        report_label = title

    estimator.fit(X_fit, y_fit)
    predictions = estimator.predict(X_eval)
    weighted_f1 = f1_score(y_eval, predictions, average='weighted')

    print(f"{title} Accuracy:", accuracy_score(y_eval, predictions))
    print(f"{title} F1 Score (weighted):", weighted_f1)
    print(
        f"\nClassification Report ({report_label}):\n",
        classification_report(y_eval, predictions),
    )
    return predictions, weighted_f1


# Separate features (X) from the target (y).
X = mon.drop(columns=['Label'])
y = mon['Label']

# Same test_size and random_state as the Random Forest cell, so every model
# is scored on the same held-out rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 1. XGBoost
# XGBoost's logging level is `verbosity` (the estimator has no `verbose`
# parameter), and 'mlogloss' is the multi-class log loss, which matches the
# multi-class Label target.
xgb_model = XGBClassifier(random_state=42, eval_metric='mlogloss', verbosity=0)
xgb_pred, xgb_f1 = _fit_and_report("XGBoost", xgb_model, X_train, y_train, X_test, y_test)

# 2. LightGBM
lgbm_model = LGBMClassifier(random_state=42, verbose=0)
lgbm_pred, lgbm_f1 = _fit_and_report("LightGBM", lgbm_model, X_train, y_train, X_test, y_test)

# 3. CatBoost
cat_model = CatBoostClassifier(random_state=42, verbose=0)
cat_pred, cat_f1 = _fit_and_report("CatBoost", cat_model, X_train, y_train, X_test, y_test)

# 4. Ensemble - VotingClassifier
# Soft voting averages the class probabilities of the member models.
# VotingClassifier.fit trains fresh clones of the listed estimators, so the
# three boosters are trained once more inside the ensemble.
ensemble_model = VotingClassifier(
    estimators=[('xgb', xgb_model), ('lgbm', lgbm_model), ('cat', cat_model)],
    voting='soft',
)
ensemble_pred, ensemble_f1 = _fit_and_report(
    "Ensemble Model",
    ensemble_model,
    X_train,
    y_train,
    X_test,
    y_test,
    report_label="Ensemble",
)


Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



XGBoost Accuracy: 0.766578947368421
XGBoost F1 Score (weighted): 0.7670990876618595

Classification Report (XGBoost):
               precision    recall  f1-score   support

           0       0.64      0.60      0.62        42
           1       0.81      0.71      0.76        42
           2       0.89      0.91      0.90        35
           3       0.79      0.90      0.84        29
           4       0.86      0.92      0.89        39
           5       0.88      0.84      0.86        45
           6       0.89      0.91      0.90        44
           7       0.88      0.83      0.86        36
           8       0.74      0.74      0.74        34
           9       0.75      0.68      0.71        31
          10       0.84      0.81      0.83        47
          11       0.62      0.71      0.67        35
          12       0.87      0.81      0.84        42
          13       0.51      0.53      0.52        40
          14       0.68      0.64      0.66        36
          15    