In [None]:
# Mount Google Drive into the Colab filesystem so files under
# /content/drive can be read by later cells.
from google.colab import drive
drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np

In [None]:
# Read the dataset CSV from the mounted Drive into a DataFrame.
data = pd.read_csv(filepath_or_buffer='/content/drive/MyDrive/monunmon.csv')

# 배깅 - Random Forest

- 기본 모델 학습

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score

# Separate the features (X) from the target (y); 'Label' is the class column.
X = data.drop(columns=['Label'])
y = data['Label']

# Hold out 20% of the rows as a test set.
# stratify=y keeps every class's proportion the same in the train and test
# partitions. The target is strongly imbalanced (one class has far more rows
# than the others), so a purely random split can shift per-class support
# between partitions and make the per-class / weighted-F1 test metrics noisy.
# NOTE: stratification requires at least two rows per class, and it changes
# which rows land in the test set, so scores will differ from earlier
# unstratified runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Base Random Forest estimator; the fixed seed makes each fit reproducible.
model = RandomForestClassifier(random_state=42)

# Hyperparameter grid explored by GridSearchCV:
# 3 * 4 * 3 * 3 * 2 = 216 candidate combinations.
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

# Exhaustive search scored by weighted F1 with 3-fold cross-validation
# (216 candidates * 3 folds = 648 fits), using all available CPU cores.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='f1_weighted',
    cv=3,
    verbose=2,
    n_jobs=-1
)

# Run the search on the training partition only; the test set stays unseen.
grid_search.fit(X_train, y_train)

# Report the best hyperparameters and their mean cross-validated score.
print("Best Parameters:", grid_search.best_params_)
print("Best F1 Score (weighted):", grid_search.best_score_)

# Evaluate the refitted best estimator on the held-out test set.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')
classification_rep = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print(f"F1 Score (weighted): {f1}")
print("\nClassification Report:\n", classification_rep)
print("\nConfusion Matrix:\n", conf_matrix)


Fitting 3 folds for each of 216 candidates, totalling 648 fits




Best Parameters: {'bootstrap': False, 'max_depth': 30, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 300}
Best F1 Score (weighted): 0.7407923378900692
Accuracy: 0.7511363636363636
F1 Score (weighted): 0.7513035293770451

Classification Report:
               precision    recall  f1-score   support

          -1       0.59      0.69      0.64       590
           0       0.63      0.63      0.63        30
           1       0.68      0.61      0.64        44
           2       0.86      0.78      0.82        41
           3       0.77      0.70      0.73        33
           4       0.68      0.81      0.74        32
           5       0.80      0.89      0.85        37
           6       0.86      0.95      0.90        38
           7       0.90      0.77      0.83        35
           8       0.71      0.82      0.76        33
           9       0.73      0.73      0.73        26
          10       0.84      0.74      0.79        43
          11       0.71      0.67  