In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np

In [4]:
data=pd.read_csv('/content/drive/MyDrive/mon.csv')

# 배깅- Random Forest

- 기본 모델 학습

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score

# 특징(X)와 타겟(y) 분리
X = data.drop(columns=['Label'])
y = data['Label']

# 데이터를 훈련 세트와 테스트 세트로 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 랜덤 포레스트 모델 초기화 및 학습
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# 테스트 세트 예측
y_pred = model.predict(X_test)

# 성능 평가
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')  # 가중 평균 F1 점수 계산
classification_rep = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print(f"F1 Score (weighted): {f1}")
print("\nClassification Report:\n", classification_rep)
print("\nConfusion Matrix:\n", conf_matrix)

Accuracy: 0.7739473684210526
F1 Score (weighted): 0.772821378878717

Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.67      0.73        42
           1       0.84      0.64      0.73        42
           2       0.82      0.94      0.88        35
           3       0.66      0.86      0.75        29
           4       0.83      0.90      0.86        39
           5       0.89      0.93      0.91        45
           6       0.78      0.89      0.83        44
           7       0.78      0.89      0.83        36
           8       0.75      0.71      0.73        34
           9       0.56      0.61      0.58        31
          10       0.95      0.74      0.83        47
          11       0.67      0.80      0.73        35
          12       0.86      0.86      0.86        42
          13       0.66      0.53      0.58        40
          14       0.83      0.56      0.67        36
          15       0.78      0.80      0.7

In [None]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix

# 특징(X)와 타겟(y) 분리
X = data.drop(columns=['Label'])
y = data['Label']

# 데이터를 훈련 세트와 테스트 세트로 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 랜덤 포레스트 모델 초기화
model = RandomForestClassifier(random_state=42)

# GridSearchCV를 위한 하이퍼파라미터 공간 정의
param_grid = {
    'n_estimators': [100, 200, 300],  # 결정 트리의 수
    'max_depth': [10, 20, 30, None],   # 트리의 최대 깊이
    'min_samples_split': [2, 5, 10],   # 노드를 분할하는 데 필요한 최소 샘플 수
    'min_samples_leaf': [1, 2, 4],     # 리프 노드에 필요한 최소 샘플 수
    'max_features': ['auto', 'sqrt', 'log2'],  # 각 트리에서 고려할 최대 특징 수
    'bootstrap': [True, False]         # 부트스트랩 샘플링 여부
}

# GridSearchCV 초기화 및 학습
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, n_jobs=1, verbose=2, scoring='accuracy')
grid_search.fit(X_train, y_train)

# 최적의 하이퍼파라미터 출력
print(f"Best parameters found: {grid_search.best_params_}")

# 최적의 모델로 예측
y_pred = grid_search.best_estimator_.predict(X_test)

# 성능 평가
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')  # 가중 평균 F1 점수 계산
classification_rep = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print(f"F1 Score (weighted): {f1}")
print("\nClassification Report:\n", classification_rep)
print("\nConfusion Matrix:\n", conf_matrix)


Fitting 5 folds for each of 648 candidates, totalling 3240 fits
[CV] END bootstrap=True, max_depth=10, max_features=auto, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.0s
[CV] END bootstrap=True, max_depth=10, max_features=auto, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.0s
[CV] END bootstrap=True, max_depth=10, max_features=auto, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.0s
[CV] END bootstrap=True, max_depth=10, max_features=auto, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.0s
[CV] END bootstrap=True, max_depth=10, max_features=auto, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.0s
[CV] END bootstrap=True, max_depth=10, max_features=auto, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time=   0.0s
[CV] END bootstrap=True, max_depth=10, max_features=auto, min_samples_leaf=1, min_samples_split=2, n_estimators=200; tot