In [3]:
import pandas as pd
import numpy as np

In [None]:
data=pd.read_csv('../../../../monunmon.csv')

**Gradient Boosting**




- 기본 모델 학습

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score

# 특징(X)와 타겟(y) 분리
X = data.drop(columns=['Label'])
y = data['Label']

# 데이터를 훈련 세트, 검증 세트, 테스트 세트로 분할 (60% train, 20% validation, 20% test)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42, stratify=y)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp)

# Gradient Boosting 모델 초기화 및 학습
model = GradientBoostingClassifier(random_state=42)
model.fit(X_train, y_train)

# Validation 세트 예측
y_val_pred = model.predict(X_val)

# 테스트 세트 예측
y_test_pred = model.predict(X_test)

# 성능 평가
# Validation accuracy and F1 score
valid_accuracy = accuracy_score(y_val, y_val_pred)
valid_f1 = f1_score(y_val, y_val_pred, average='weighted')  # 가중 평균 F1 점수

# Test accuracy and F1 score
test_accuracy = accuracy_score(y_test, y_test_pred)
test_f1 = f1_score(y_test, y_test_pred, average='weighted')

# Classification report and confusion matrix
classification_rep = classification_report(y_test, y_test_pred)
conf_matrix = confusion_matrix(y_test, y_test_pred)

# 결과 출력
print(f"Validation Accuracy: {valid_accuracy}")
print(f"Validation F1 Score (weighted): {valid_f1}")
print(f"Test Accuracy: {test_accuracy}")
print(f"Test F1 Score (weighted): {test_f1}")
print("\nClassification Report (Test Data):\n", classification_rep)
print("\nConfusion Matrix (Test Data):\n", conf_matrix)



Validation Accuracy: 0.6
Validation F1 Score (weighted): 0.6043577270716667
Test Accuracy: 0.6170454545454546
Test F1 Score (weighted): 0.6178196231672028

Classification Report (Test Data):
               precision    recall  f1-score   support

          -1       0.41      0.71      0.52       600
           0       0.53      0.45      0.49        40
           1       0.55      0.42      0.48        40
           2       0.81      0.75      0.78        40
           3       0.75      0.68      0.71        40
           4       0.62      0.53      0.57        40
           5       0.62      0.60      0.61        40
           6       0.80      0.70      0.75        40
           7       0.72      0.72      0.72        40
           8       0.79      0.47      0.59        40
           9       0.67      0.55      0.60        40
          10       0.76      0.47      0.58        40
          11       0.57      0.40      0.47        40
          12       0.88      0.72      0.79        

**weighted/SMOTE**

In [None]:
# 필요한 라이브러리 불러오기
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, accuracy_score, f1_score
from imblearn.over_sampling import SMOTE

# 1. 데이터 로드
data = pd.read_csv("../../../../monunmon.csv")  # 데이터셋 경로 입력

# 2. 특성과 레이블 분리
X = data.drop(columns=['Label'])  # 피처 데이터
y = data['Label']  # 레이블 데이터

# 3. Train, Validation, Test 데이터 나누기
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp)

# 4. 데이터 정규화
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

### 방법 1: 클래스 가중치 적용 ###
print("=== Gradient Boosting with Class Weights ===")
class_weights = {label: len(y_train) / (len(set(y_train)) * sum(y_train == label)) for label in set(y_train)}
model_weighted = GradientBoostingClassifier(random_state=42)
model_weighted.fit(X_train, y_train)

# 검증 데이터 평가
y_val_pred = model_weighted.predict(X_val)
valid_accuracy = accuracy_score(y_val, y_val_pred)
valid_f1 = f1_score(y_val, y_val_pred, average='weighted')  # F1-score (weighted)
print(f"Validation Accuracy (Class Weights): {valid_accuracy}")
print(f"Validation F1 Score (Class Weights): {valid_f1}")

# 테스트 데이터 평가
y_test_pred = model_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, y_test_pred)
test_f1 = f1_score(y_test, y_test_pred, average='weighted')  # F1-score (weighted)
print(f"Test Accuracy (Class Weights): {test_accuracy}")
print(f"Test F1 Score (Class Weights): {test_f1}")

# 분류 보고서
print("\nClassification Report on Test Data (Class Weights):")
print(classification_report(y_test, y_test_pred))


# ### 방법 2: SMOTE를 이용한 오버샘플링 ###
print("\n=== Gradient Boosting with SMOTE ===")
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

print("Resampled Training Set Size (SMOTE):", X_train_smote.shape)

model_smote = GradientBoostingClassifier(random_state=42)
model_smote.fit(X_train_smote, y_train_smote)

# 검증 데이터 평가
y_val_pred_smote = model_smote.predict(X_val)
valid_accuracy_smote = accuracy_score(y_val, y_val_pred_smote)
valid_f1_smote = f1_score(y_val, y_val_pred_smote, average='weighted')  # F1-score (weighted)
print(f"Validation Accuracy (SMOTE): {valid_accuracy_smote}")
print(f"Validation F1 Score (SMOTE): {valid_f1_smote}")

# 테스트 데이터 평가
y_test_pred_smote = model_smote.predict(X_test)
test_accuracy_smote = accuracy_score(y_test, y_test_pred_smote)
test_f1_smote = f1_score(y_test, y_test_pred_smote, average='weighted')  # F1-score (weighted)
print(f"Test Accuracy (SMOTE): {test_accuracy_smote}")
print(f"Test F1 Score (SMOTE): {test_f1_smote}")

# 분류 보고서
print("\nClassification Report on Test Data (SMOTE):")
print(classification_report(y_test, y_test_pred_smote))


=== Gradient Boosting with Class Weights ===
Validation Accuracy (Class Weights): 0.610909090909091
Validation F1 Score (Class Weights): 0.6125206101838206
Test Accuracy (Class Weights): 0.6154545454545455
Test F1 Score (Class Weights): 0.6183474587260465

Classification Report on Test Data (Class Weights):
              precision    recall  f1-score   support

          -1       0.40      0.69      0.51       450
           0       0.55      0.37      0.44        30
           1       0.60      0.50      0.55        30
           2       0.83      0.63      0.72        30
           3       0.79      0.73      0.76        30
           4       0.90      0.63      0.75        30
           5       0.62      0.50      0.56        30
           6       0.88      0.77      0.82        30
           7       0.80      0.80      0.80        30
           8       0.68      0.43      0.53        30
           9       0.78      0.70      0.74        30
          10       0.70      0.47      0.5