<a href="https://colab.research.google.com/github/CodeByJJY/AIByJJY/blob/main/SINSA/Ch7/task/task7_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **라이브러리 설치**

In [29]:
# 필요한 라이브러리 설치 및 불러오기
!pip install ucimlrepo



In [30]:
from ucimlrepo import fetch_ucirepo
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score, classification_report, average_precision_score, roc_auc_score
from sklearn.utils import class_weight
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import label_binarize

# **데이터셋 호출 및 전처리**

In [31]:
# 데이터셋 불러오기
wine_quality = fetch_ucirepo(id=186)
X = wine_quality.data.features
y = wine_quality.data.targets

In [32]:
# y가 DataFrame인지 확인하고 Series로 변환
if isinstance(y, pd.DataFrame):
    y = y.squeeze()  # DataFrame을 Series로 변환

# 품질 데이터 라벨링 (1-3: Low, 4-6: Mid, 7-9: High)
def quality_label(quality):
    if quality <= 3:
        return 'Low'
    elif 4 <= quality <= 6:
        return 'Mid'
    else:
        return 'High'

# y 데이터에 새로운 라벨 적용
y = y.apply(quality_label)

In [33]:
# 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 클래스 가중치 계산 (불균형 데이터 대처)
weights = class_weight.compute_class_weight(class_weight='balanced', classes=np.unique(y), y=y_train)
class_weights = dict(zip(np.unique(y), weights))

# **모델 학습**

In [34]:
# 단일 모델
base_model = DecisionTreeClassifier(class_weight=class_weights, random_state=42)
single_model = base_model.fit(X_train, y_train)
y_pred_single = single_model.predict(X_test)

# 배깅 모델 (class_weight 적용하지 않은 결정 트리 사용)
bagging_model = BaggingClassifier(DecisionTreeClassifier(random_state=42), n_estimators=10, random_state=42)
bagging_model.fit(X_train, y_train)
y_pred_bagging = bagging_model.predict(X_test)

# 부스팅 모델 (class_weight 적용된 결정 트리 사용)
boosting_model = AdaBoostClassifier(base_model, n_estimators=10, random_state=42)
boosting_model.fit(X_train, y_train)
y_pred_boosting = boosting_model.predict(X_test)



# **모델 평가**

In [35]:
# 성능 평가 함수
def evaluate_model(y_true, y_pred, model_name):
    print(f"===== {model_name} 성능 평가 =====")
    print(classification_report(y_true, y_pred, target_names=['Low', 'Mid', 'High']))
    accuracy = accuracy_score(y_true, y_pred)
    print(f"정확도: {accuracy:.4f}")
    return accuracy

# 모델 성능 평가
accuracy_single = evaluate_model(y_test, y_pred_single, "단일 모델")
accuracy_bagging = evaluate_model(y_test, y_pred_bagging, "배깅 모델")
accuracy_boosting = evaluate_model(y_test, y_pred_boosting, "부스팅 모델")

# 다중 클래스 Precision-Recall 및 ROC AUC 계산
y_test_binarized = label_binarize(y_test, classes=['Low', 'Mid', 'High'])
y_proba = boosting_model.predict_proba(X_test)

# Precision-Recall AUC (macro 평균)
avg_precision = average_precision_score(y_test_binarized, y_proba, average="macro")
print(f"평균 Precision-Recall AUC (macro): {avg_precision:.4f}")

# ROC AUC (macro 평균)
roc_auc = roc_auc_score(y_test_binarized, y_proba, average="macro")
print(f"평균 ROC AUC (macro): {roc_auc:.4f}")

===== 단일 모델 성능 평가 =====
              precision    recall  f1-score   support

         Low       0.63      0.63      0.63       252
         Mid       0.00      0.00      0.00         6
        High       0.91      0.90      0.90      1042

    accuracy                           0.85      1300
   macro avg       0.51      0.51      0.51      1300
weighted avg       0.85      0.85      0.85      1300

정확도: 0.8469
===== 배깅 모델 성능 평가 =====
              precision    recall  f1-score   support

         Low       0.68      0.64      0.66       252
         Mid       0.00      0.00      0.00         6
        High       0.91      0.93      0.92      1042

    accuracy                           0.87      1300
   macro avg       0.53      0.52      0.53      1300
weighted avg       0.86      0.87      0.86      1300

정확도: 0.8669
===== 부스팅 모델 성능 평가 =====
              precision    recall  f1-score   support

         Low       0.63      0.65      0.64       252
         Mid       0.14      0.1