In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_wine, load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import seaborn as sns

# Load the Wine dataset that ships with scikit-learn.
wine = load_wine()

# Put the features in a DataFrame, then attach the integer label ('target')
# and the matching human-readable class name ('class').
df_wine = pd.DataFrame(data=wine.data, columns=wine.feature_names)
df_wine['target'] = wine.target
df_wine['class'] = [wine.target_names[label] for label in wine.target]

# Separate the label from the features; both label columns are dropped from X
# so that neither of them is used as an input feature.
y_wine = df_wine['target']
X_wine = df_wine.drop(columns=['target', 'class'])

# Hold out 30% of the rows for testing; the seed is fixed so the split is repeatable.
X_train_wine, X_test_wine, y_train_wine, y_test_wine = train_test_split(
    X_wine,
    y_wine,
    test_size=0.3,
    random_state=50,
)

# Standardize the features: the scaler is fitted on the training split only,
# and the same fitted scaler is then applied to both splits.
scaler_wine = StandardScaler().fit(X_train_wine)
X_train_wine_scaled = scaler_wine.transform(X_train_wine)
X_test_wine_scaled = scaler_wine.transform(X_test_wine)

# Train a decision tree (seeded) on the standardized training data.
dt_wine_scaled = DecisionTreeClassifier(random_state=50).fit(X_train_wine_scaled, y_train_wine)

# Predict on the held-out split.
y_pred_wine_scaled = dt_wine_scaled.predict(X_test_wine_scaled)

# Accuracy on the training split and on the held-out split, so the two can be compared.
train_accuracy_wine_scaled = accuracy_score(
    y_train_wine,
    dt_wine_scaled.predict(X_train_wine_scaled),
)
accuracy_wine_scaled = accuracy_score(y_test_wine, y_pred_wine_scaled)

# Report both accuracies followed by the per-class precision/recall/F1 table.
print(
    f"훈련 정확도: {train_accuracy_wine_scaled:.4f}",
    f"테스트 정확도: {accuracy_wine_scaled:.4f}",
    "\n분류 보고서:",
    classification_report(y_test_wine, y_pred_wine_scaled, target_names=wine.target_names),
    sep="\n",
)

훈련 정확도: 1.0000
테스트 정확도: 0.9444

분류 보고서:
              precision    recall  f1-score   support

     class_0       0.93      0.88      0.90        16
     class_1       0.92      0.96      0.94        23
     class_2       1.00      1.00      1.00        15

    accuracy                           0.94        54
   macro avg       0.95      0.94      0.95        54
weighted avg       0.94      0.94      0.94        54



In [6]:
# Load the Breast Cancer dataset that ships with scikit-learn.
cancer = load_breast_cancer()

# Put the features in a DataFrame, then attach the integer label ('target')
# and the matching human-readable class name ('class').
df_cancer = pd.DataFrame(data=cancer.data, columns=cancer.feature_names)
df_cancer['target'] = cancer.target
df_cancer['class'] = [cancer.target_names[label] for label in cancer.target]

# Separate the label from the features; both label columns are dropped from X
# so that neither of them is used as an input feature.
y_cancer = df_cancer['target']
X_cancer = df_cancer.drop(columns=['target', 'class'])

# Hold out 30% of the rows for testing; the seed is fixed so the split is repeatable.
X_train_cancer, X_test_cancer, y_train_cancer, y_test_cancer = train_test_split(
    X_cancer,
    y_cancer,
    test_size=0.3,
    random_state=50,
)

# Standardize the features: the scaler is fitted on the training split only,
# and the same fitted scaler is then applied to both splits.
scaler_cancer = StandardScaler().fit(X_train_cancer)
X_train_cancer_scaled = scaler_cancer.transform(X_train_cancer)
X_test_cancer_scaled = scaler_cancer.transform(X_test_cancer)

# Train a decision tree (seeded) on the standardized training data.
dt_cancer_scaled = DecisionTreeClassifier(random_state=50).fit(X_train_cancer_scaled, y_train_cancer)

# Predict on the held-out split.
y_pred_cancer_scaled = dt_cancer_scaled.predict(X_test_cancer_scaled)

# Accuracy on the training split and on the held-out split, so the two can be compared.
train_accuracy_cancer_scaled = accuracy_score(
    y_train_cancer,
    dt_cancer_scaled.predict(X_train_cancer_scaled),
)
accuracy_cancer_scaled = accuracy_score(y_test_cancer, y_pred_cancer_scaled)

# Report both accuracies followed by the per-class precision/recall/F1 table.
print(
    f"훈련 정확도: {train_accuracy_cancer_scaled:.4f}",
    f"테스트 정확도: {accuracy_cancer_scaled:.4f}",
    "\n분류 보고서:",
    classification_report(y_test_cancer, y_pred_cancer_scaled, target_names=cancer.target_names),
    sep="\n",
)

훈련 정확도: 1.0000
테스트 정확도: 0.8947

분류 보고서:
              precision    recall  f1-score   support

   malignant       0.82      0.90      0.85        59
      benign       0.94      0.89      0.92       112

    accuracy                           0.89       171
   macro avg       0.88      0.90      0.89       171
weighted avg       0.90      0.89      0.90       171

