In [9]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
# 추가 모델 모듈
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier, LogisticRegression

# (2) 데이터 준비
cancer = load_breast_cancer()
X = cancer.data
y = cancer.target

# (3) 데이터 이해하기
print("Target Names:", cancer.target_names)
print("Feature Data Shape:", X.shape)
print("Label Data Shape:", y.shape)

# (4) train, test 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# (5) 다양한 모델로 학습시켜보기
# Decision Tree
dt_model = DecisionTreeClassifier()
dt_model.fit(X_train, y_train)
dt_pred = dt_model.predict(X_test)

# Random Forest
rf_model = RandomForestClassifier()
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)

# SVM
svm_model = SVC()
svm_model.fit(X_train, y_train)
svm_pred = svm_model.predict(X_test)

# SGD Classifier
sgd_model = SGDClassifier()
sgd_model.fit(X_train, y_train)
sgd_pred = sgd_model.predict(X_test)

# Logistic Regression
lr_model = LogisticRegression(max_iter=5000)
lr_model.fit(X_train, y_train)
lr_pred = lr_model.predict(X_test)

# (6) 모델을 평가해보기
# Decision Tree
print("Decision Tree Classification Report:")
print(classification_report(y_test, dt_pred, target_names=cancer.target_names))

# Random Forest
print("Random Forest Classification Report:")
print(classification_report(y_test, rf_pred, target_names=cancer.target_names))

# SVM
print("SVM Classification Report:")
print(classification_report(y_test, svm_pred, target_names=cancer.target_names))

# SGD Classifier
print("SGD Classifier Classification Report:")
print(classification_report(y_test, sgd_pred, target_names=cancer.target_names))

# Logistic Regression
print("Logistic Regression Classification Report:")
print(classification_report(y_test, lr_pred, target_names=cancer.target_names))


Target Names: ['malignant' 'benign']
Feature Data Shape: (569, 30)
Label Data Shape: (569,)
Decision Tree Classification Report:
              precision    recall  f1-score   support

   malignant       0.91      0.93      0.92        43
      benign       0.96      0.94      0.95        71

    accuracy                           0.94       114
   macro avg       0.93      0.94      0.93       114
weighted avg       0.94      0.94      0.94       114

Random Forest Classification Report:
              precision    recall  f1-score   support

   malignant       0.98      0.93      0.95        43
      benign       0.96      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114

SVM Classification Report:
              precision    recall  f1-score   support

   malignant       1.00      0.86      0.93        43
      benign       0.92      1.00      0.96 

###(6) 모델을 평가해보기

모델이 예측한 것이 얼마나 올바르게 예측되었는지 확인하기 위해 Precision과 Recall지표가 중요하다고 생각합니다. 때문에 Precision과 Recall 사이의 균형을 확인할 수 있는 F1-Score가 모델의 성능을 평가하는 지표로 좋다고 생각합니다.