In [4]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
# 추가 모델 모듈
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier, LogisticRegression

# (2) Prepare the data: load the digits dataset and pull out the feature
# matrix and the label vector.
digits = load_digits()
X, y = digits.data, digits.target

# (3) Get to know the data: class names plus feature/label array shapes.
print("Target Names:", digits.target_names)
print("Feature Data Shape:", X.shape)
print("Label Data Shape:", y.shape)

# (4) Split into train and test sets: 20% is held out for testing, and the
# fixed seed keeps the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# (5) Train several different models
# Seed every estimator that draws random numbers while fitting, so the
# classification reports are reproducible from run to run. 42 is the same
# seed the train/test split already uses.
RANDOM_STATE = 42

# Decision Tree: candidate features are randomly permuted at each split, so
# an unseeded tree can differ between runs.
dt_model = DecisionTreeClassifier(random_state=RANDOM_STATE)
dt_model.fit(X_train, y_train)
dt_pred = dt_model.predict(X_test)

# Random Forest: bootstrap sampling and per-split feature sampling are random.
rf_model = RandomForestClassifier(random_state=RANDOM_STATE)
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)

# SVM: left unseeded on purpose. SVC only uses random_state for probability
# estimates, which are not enabled here.
svm_model = SVC()
svm_model.fit(X_train, y_train)
svm_pred = svm_model.predict(X_test)

# SGD Classifier: the training data is shuffled between epochs, so it needs
# a seed to give stable results.
sgd_model = SGDClassifier(random_state=RANDOM_STATE)
sgd_model.fit(X_train, y_train)
sgd_pred = sgd_model.predict(X_test)

# Logistic Regression: max_iter is raised to 1000, presumably so the solver
# has enough iterations to converge on this data.
lr_model = LogisticRegression(max_iter=1000)
lr_model.fit(X_train, y_train)
lr_pred = lr_model.predict(X_test)

# (6) Evaluate the models
# Pair each report heading with that model's test-set predictions, then print
# one classification report per model in the order the models were trained.
reports = [
    ("Decision Tree Classification Report:", dt_pred),
    ("Random Forest Classification Report:", rf_pred),
    ("SVM Classification Report:", svm_pred),
    ("SGD Classifier Classification Report:", sgd_pred),
    ("Logistic Regression Classification Report:", lr_pred),
]
for heading, predictions in reports:
    print(heading)
    print(classification_report(y_test, predictions))

Target Names: [0 1 2 3 4 5 6 7 8 9]
Feature Data Shape: (1797, 64)
Label Data Shape: (1797,)
Decision Tree Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.88      0.92        33
           1       0.81      0.79      0.80        28
           2       0.84      0.79      0.81        33
           3       0.75      0.88      0.81        34
           4       0.80      0.89      0.85        46
           5       0.91      0.85      0.88        47
           6       0.87      0.94      0.90        35
           7       0.88      0.88      0.88        34
           8       0.79      0.63      0.70        30
           9       0.78      0.80      0.79        40

    accuracy                           0.84       360
   macro avg       0.84      0.83      0.83       360
weighted avg       0.84      0.84      0.84       360

Random Forest Classification Report:
              precision    recall  f1-score   support

           0       

### (6) 모델을 평가해보기

모델의 예측이 얼마나 올바른지 확인하기 위해서는 Precision과 Recall 지표가 중요하다고 생각합니다. 그렇기 때문에 Precision과 Recall 사이의 균형을 확인할 수 있는 F1-Score가 모델의 성능을 평가하는 지표로 좋다고 생각합니다.