# 1. 필요한 모듈 import

In [1]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# 2. 데이터 준비

In [2]:
digits = load_digits()
digits_data = digits.data
digits_label = digits.target

# 3. train, test 데이터 분리

In [3]:
X_train, X_test, y_train, y_test = train_test_split(digits_data, 
                                                    digits_label, 
                                                    test_size=0.2, 
                                                    random_state=10)

# 4. 모델 학습 및 평가

### 4-1. Decision Tree

In [4]:
from sklearn.tree import DecisionTreeClassifier

decision_tree = DecisionTreeClassifier(random_state=50)
decision_tree.fit(X_train, y_train)
y_pred = decision_tree.predict(X_test)

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.94      0.92      0.93        37
           1       0.86      0.88      0.87        34
           2       0.82      0.82      0.82        34
           3       0.86      0.90      0.88        40
           4       0.83      0.85      0.84        34
           5       0.88      0.91      0.89        32
           6       0.94      0.89      0.92        37
           7       0.93      0.93      0.93        40
           8       0.71      0.76      0.74        33
           9       0.86      0.77      0.81        39

    accuracy                           0.86       360
   macro avg       0.86      0.86      0.86       360
weighted avg       0.87      0.86      0.86       360



In [5]:
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_test, y_pred)
accuracy

0.8638888888888889

In [6]:
from sklearn.metrics import classification_report

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.94      0.92      0.93        37
           1       0.86      0.88      0.87        34
           2       0.82      0.82      0.82        34
           3       0.86      0.90      0.88        40
           4       0.83      0.85      0.84        34
           5       0.88      0.91      0.89        32
           6       0.94      0.89      0.92        37
           7       0.93      0.93      0.93        40
           8       0.71      0.76      0.74        33
           9       0.86      0.77      0.81        39

    accuracy                           0.86       360
   macro avg       0.86      0.86      0.86       360
weighted avg       0.87      0.86      0.86       360



---------------------------------------------------------------------------

### 4-2. Random Forest

In [7]:
from sklearn.ensemble import RandomForestClassifier

X_train, X_test, y_train, y_test = train_test_split(digits_data, 
                                                    digits_label, 
                                                    test_size=0.2, 
                                                    random_state=10)

random_forest = RandomForestClassifier(random_state=50)
random_forest.fit(X_train, y_train)
y_pred = random_forest.predict(X_test)

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      0.97      0.99        37
           1       0.97      0.97      0.97        34
           2       0.97      1.00      0.99        34
           3       0.93      0.97      0.95        40
           4       0.97      0.94      0.96        34
           5       0.97      1.00      0.98        32
           6       1.00      1.00      1.00        37
           7       0.93      1.00      0.96        40
           8       0.97      0.91      0.94        33
           9       1.00      0.92      0.96        39

    accuracy                           0.97       360
   macro avg       0.97      0.97      0.97       360
weighted avg       0.97      0.97      0.97       360



In [8]:
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_test, y_pred)
accuracy

0.9694444444444444

In [9]:
from sklearn.metrics import classification_report

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      0.97      0.99        37
           1       0.97      0.97      0.97        34
           2       0.97      1.00      0.99        34
           3       0.93      0.97      0.95        40
           4       0.97      0.94      0.96        34
           5       0.97      1.00      0.98        32
           6       1.00      1.00      1.00        37
           7       0.93      1.00      0.96        40
           8       0.97      0.91      0.94        33
           9       1.00      0.92      0.96        39

    accuracy                           0.97       360
   macro avg       0.97      0.97      0.97       360
weighted avg       0.97      0.97      0.97       360



--------------------------------------------------------------------

### 4-3. Support Vector Machine (SVM)

In [10]:
from sklearn import svm
svm_model = svm.SVC()

print(svm_model._estimator_type)

svm_model.fit(X_train, y_train)
y_pred = svm_model.predict(X_test)

print(classification_report(y_test, y_pred))

classifier
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        37
           1       0.97      1.00      0.99        34
           2       0.97      1.00      0.99        34
           3       1.00      0.97      0.99        40
           4       1.00      0.94      0.97        34
           5       0.97      1.00      0.98        32
           6       1.00      1.00      1.00        37
           7       1.00      1.00      1.00        40
           8       0.91      0.97      0.94        33
           9       1.00      0.95      0.97        39

    accuracy                           0.98       360
   macro avg       0.98      0.98      0.98       360
weighted avg       0.98      0.98      0.98       360



In [11]:
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_test, y_pred)
accuracy

0.9833333333333333

In [12]:
from sklearn.metrics import classification_report

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        37
           1       0.97      1.00      0.99        34
           2       0.97      1.00      0.99        34
           3       1.00      0.97      0.99        40
           4       1.00      0.94      0.97        34
           5       0.97      1.00      0.98        32
           6       1.00      1.00      1.00        37
           7       1.00      1.00      1.00        40
           8       0.91      0.97      0.94        33
           9       1.00      0.95      0.97        39

    accuracy                           0.98       360
   macro avg       0.98      0.98      0.98       360
weighted avg       0.98      0.98      0.98       360



--------------------------------------------------------------------

### 4-4. SGD (Stochastic Gradient Descent)

In [13]:
from sklearn.linear_model import SGDClassifier
sgd_model = SGDClassifier()

print(sgd_model._estimator_type)

sgd_model.fit(X_train, y_train)
y_pred = sgd_model.predict(X_test)

print(classification_report(y_test, y_pred))

classifier
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        37
           1       0.91      0.85      0.88        34
           2       0.92      1.00      0.96        34
           3       0.93      0.97      0.95        40
           4       0.97      0.91      0.94        34
           5       0.97      1.00      0.98        32
           6       1.00      1.00      1.00        37
           7       1.00      0.95      0.97        40
           8       0.81      0.88      0.84        33
           9       1.00      0.92      0.96        39

    accuracy                           0.95       360
   macro avg       0.95      0.95      0.95       360
weighted avg       0.95      0.95      0.95       360



In [14]:
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_test, y_pred)
accuracy

0.95

In [15]:
from sklearn.metrics import classification_report

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        37
           1       0.91      0.85      0.88        34
           2       0.92      1.00      0.96        34
           3       0.93      0.97      0.95        40
           4       0.97      0.91      0.94        34
           5       0.97      1.00      0.98        32
           6       1.00      1.00      1.00        37
           7       1.00      0.95      0.97        40
           8       0.81      0.88      0.84        33
           9       1.00      0.92      0.96        39

    accuracy                           0.95       360
   macro avg       0.95      0.95      0.95       360
weighted avg       0.95      0.95      0.95       360



--------------------------------------------------------------------

### 4-5. Logistic Regression

In [16]:
from sklearn.linear_model import LogisticRegression
logistic_model = LogisticRegression()

print(logistic_model._estimator_type)

logistic_model.fit(X_train, y_train)
y_pred = logistic_model.predict(X_test)

print(classification_report(y_test, y_pred))

classifier
              precision    recall  f1-score   support

           0       0.97      1.00      0.99        37
           1       0.91      0.91      0.91        34
           2       0.92      1.00      0.96        34
           3       1.00      0.95      0.97        40
           4       0.94      0.91      0.93        34
           5       0.94      0.94      0.94        32
           6       0.97      0.97      0.97        37
           7       0.97      0.95      0.96        40
           8       0.89      0.94      0.91        33
           9       0.97      0.92      0.95        39

    accuracy                           0.95       360
   macro avg       0.95      0.95      0.95       360
weighted avg       0.95      0.95      0.95       360



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [17]:
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_test, y_pred)
accuracy

0.95

In [18]:
from sklearn.metrics import classification_report

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.97      1.00      0.99        37
           1       0.91      0.91      0.91        34
           2       0.92      1.00      0.96        34
           3       1.00      0.95      0.97        40
           4       0.94      0.91      0.93        34
           5       0.94      0.94      0.94        32
           6       0.97      0.97      0.97        37
           7       0.97      0.95      0.96        40
           8       0.89      0.94      0.91        33
           9       0.97      0.92      0.95        39

    accuracy                           0.95       360
   macro avg       0.95      0.95      0.95       360
weighted avg       0.95      0.95      0.95       360



----------------------------------------------------------------------------

# 5. 회고

Logistic Regression모델에서 accuracy는 나왔으나, 오류메시지가 떴는데, 반복횟수를 늘리라는 말인것으로(?) 추정되나
어떻게 조치해야할지 구글링을 해서 조치를 해봤으나 계속 오류가 뜬다..ㅠㅠ
손글씨 데이터는 모든 숫자를 잘 맞추는 것이 중요하여 accuracy그리고 F1-score 가 높아야하는것 같은데,
여러가지 모델로 실행한 결과, Support Vector Machine (SVM)모델의 accuracy가 가장 높게 나왔다.