# Data Load

In [60]:
from sklearn.datasets import load_digits
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix

# as_frame=True makes load_digits return pandas objects, which is what lets the
# cells below use digits.frame and pass digits.data / digits.target to the splitter.
digits = load_digits(as_frame=True)

# 문제 파악(data 파악)

In [61]:
# Print the description text bundled with the dataset.
print(digits.DESCR)

.. _digits_dataset:

Optical recognition of handwritten digits dataset
--------------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 1797
    :Number of Attributes: 64
    :Attribute Information: 8x8 image of integer pixels in the range 0..16.
    :Missing Attribute Values: None
    :Creator: E. Alpaydin (alpaydin '@' boun.edu.tr)
    :Date: July; 1998

This is a copy of the test set of the UCI ML hand-written digits datasets
https://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits

The data set contains images of hand-written digits: 10 classes where
each class refers to a digit.

Preprocessing programs made available by NIST were used to extract
normalized bitmaps of handwritten digits from a preprinted form. From a
total of 43 people, 30 contributed to the training set and different 13
to the test set. 32x32 bitmaps are divided into nonoverlapping blocks of
4x4 and the number of on pixels are counted in each blo

In [63]:
# Show the combined table of pixel features plus the target column
# (the recorded output is a truncated head/tail preview).
digits.frame

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_6_7,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7,target
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0,0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0,1
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0,2
3,0.0,0.0,7.0,15.0,13.0,1.0,0.0,0.0,0.0,8.0,...,0.0,0.0,0.0,7.0,13.0,13.0,9.0,0.0,0.0,3
4,0.0,0.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1792,0.0,0.0,4.0,10.0,13.0,6.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,2.0,14.0,15.0,9.0,0.0,0.0,9
1793,0.0,0.0,6.0,16.0,13.0,11.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6.0,16.0,14.0,6.0,0.0,0.0,0
1794,0.0,0.0,1.0,11.0,15.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2.0,9.0,13.0,6.0,0.0,0.0,8
1795,0.0,0.0,2.0,10.0,7.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,5.0,12.0,16.0,12.0,0.0,0.0,9


위 digits 데이터에서 제공되는 데이터프레임을 살펴보았다.
- pixel_0_0 부터 pixel_7_7까지(총 64개)의 수치(range=0~16)들을 통해 target값을 0부터9까지 가지고 있음을 확인할 수 있다. (= Feature는 64개, target은 10개 임을 알 수 있다.)  
- 총 1797개의 데이터를 가지고 있음을 알 수 있다.
- 아래는 직접 확인해본 결과이다.

In [64]:
# Confirm the dataset dimensions directly from the loaded object.
n_classes = len(digits.target_names)
n_features = len(digits.feature_names)

print("총 데이터 수:", digits.data.shape)
print("Target의 수:", n_classes)
print("Feature의 수:", n_features)

총 데이터 수: (1797, 64)
Target의 수: 10
Feature의 수: 64


- 문제 파악 : 8x8 데이터포인트를 가진 데이터(8x8픽셀)를 숫자 0부터 9중 어느 숫자인지 3가지 learning algorithm을 통해 예측해보겠다. 

#### 총 1797개의 데이터를 10개의 class로 구분하는 Multi-class Classification이 목적이다.

# 데이터 전처리

In [65]:
from sklearn.model_selection import train_test_split

# Hold out a stratified test set; test_size is left at the library default and
# random_state pins the shuffle so the split is reproducible.
train_test_parts = train_test_split(
    digits.data,
    digits.target,
    stratify=digits.target,
    random_state=0,
)
X_train, X_test, y_train, y_test = train_test_parts

print("Train_data의 수:", len(X_train))
print("Test_data의 수:", len(X_test))

Train_data의 수: 1347
Test_data의 수: 450


- train데이터와 test데이터를 3:1로 분리시켰다.

In [66]:
# Carve a stratified validation set out of the training portion; it is used
# only for hyperparameter selection in the sections below.
# NOTE: this rebinds X_train / y_train, so running this cell twice in a row
# splits the already-reduced training set again. Re-run the train/test split
# cell first if this cell has to be executed again.
train_vali_parts = train_test_split(
    X_train,
    y_train,
    stratify=y_train,
    random_state=0,
)
X_train, X_vali, y_train, y_vali = train_vali_parts

print("Train_data의 수:", len(X_train))
print("Validation_data의 수:", len(X_vali))

Train_data의 수: 1010
Validation_data의 수: 337


- 적절한 hyperparameter를 찾기위해 train data를 이용해 validation data를 만들었다.

# Learning Algorithm(1) - Decision Tree

In [67]:
from sklearn.tree import DecisionTreeClassifier

- max_depth, min_samples_leaf, max_leaf_nodes 값의 변화에 따른 validation accuracy 값을 비교해보겠다.

In [68]:
from sklearn.metrics import accuracy_score

# Candidate values for max_depth, min_samples_leaf and max_leaf_nodes.
md_settings = [1, 3, 5, 7, 10, 20, 50]
msl_settings = [1, 3, 5, 7, 10, 20, 50]
mln_settings = [2, 3, 5, 7, 10, 20, 50]

# Every [max_depth, min_samples_leaf, max_leaf_nodes] combination, enumerated
# with max_depth varying slowest and max_leaf_nodes fastest.
md_mls_mln_settings = [
    [md, msl, mln]
    for md in md_settings
    for msl in msl_settings
    for mln in mln_settings
]

# Accuracy lists are index-aligned with md_mls_mln_settings.
training_accuracy = []
vali_accuracy = []

for md, msl, mln in md_mls_mln_settings:
    clf_vali = DecisionTreeClassifier(
        max_depth=md,
        min_samples_leaf=msl,
        max_leaf_nodes=mln,
        random_state=0,
    )
    clf_vali.fit(X_train, y_train)

    # Score on the data the tree was fitted on and on the held-out validation split.
    y_train_hat = clf_vali.predict(X_train)
    y_vali_hat = clf_vali.predict(X_vali)

    training_accuracy.append(accuracy_score(y_train, y_train_hat))
    vali_accuracy.append(accuracy_score(y_vali, y_vali_hat))
            
            

In [69]:
# One row per hyperparameter combination, with its training and validation accuracy.
df1 = pd.DataFrame(
    dict(
        md_mls_mln_settings=md_mls_mln_settings,
        training_accuracy=training_accuracy,
        vali_accuracy=vali_accuracy,
    )
)
df1

Unnamed: 0,md_mls_mln_settings,training_accuracy,vali_accuracy
0,"[1, 1, 2]",0.200000,0.192878
1,"[1, 1, 3]",0.200000,0.192878
2,"[1, 1, 5]",0.200000,0.192878
3,"[1, 1, 7]",0.200000,0.192878
4,"[1, 1, 10]",0.200000,0.192878
...,...,...,...
338,"[50, 50, 5]",0.434653,0.415430
339,"[50, 50, 7]",0.573267,0.537092
340,"[50, 50, 10]",0.669307,0.629080
341,"[50, 50, 20]",0.761386,0.694362


- max_depth, min_samples_leaf, max_leaf_nodes의 모든 경우에 따른 training accuracy와 validation accuracy를 데이터프레임으로 나타내본것이다.
- 경우가 너무 많아 한번에 파악하기 어려우므로, validation accuracy가 가장 큰 경우의 조합을 찾아보기로 했다.

In [70]:
# Rank the combinations from best to worst validation accuracy.
df1.sort_values("vali_accuracy", ascending=False)

Unnamed: 0,md_mls_mln_settings,training_accuracy,vali_accuracy
209,"[10, 3, 50]",0.931683,0.807122
314,"[50, 5, 50]",0.918812,0.807122
258,"[20, 3, 50]",0.931683,0.807122
265,"[20, 5, 50]",0.918812,0.807122
216,"[10, 5, 50]",0.918812,0.807122
...,...,...,...
41,"[1, 20, 50]",0.200000,0.192878
42,"[1, 50, 2]",0.200000,0.192878
43,"[1, 50, 3]",0.200000,0.192878
44,"[1, 50, 5]",0.200000,0.192878


In [71]:
# idxmax returns the label of the first row holding the maximum, so tied
# combinations further down the table are not shown here.
best_tree_row = df1["vali_accuracy"].idxmax()
df1.loc[best_tree_row]

md_mls_mln_settings    [10, 3, 50]
training_accuracy         0.931683
vali_accuracy             0.807122
Name: 209, dtype: object

- [10,3,50] 와 [10,5,50]의 validation accuracy가 약0.807로 가장 높음을 알 수 있다. (나머지 20,50의 depth에 대해서는 10으로 설정했을때와 결과가 같으므로 의미가 없다고 생각하여 제외하겠다.)
- overfitting 될 위험이 있으므로 [10,5,50]에 대해서도 test accuracy를 구해보겠다.

In [72]:
def _fit_tree_and_report(min_samples_leaf, label):
    """Fit a decision tree with max_depth=10 / max_leaf_nodes=50 and print its test metrics.

    Parameters
    ----------
    min_samples_leaf : int
        Value forwarded to ``DecisionTreeClassifier(min_samples_leaf=...)``.
    label : str
        Setting label interpolated into the printed accuracy line.

    Returns
    -------
    tuple
        ``(fitted_tree, predictions)`` where ``predictions`` are the labels
        predicted for ``X_test``.
    """
    tree = DecisionTreeClassifier(
        max_depth=10,
        min_samples_leaf=min_samples_leaf,
        max_leaf_nodes=50,
        random_state=0,
    )
    tree.fit(X_train, y_train)

    predictions = tree.predict(X_test)
    print(f"Test accuracy for {label}:", accuracy_score(y_test, predictions))
    print(confusion_matrix(y_test, predictions))
    return tree, predictions


# NOTE(review): both candidates are scored on X_test, so the test set takes part
# in choosing between them; comparing on X_vali and touching X_test only once
# would keep the reported test accuracy unbiased.
DT1, y_test_hat = _fit_tree_and_report(3, "[10,3,50]")

print("----------------------------------------------------------------")

DT2, y_test_hat = _fit_tree_and_report(5, "[10,5,50]")

Test accuracy for [10,3,50]: 0.8555555555555555
[[43  0  0  0  1  0  0  0  1  0]
 [ 0 30  2  0  2  3  0  1  7  1]
 [ 0  2 35  3  0  1  0  1  1  1]
 [ 0  1  2 36  0  2  0  1  4  0]
 [ 0  0  2  0 38  2  0  0  1  2]
 [ 0  0  1  2  0 42  0  0  0  1]
 [ 0  0  1  0  1  0 42  0  1  0]
 [ 0  0  0  0  1  0  0 44  0  0]
 [ 0  2  0  0  0  1  0  1 38  1]
 [ 0  5  1  1  0  0  0  0  1 37]]
----------------------------------------------------------------
Test accuracy for [10,5,50]: 0.8444444444444444
[[43  0  0  0  1  0  0  0  1  0]
 [ 0 32  2  0  1  2  2  1  5  1]
 [ 0  3 33  3  0  1  0  2  1  1]
 [ 0  4  2 36  0  1  0  1  2  0]
 [ 0  1  1  0 38  2  0  0  1  2]
 [ 0  0  0  3  0 42  0  0  0  1]
 [ 0  0  2  0  1  0 40  1  1  0]
 [ 0  0  0  0  1  0  0 44  0  0]
 [ 0  2  0  0  0  1  0  1 38  1]
 [ 0  6  0  1  0  3  0  0  1 34]]


- 위 결과를 통해 [10,5,50] 보다 [10,3,50]의 조합이 test 성능이 더 좋다는것을 확인할 수 있다. (overfitting이 일어나지 않았음을 확인 가능)

#### 최적의 hyperparameter
- max_depth=10, min_samples_leaf = 3, max_leaf_nodes = 50 로 의사결정나무를 만들었을때로 , test 데이터에 대한 accuracy는 약 86%로 나온다.

# Learning Algorithm(2) - Logistic Regression

- C 값과 multi_class 값의 변화에 따른 validation accuracy를 비교해 볼 것이다.

In [73]:
from sklearn.linear_model import LogisticRegression

# Candidate regularisation strengths and multi-class strategies.
C_settings = [0.01, 0.1, 1, 10, 100, 1000, 10000]
multi_class_settings = ['ovr', 'auto']

# Every [C, multi_class] pair, with C varying slowest.
C_multiclass_settings = [
    [C, multiclass]
    for C in C_settings
    for multiclass in multi_class_settings
]

# Accuracy lists are index-aligned with C_multiclass_settings.
training_accuracy = []
vali_accuracy = []

# NOTE(review): the recorded output of this cell shows repeated
# "TOTAL NO. of ITERATIONS REACHED LIMIT" warnings, i.e. some fits stopped at the
# default max_iter. If max_iter is raised or the features are scaled, apply the
# same change to the final model cell so selection and evaluation stay consistent.
for C, multiclass in C_multiclass_settings:
    logreg = LogisticRegression(C=C, multi_class=multiclass)
    logreg.fit(X_train, y_train)

    y_train_hat = logreg.predict(X_train)
    y_vali_hat = logreg.predict(X_vali)

    training_accuracy.append(accuracy_score(y_train, y_train_hat))
    vali_accuracy.append(accuracy_score(y_vali, y_vali_hat))
    

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [74]:
# Build the result table from the [C, multi_class] pairs recorded by the search
# loop, so every accuracy is labelled with the setting that actually produced it.
# This does not depend on the grid sizes or on C_settings being sorted.
df2 = pd.DataFrame({
    "C": [c_value for c_value, _ in C_multiclass_settings],
    "Multi_class": [mode for _, mode in C_multiclass_settings],
    "training_accuracy": training_accuracy,
    "vali_accuracy": vali_accuracy,
})
df2

Unnamed: 0,C,Multi_class,training_accuracy,vali_accuracy
0,0.01,ovr,0.985149,0.961424
1,0.01,auto,0.994059,0.958457
2,0.1,ovr,0.99901,0.958457
3,0.1,auto,1.0,0.961424
4,1.0,ovr,0.99901,0.94362
5,1.0,auto,1.0,0.95549
6,10.0,ovr,1.0,0.934718
7,10.0,auto,1.0,0.949555
8,100.0,ovr,1.0,0.925816
9,100.0,auto,1.0,0.949555


In [75]:
# Rank the logistic-regression settings from best to worst validation accuracy.
df2.sort_values("vali_accuracy", ascending=False)

Unnamed: 0,C,Multi_class,training_accuracy,vali_accuracy
0,0.01,ovr,0.985149,0.961424
3,0.1,auto,1.0,0.961424
1,0.01,auto,0.994059,0.958457
2,0.1,ovr,0.99901,0.958457
5,1.0,auto,1.0,0.95549
7,10.0,auto,1.0,0.949555
9,100.0,auto,1.0,0.949555
11,1000.0,auto,1.0,0.949555
13,10000.0,auto,1.0,0.949555
4,1.0,ovr,0.99901,0.94362


In [76]:
# idxmax returns the first row holding the maximum; ties are not shown here.
best_logreg_row = df2["vali_accuracy"].idxmax()
df2.loc[best_logreg_row]

C                        0.01
Multi_class               ovr
training_accuracy    0.985149
vali_accuracy        0.961424
Name: 0, dtype: object

- 위 결과를 보면 C=0.01, Multi_class='ovr' 와 C=0.10, Multi_class='auto'(다중 클래스 데이터에서는 multinomial 방식으로 동작)의 vali_accuracy는 0.961424로 최대값을 가짐을 확인할 수 있다.
#### 최적의 hyperparameter  
- Multi_class = 'auto'(multinomial)로 설정했을 때, training accuracy는 1.0의 값을 가져서 overfitting의 위험이 있음을 알 수 있으므로, C=0.01, Multi_class='ovr'을 사용했을 때를 최적의 hyperparameter로 설정하였다.

In [77]:
# Refit the selected setting (C=0.01, one-vs-rest) on the training split.
# fit() returns the estimator itself, so the name is bound to the fitted model
# and the trailing expression displays it.
logreg_001_ovr = LogisticRegression(C=0.01, multi_class='ovr').fit(X_train, y_train)
logreg_001_ovr

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LogisticRegression(C=0.01, multi_class='ovr')

In [78]:
# Evaluate the selected logistic regression on the held-out test split.
y_test_hat = logreg_001_ovr.predict(X_test)
logreg_test_accuracy = accuracy_score(y_test, y_test_hat)

print("Test accuracy:", logreg_test_accuracy)
print(confusion_matrix(y_test, y_test_hat))

Test accuracy: 0.9622222222222222
[[45  0  0  0  0  0  0  0  0  0]
 [ 0 45  0  0  0  1  0  0  0  0]
 [ 0  1 43  0  0  0  0  0  0  0]
 [ 0  1  0 43  0  0  0  0  2  0]
 [ 0  0  0  0 43  0  0  0  1  1]
 [ 0  0  0  0  0 46  0  0  0  0]
 [ 0  2  0  0  0  0 43  0  0  0]
 [ 0  0  0  0  0  0  0 45  0  0]
 [ 0  3  0  1  0  1  0  1 37  0]
 [ 0  0  0  0  0  2  0  0  0 43]]


- C=0.01, multi_class = 'ovr' 로 hyperparameter를 설정했을 때, test accuracy는 약 96%임을 확인할 수 있다.

# Learning Algorithm(3) - SVM

In [79]:
from sklearn.svm import SVC

- feature의 범위를 0-16에서 0-1로 만들기위해 MinMaxScaler를 사용해서 Scaling할 것이다.

In [87]:
from sklearn.preprocessing import MinMaxScaler
# Fit the scaler on the training split only, then apply that same transform to
# the validation and test splits.
scaler = MinMaxScaler()
scaler.fit(X_train)
# Named X_train_scaled because that is the name the SVM cells below read.
X_train_scaled = scaler.transform(X_train)
X_vali_scaled = scaler.transform(X_vali)
X_test_scaled = scaler.transform(X_test)

- C 값과 gamma 값의 변화에 따른 validation accuracy를 비교해볼 것이다.

In [88]:
# Exponentially spaced grids: C = 2^-5, 2^-3, ..., 2^15 and gamma = 2^-15, 2^-13, ..., 2^3.
C_settings = [2**exponent for exponent in range(-5, 16, 2)]
gamma_settings = [2**exponent for exponent in range(-15, 4, 2)]

# Every (C, gamma) pair, with C varying slowest.
svm_grid = [(C, gamma) for C in C_settings for gamma in gamma_settings]

# Accuracy lists follow the order of svm_grid.
training_accuracy = []
vali_accuracy = []

for C, gamma in svm_grid:
    svc = SVC(C=C, kernel='rbf', gamma=gamma)
    svc.fit(X_train_scaled, y_train)

    y_train_hat = svc.predict(X_train_scaled)
    y_vali_hat = svc.predict(X_vali_scaled)

    training_accuracy.append(accuracy_score(y_train, y_train_hat))
    vali_accuracy.append(accuracy_score(y_vali, y_vali_hat))


In [89]:
# Label every accuracy entry with the (C, gamma) pair that produced it. The two
# comprehensions mirror the search loop (C outer, gamma inner), so the labels stay
# aligned for any grid size and do not rely on C_settings being in ascending order.
svm_c_column = [c_value for c_value in C_settings for _ in gamma_settings]
svm_gamma_column = [gamma_value for _ in C_settings for gamma_value in gamma_settings]

df3 = pd.DataFrame({
    "C": svm_c_column,
    "Gamma": svm_gamma_column,
    "Training_accuracy": training_accuracy,
    "Validation_accuracy": vali_accuracy,
})
df3

Unnamed: 0,C,Gamma,Training_accuracy,Validation_accuracy
0,0.03125,0.000031,0.10198,0.100890
1,0.03125,0.000122,0.10198,0.100890
2,0.03125,0.000488,0.10198,0.100890
3,0.03125,0.001953,0.10198,0.100890
4,0.03125,0.007812,0.10198,0.100890
...,...,...,...,...
105,32768.00000,0.031250,1.00000,0.982196
106,32768.00000,0.125000,1.00000,0.982196
107,32768.00000,0.500000,1.00000,0.982196
108,32768.00000,2.000000,1.00000,0.890208


In [90]:
# Rank the SVM settings from best to worst validation accuracy.
df3.sort_values("Validation_accuracy", ascending=False)

Unnamed: 0,C,Gamma,Training_accuracy,Validation_accuracy
27,0.50000,0.500000,0.99802,0.985163
36,2.00000,0.125000,0.99703,0.985163
55,32.00000,0.031250,1.00000,0.982196
93,8192.00000,0.001953,1.00000,0.982196
92,8192.00000,0.000488,1.00000,0.982196
...,...,...,...,...
30,2.00000,0.000031,0.10198,0.100890
31,2.00000,0.000122,0.10198,0.100890
40,8.00000,0.000031,0.10198,0.100890
1,0.03125,0.000122,0.10198,0.100890


In [91]:
# idxmax returns the first row holding the maximum; ties are not shown here.
best_svm_row = df3["Validation_accuracy"].idxmax()
df3.loc[best_svm_row]

C                      0.500000
Gamma                  0.500000
Training_accuracy      0.998020
Validation_accuracy    0.985163
Name: 27, dtype: float64

#### 최적의 hyperparameter
- 위 결과를 통해 C=0.5, gamma=0.5 일 때, validation accuracy가 가장 높은것을 확인할 수 있다.

In [92]:
# Refit the selected RBF SVM (C=0.5, gamma=0.5) on the scaled training split.
# fit() returns the estimator itself, so the trailing expression displays it.
svc_05_05 = SVC(C=0.5, kernel='rbf', gamma=0.5).fit(X_train_scaled, y_train)
svc_05_05

SVC(C=0.5, gamma=0.5)

In [93]:

# Evaluate the selected SVM on the scaled held-out test split.
y_test_hat = svc_05_05.predict(X_test_scaled)
svm_test_accuracy = accuracy_score(y_test, y_test_hat)

print("Test accuracy:", svm_test_accuracy)
print(confusion_matrix(y_test, y_test_hat))

Test accuracy: 0.9844444444444445
[[45  0  0  0  0  0  0  0  0  0]
 [ 0 46  0  0  0  0  0  0  0  0]
 [ 0  1 43  0  0  0  0  0  0  0]
 [ 0  0  0 45  0  0  0  1  0  0]
 [ 0  0  0  0 43  0  0  0  1  1]
 [ 0  0  0  0  0 46  0  0  0  0]
 [ 0  1  0  0  0  0 44  0  0  0]
 [ 0  0  0  0  0  0  0 45  0  0]
 [ 0  1  0  0  0  0  0  0 42  0]
 [ 0  0  0  0  0  1  0  0  0 44]]


- C=0.5, gamma=0.5을 hyperparameter로 설정한 SVM모델의 test accuracy는 약 98.4%임을 알 수 있다.

# 최적의 Learning Algorithm 선택

In [94]:
# Test-set predictions of the three selected models. The SVM was trained on
# scaled features, so it is given the scaled test split.
y_test_hat_DT = DT1.predict(X_test)
y_test_hat_logreg = logreg_001_ovr.predict(X_test)
y_test_hat_svm = svc_05_05.predict(X_test_scaled)

# Printed label -> predictions, reported in the same order as the sections above.
final_predictions = {
    "DecisionTree_Test accuracy:": y_test_hat_DT,
    "LogisticRegression_Test accuracy:": y_test_hat_logreg,
    "SVM_Test accuracy:": y_test_hat_svm,
}
for report_label, predictions in final_predictions.items():
    print(report_label, accuracy_score(y_test, predictions))

DecisionTree_Test accuracy: 0.8555555555555555
LogisticRegression_Test accuracy: 0.9622222222222222
SVM_Test accuracy: 0.9844444444444445


- 위 결과는 3가지 최적의 hyperparameter를 가진 Learning Algorithm들의 test 데이터셋에 대한 성능이다.
- 가장 높은 값을 가지는 SVM이 최적의 Learning Algorithm이라고 할 수 있다.
### 최적의 Learning Algorithm은? -> SVM
