# 다층 퍼셉트론 프로그래밍

In [1]:
from sklearn import datasets
from sklearn.neural_network import MLPClassifier # 다층 퍼셉트론
from sklearn.model_selection import train_test_split # 학습데이터와 검증 데이터를 구분
import numpy as np

In [2]:
digit = datasets.load_digits()
x_train, x_test, y_train, y_test = train_test_split(digit.data, digit.target, test_size = 0.3, random_state = 32) # 7:3으로 분류

In [3]:
# MLP 분류기 학습
mlp = MLPClassifier(hidden_layer_sizes = (100), learning_rate_init=0.001,
                    batch_size = 32, max_iter = 300 # 최대 300번까지 학습이지만 그전에 최적화 되면 stop
                   , solver = 'sgd', verbose = True) # verbose : 학습과정을 보여줌(디폴트는 False)

In [4]:
mlp.fit(x_train,y_train)

Iteration 1, loss = 1.89129402
Iteration 2, loss = 0.29781083
Iteration 3, loss = 0.19976415
Iteration 4, loss = 0.14876073
Iteration 5, loss = 0.12494284
Iteration 6, loss = 0.10421923
Iteration 7, loss = 0.09342525
Iteration 8, loss = 0.08204918
Iteration 9, loss = 0.07446860
Iteration 10, loss = 0.06514663
Iteration 11, loss = 0.05968044
Iteration 12, loss = 0.05763656
Iteration 13, loss = 0.05187656
Iteration 14, loss = 0.04855765
Iteration 15, loss = 0.04423666
Iteration 16, loss = 0.03994728
Iteration 17, loss = 0.03754712
Iteration 18, loss = 0.03501431
Iteration 19, loss = 0.03416103
Iteration 20, loss = 0.03161469
Iteration 21, loss = 0.03022751
Iteration 22, loss = 0.02729125
Iteration 23, loss = 0.02856543
Iteration 24, loss = 0.02629268
Iteration 25, loss = 0.02451510
Iteration 26, loss = 0.02257927
Iteration 27, loss = 0.02231760
Iteration 28, loss = 0.02083924
Iteration 29, loss = 0.02007392
Iteration 30, loss = 0.01931455
Iteration 31, loss = 0.01859597
Iteration 32, los

In [5]:
# 예측
predict = mlp.predict(x_test)
mlp.score(x_test, y_test)

0.9851851851851852

In [6]:
# 혼돈행렬
conf = np.zeros((10,10))
for i in range(len(predict)):
    conf[predict[i], y_test[i]] += 1
conf

array([[48.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0., 56.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0., 45.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0., 74.,  0.,  0.,  0.,  0.,  2.,  0.],
       [ 0.,  0.,  0.,  0., 50.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0., 57.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0., 57.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0., 53.,  0.,  1.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  0., 37.,  0.],
       [ 0.,  0.,  0.,  0.,  1.,  1.,  0.,  0.,  1., 55.]])

In [7]:
sum([conf[i,i] for i in range(len(conf))]) / len(predict)

0.9851851851851852

- 퍼셉트론 < 다층퍼셉트론 < svm

----

In [8]:
# 화소수가 많은 MNIST 데이터 셋으로 확장
# 8 x 8 -> 28 x 28

In [9]:
# MNIST 데이터셋을 다층 퍼셉트론으로 인식
from sklearn.datasets import fetch_openml

In [10]:
# 시각화
import matplotlib.pyplot as plt
import seaborn as sns

In [11]:
# MNIST 데이터셋 읽기
mnist = fetch_openml('mnist_784')

In [12]:
type(mnist)

sklearn.utils._bunch.Bunch

In [13]:
mnist.keys()

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

In [14]:
X = mnist.data
Y = mnist.target

In [15]:
X.shape, Y.shape

((70000, 784), (70000,))

In [16]:
x_train, x_test, y_train, y_test = train_test_split(X,Y,test_size = 0.3, random_state=32)

In [17]:
mlp = MLPClassifier(hidden_layer_sizes=(100), learning_rate_init=0.001
             ,batch_size = 512, max_iter = 300, solver = 'adam', verbose = True)

In [18]:
mlp.fit(x_train, y_train)

Iteration 1, loss = 5.73996526
Iteration 2, loss = 1.88011007
Iteration 3, loss = 1.25126958
Iteration 4, loss = 0.90506720
Iteration 5, loss = 0.68427842
Iteration 6, loss = 0.52592613
Iteration 7, loss = 0.41579753
Iteration 8, loss = 0.33382644
Iteration 9, loss = 0.27173203
Iteration 10, loss = 0.22790534
Iteration 11, loss = 0.18241951
Iteration 12, loss = 0.14851019
Iteration 13, loss = 0.12602128
Iteration 14, loss = 0.10923751
Iteration 15, loss = 0.10453139
Iteration 16, loss = 0.08592386
Iteration 17, loss = 0.07385869
Iteration 18, loss = 0.06344919
Iteration 19, loss = 0.05457604
Iteration 20, loss = 0.04962246
Iteration 21, loss = 0.04714802
Iteration 22, loss = 0.04827988
Iteration 23, loss = 0.04724158
Iteration 24, loss = 0.05061021
Iteration 25, loss = 0.04264600
Iteration 26, loss = 0.04349129
Iteration 27, loss = 0.04458033
Iteration 28, loss = 0.04068405
Iteration 29, loss = 0.04291117
Iteration 30, loss = 0.04930763
Iteration 31, loss = 0.05364076
Iteration 32, los

In [19]:
predict = mlp.predict(x_test)
predict

array(['6', '9', '2', ..., '3', '5', '7'], dtype='<U1')

In [20]:
mlp.score(x_test, y_test)

0.9553809523809523

### 하이퍼 매개변수의 값을 결정하는 방법

In [21]:
# hidden_layer_sizes = (100)     100개의 노드를 가지는 히든 레이어 한개 층을 둔다
# hidden_layer_sizes = (100,80)
# learning_rate_init = 0.001     학습률 p = 0.001
# batch_size = 32               미니배치 크기를 32로 설정
# max_iter = 300               최대 epoch수를 300
# solv = ' sgd'최적화 알고리즘(옵티마이져) 스토캐스틱 경사하강법
# n_jobs = 코어 개수에 따른 코어 사용개수

- 학습을 중간에 멈춘 이유 n_iter_no_change = 10 10세대 동안 손실함수 감소량이 tol = 0.0001   0.0001이하면 멈춤

In [22]:
import time
from sklearn.model_selection import validation_curve

In [23]:
start = time.time()
start

1664706845.4968677

In [24]:
# 오래걸림
# start = time.time()
# mlp = MLPClassifier(learning_rate_init = 0.001, batch_size = 32, max_iter = 300, solver = 'sgd')
# prange = range(50,1001,50)
# train_score, test_score =  validation_curve(mlp, x_train, y_train, param_name = 'hidden_layer_sizes'
#                                     ,param_range = prange,cv = 10, scoring = 'accuracy', n_jobs = 4, verbose = True)
# end = time.time()
# print(f"하이퍼 매개변수 최적화에 걸린시간은 {end-start}")

In [25]:
# train_mean = np.mean(train_score, axis = 1)
# train_std = np.std(train_score, axis = 1)
# test_mean = np.mean(train_score, axis = 1)
# test_std = np.std(train_score, axis = 1)

In [26]:
# best_numbers_nodes = prange[np.argmax(test_mean)]

In [27]:
# best_numbers_nodes 으로 모델링해서 결과를 확인