# 다층 퍼셉트론

In [1]:
import sklearn.neural_network as nn

ppn_clf = nn.MLPClassifier(learning_rate_init = 0.1,
                          max_iter = 1000,
                          activation = 'relu',
                          hidden_layer_sizes = (50,40,30))

#은닉층 3개 , 첫번째 은닉층 노드 50개, 두번째 층 노드40개, 세번째 층 노드 30개

### 데이터

In [2]:
import sklearn.datasets as d

# dir(d) #밑줄 그어있는 거 제외하고 불러올 수 있는 데이터셋(패치-실제 데이터)

iris = d.load_iris()
X = iris.data
y = iris.target
#토이 데이터셋이라 가능 

In [3]:
import sklearn.model_selection as ms 

X_train, X_test, y_train, y_test = ms.train_test_split(X,y,test_size=0.3, random_state = 0)
#random_state : 실행할 때 마다 결과가 달라지는 것 방지 


In [4]:
#학습수행 

In [5]:
clf = ppn_clf.fit(X_train, y_train)

In [6]:
y_pred = clf.predict(X_test)

In [7]:
#정확도 측정 
import sklearn.metrics as mt
score = mt.accuracy_score(y_test, y_pred)
print("정확도:", score)

정확도: 0.9777777777777777


In [8]:
mt.confusion_matrix(y_test, y_pred)

array([[16,  0,  0],
       [ 0, 17,  1],
       [ 0,  0, 11]], dtype=int64)

In [9]:
#한번에 보기
print(mt.classification_report(y_test, y_pred))

#class번호가 0 1 2
#평가지표들 보여줌(1에 가까울 수록 좋음)
#support 찾은 것이 몇개?

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       1.00      0.94      0.97        18
           2       0.92      1.00      0.96        11

    accuracy                           0.98        45
   macro avg       0.97      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45



In [10]:
#은닉층 설정 바꿔보기
ppn_clf = nn.MLPClassifier(learning_rate_init = 0.1,
                          max_iter = 1000,
                          activation = 'relu',
                          hidden_layer_sizes = (100,))

clf = ppn_clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

score = mt.accuracy_score(y_test, y_pred)
print("정확도:", score)

mt.confusion_matrix(y_test, y_pred)

정확도: 0.9777777777777777


array([[16,  0,  0],
       [ 0, 17,  1],
       [ 0,  0, 11]], dtype=int64)

In [11]:
#매개변수의 최적화 값을 찾는 것
#GridSearchCV 모듈 

In [14]:
ppn_clf = nn.MLPClassifier(max_iter = 1000)
params = {
    'hidden_layer_sizes': [(100,),(100,100),(50,),(50,50)],
    'activation' : ['relu','tanh']
}

gs_result = ms.GridSearchCV(ppn_clf,params,n_jobs = -1)
gs_result.fit(X_train, y_train)

GridSearchCV(estimator=MLPClassifier(max_iter=1000), n_jobs=-1,
             param_grid={'activation': ['relu', 'tanh'],
                         'hidden_layer_sizes': [(100,), (100, 100), (50,),
                                                (50, 50)]})

In [None]:
help(ms.GridSearchCV)

In [15]:
gs_result.best_estimator_

MLPClassifier(max_iter=1000)

In [17]:
dir(gs_result)

['__abstractmethods__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_check_is_fitted',
 '_check_n_features',
 '_check_refit_for_multimetric',
 '_estimator_type',
 '_format_results',
 '_get_param_names',
 '_get_tags',
 '_more_tags',
 '_pairwise',
 '_repr_html_',
 '_repr_html_inner',
 '_repr_mimebundle_',
 '_required_parameters',
 '_run_search',
 '_validate_data',
 'best_estimator_',
 'best_index_',
 'best_params_',
 'best_score_',
 'classes_',
 'cv',
 'cv_results_',
 'decision_function',
 'error_score',
 'estimator',
 'fit',
 'get_params',
 'inverse_transform',
 'multimetric_',
 'n_features_in_',
 'n_jobs',
 'n_splits

In [18]:
gs_result.best_params_ #매개변수 최적값

{'activation': 'relu', 'hidden_layer_sizes': (100,)}

In [16]:
gs_result.best_score_

0.9714285714285715

In [None]:
#실습2 - MNIST

In [24]:
mnist = d.fetch_openml('mnist_784',version =1 )

In [26]:
mnist.DESCR

"**Author**: Yann LeCun, Corinna Cortes, Christopher J.C. Burges  \n**Source**: [MNIST Website](http://yann.lecun.com/exdb/mnist/) - Date unknown  \n**Please cite**:  \n\nThe MNIST database of handwritten digits with 784 features, raw data available at: http://yann.lecun.com/exdb/mnist/. It can be split in a training set of the first 60,000 examples, and a test set of 10,000 examples  \n\nIt is a subset of a larger set available from NIST. The digits have been size-normalized and centered in a fixed-size image. It is a good database for people who want to try learning techniques and pattern recognition methods on real-world data while spending minimal efforts on preprocessing and formatting. The original black and white (bilevel) images from NIST were size normalized to fit in a 20x20 pixel box while preserving their aspect ratio. The resulting images contain grey levels as a result of the anti-aliasing technique used by the normalization algorithm. the images were centered in a 28x28 

In [27]:
import numpy as np

X = mnist.data
y = mnist.target.astype(np.int)
#정수로 바꿈

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y = mnist.target.astype(np.int)


In [None]:
#X.shape
#y.shape
#배열의 모양(70000열, 784행)

In [28]:
np.unique(y)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [29]:
X_train, X_test, y_train, y_test = ms.train_test_split(X,y, test_size=0.3)

ppn_clf = nn.MLPClassifier(learning_rate_init=0.001, max_iter=1000,
                          activation = "relu",
                          hidden_layer_sizes=(10,10))
clf = ppn_clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print("정확도=",mt.accuracy_score(y_test,y_pred))


정확도= 0.7615238095238095


In [None]:
#정규화
X = X/255.0
#X를 이렇게 만들고 위로 돌아가 학습 다시 수행 -->정확도 올라감 