<a href="https://colab.research.google.com/github/JakeOh/202205_itw_bd34/blob/main/ml15_ann.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

ANN(Artificial Neural Network, 인공 신경망)

# Imports

In [1]:
import numpy as np
import matplotlib.pyplot as plt

# scikit-learn
from sklearn import datasets
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import cross_validate, train_test_split

# tensorflow, keras
import tensorflow as tf
from tensorflow import keras

In [2]:
tf.__version__  # TensorFlow version installed in Google Colab

'2.8.2'

In [3]:
keras.__version__  # Keras version in Google Colab

'2.8.0'

# Iris 데이터 셋

In [4]:
# Load the Iris data set and pull out the feature matrix and the integer
# class labels as two separate arrays.
iris = datasets.load_iris()
data, target = iris.data, iris.target

In [5]:
# Peek at the first five feature rows.
data[:5]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2]])

In [6]:
# Peek at the first five class labels.
target[:5]

array([0, 0, 0, 0, 0])

## petal_length, petal_width 변수만 선택한 이진 분류

In [7]:
# Binary task on two features: keep columns 2 onward (petal length and
# petal width) as an independent copy, and label a sample 1 when its
# original class is not 0, otherwise 0.
X = np.array(data[:, 2:], order='C')
y = np.where(target != 0, 1, 0)

In [8]:
# First five rows of the two selected feature columns.
X[:5]

array([[1.4, 0.2],
       [1.4, 0.2],
       [1.3, 0.2],
       [1.5, 0.2],
       [1.4, 0.2]])

In [9]:
# Show the whole binary label vector (0 where target == 0, otherwise 1).
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

## SGD Classifier

In [10]:
# Logistic-regression classifier trained with stochastic gradient descent.
# The logistic loss is spelled 'log_loss' since scikit-learn 1.1; the older
# alias 'log' was deprecated there and removed in 1.3, so the old spelling
# raises an error on an up-to-date Colab runtime.
# random_state is fixed so repeated runs give the same fit.
sgd = SGDClassifier(loss='log_loss', max_iter=10,
                    n_jobs=-1, random_state=42)

In [11]:
# Fit on all samples (no train/test split is made in this notebook).
sgd.fit(X, y)

SGDClassifier(loss='log', max_iter=10, n_jobs=-1, random_state=42)

In [12]:
# Learned weights, one per input feature.
sgd.coef_

array([[9.29310825, 9.5046378 ]])

In [13]:
# Learned bias (intercept) term.
sgd.intercept_

array([-33.65502003])

In [14]:
# Per-class probabilities for every sample: column 0 is class 0, column 1 is class 1.
pred_prob = sgd.predict_proba(X)
pred_prob[:5]

array([[9.99999993e-01, 7.23930287e-09],
       [9.99999993e-01, 7.23930287e-09],
       [9.99999997e-01, 2.85826298e-09],
       [9.99999982e-01, 1.83354387e-08],
       [9.99999993e-01, 7.23930287e-09]])

In [15]:
# Hard class predictions (0 or 1) for every sample; show the first five.
pred = sgd.predict(X)
pred[:5]

array([0, 0, 0, 0, 0])

In [16]:
# Training accuracy: share of samples whose predicted label equals the true label.
(y == pred).mean()

1.0

## Neural Network

In [17]:
# Build the neural network: a single Dense unit with a sigmoid activation
# over the 2 input features.
# Seed Python, NumPy and TensorFlow first so that weight initialisation and
# batch shuffling repeat after Restart & Run All (exact repeatability may
# still depend on the hardware backend).
keras.utils.set_random_seed(42)
# The input shape is declared with keras.Input rather than `input_shape=` on
# the layer, which newer Keras releases warn against.
model = keras.Sequential(layers=[keras.Input(shape=(2,)),
                                 keras.layers.Dense(units=1,
                                                    activation='sigmoid')],
                         name='iris_binary_single_layer')

In [18]:
# Model summary: layers and parameter counts.
model.summary()

Model: "iris_binary_single_layer"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 1)                 3         
                                                                 
Total params: 3
Trainable params: 3
Non-trainable params: 0
_________________________________________________________________


In [19]:
# Compile the model: set the optimizer (training algorithm), the loss
# function, and the metrics reported during training.
# `metrics` is given as a list, the documented form; Keras 3 raises a
# ValueError when it receives a bare callable instead of a list/tuple/dict.
model.compile(optimizer='sgd',
              loss=keras.losses.binary_crossentropy,
              metrics=[keras.metrics.binary_accuracy])

In [20]:
# Train the network: 10 passes over the data, updating after every single sample.
model.fit(x=X, y=y, batch_size=1, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f06a01d3110>

In [21]:
# Trained parameters: the Dense layer's kernel (weights) and bias.
model.weights

[<tf.Variable 'dense/kernel:0' shape=(2, 1) dtype=float32, numpy=
 array([[0.48115423],
        [1.4339734 ]], dtype=float32)>,
 <tf.Variable 'dense/bias:0' shape=(1,) dtype=float32, numpy=array([-1.7035384], dtype=float32)>]

In [22]:
pred = model.predict(X)  # in binary classification, predict() gives the probability of the positive class (1)
pred.shape

(150, 1)

In [23]:
# First five predicted probabilities.
pred[:5]

array([[0.32232055],
       [0.32232055],
       [0.31190178],
       [0.33291897],
       [0.32232055]], dtype=float32)

In [24]:
# To turn probabilities into predicted labels (0 or 1), compare them with 0.5,
# then flatten the (n, 1) column into a 1-D vector.
is_positive = pred > 0.5
pred_label = is_positive.astype('int').reshape(-1)
pred_label

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [25]:
# Accuracy: compare the true labels with the predicted labels.
(y == pred_label).mean()

1.0

## 모든 변수(sl, sw, pl, pw)를 사용한 이진 분류

In [26]:
# Use all four feature columns this time; X is an independent copy of `data`.
# The binary labels `y` from the earlier cell are reused unchanged.
X = np.array(data, order='C')

### SGD

In [27]:
# Fresh logistic-regression SGD classifier for the four-feature data.
# The logistic loss is spelled 'log_loss' since scikit-learn 1.1; the older
# alias 'log' was deprecated there and removed in 1.3, so the old spelling
# raises an error on an up-to-date Colab runtime.
# random_state is fixed so repeated runs give the same fit.
sgd = SGDClassifier(loss='log_loss', max_iter=10,
                    n_jobs=-1, random_state=42)

In [28]:
# Fit on all samples using all four features.
sgd.fit(X, y)

SGDClassifier(loss='log', max_iter=10, n_jobs=-1, random_state=42)

In [35]:
# Learned weights (one per feature) and intercept of the four-feature model.
sgd.coef_, sgd.intercept_

(array([[ -7.56466569, -26.11029771,  43.43582235,  20.74182528]]),
 array([-14.82209562]))

In [29]:
pred = sgd.predict(X)  # predicted labels (0 or 1)

In [30]:
# Training accuracy of the four-feature SGD classifier.
(y == pred).mean()

1.0

### 신경망

In [31]:
# Single sigmoid Dense unit over the 4 input features.
# Seed Python, NumPy and TensorFlow first so that weight initialisation and
# batch shuffling repeat after Restart & Run All (exact repeatability may
# still depend on the hardware backend).
keras.utils.set_random_seed(42)
# The input shape is declared with keras.Input rather than `input_shape=` on
# the layer, which newer Keras releases warn against.
model = keras.Sequential(layers=[keras.Input(shape=(4,)),
                                 keras.layers.Dense(units=1,
                                                    activation='sigmoid')])

In [32]:
# Layer and parameter summary of the four-input model.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_1 (Dense)             (None, 1)                 5         
                                                                 
Total params: 5
Trainable params: 5
Non-trainable params: 0
_________________________________________________________________


In [33]:
# Compile with a default-configured SGD optimizer, binary cross-entropy loss
# and binary accuracy as the reported metric.
# `metrics` is given as a list, the documented form; Keras 3 raises a
# ValueError when it receives a bare callable instead of a list/tuple/dict.
model.compile(optimizer=keras.optimizers.SGD(),
              loss=keras.losses.binary_crossentropy,
              metrics=[keras.metrics.binary_accuracy])

In [34]:
# Train for 10 epochs, updating the weights after every single sample.
model.fit(x=X, y=y, epochs=10, batch_size=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f06a027d2d0>

In [36]:
# Training the network on the data is what finds these weights.
model.weights

[<tf.Variable 'dense_1/kernel:0' shape=(4, 1) dtype=float32, numpy=
 array([[-0.18837994],
        [-1.1131188 ],
        [ 1.5116737 ],
        [ 0.99822456]], dtype=float32)>,
 <tf.Variable 'dense_1/bias:0' shape=(1,) dtype=float32, numpy=array([-0.37639856], dtype=float32)>]

In [37]:
# Predicted positive-class probabilities; show the first five.
pred = model.predict(X)
pred[:5]

array([[0.05131645],
       [0.08924986],
       [0.06543989],
       [0.0973995 ],
       [0.04699684]], dtype=float32)

In [40]:
# Threshold the probabilities at 0.5 and flatten to a 1-D vector of 0/1 labels.
above_half = pred > 0.5
pred_label = above_half.astype('int').reshape(-1)
pred_label[:5]

array([0, 0, 0, 0, 0])

In [42]:
# Training-set accuracy of the four-feature network.
(y == pred_label).mean()

1.0