# One-hot Encoder
- 和 1_simple_binary.ipynb 是同樣的功能
- 針對輸出資料（標籤），比較「原本的單一整數類別輸出」與「改用 one-hot 編碼」兩種方式的訓練結果

## 起始資料集

In [7]:
import tensorflow as tf
import numpy as np


# Dataset: two groups of scalar features, each of shape (500, 1).
# Class 0 is drawn from [0, 1); class 1 is the same kind of draw shifted by +1.
samples_per_class = 500
x1 = np.random.random((samples_per_class, 1))
x2 = np.random.random((samples_per_class, 1)) + 1
x_train = np.concatenate([x1, x2], axis=0)

# Integer class labels: all zeros for the first group, all ones for the second.
y1 = np.zeros(samples_per_class, dtype=int)
y2 = np.ones(samples_per_class, dtype=int)
y_train = np.concatenate([y1, y2], axis=0)

# Convert the integer labels into one-hot vectors (two classes).
y_train_onehot = tf.keras.utils.to_categorical(y_train, num_classes=2)

# Show both label encodings for comparison.
print(y_train)
print(y_train_onehot)


[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 

## 建立 model


In [8]:
# Build the model (approach 1): a Sequential stack assembled layer by layer.
# One input feature -> Dense(10, ReLU) -> Dense(10, ReLU) -> Dense(2, softmax).
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(10, activation=tf.nn.relu, input_dim=1))
model.add(tf.keras.layers.Dense(10, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(2, activation=tf.nn.softmax))

# Print the layer / parameter-count table.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 10)                20        
                                                                 
 dense_4 (Dense)             (None, 10)                110       
                                                                 
 dense_5 (Dense)             (None, 2)                 22        
                                                                 
Total params: 152 (608.00 Byte)
Trainable params: 152 (608.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


## 編譯、訓練、測試、預測

### 注意：使用 sparse_categorical_crossentropy

In [9]:
# Compile: the labels passed to fit() below are plain integers (y_train),
# so the sparse variant of categorical cross-entropy is selected.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# Alternative: pass the loss function object instead of its string name.
# model.compile(optimizer='adam', loss=tf.keras.losses.sparse_categorical_crossentropy, metrics=['accuracy'])

# Train on the integer-labelled data.
model.fit(x_train, y_train, epochs=20, batch_size=128, verbose=0)

# Test: two points below 1 (expected class 0) and two above 1 (expected class 1).
x_test = np.array([[0.22], [0.31], [1.22], [1.33]])
y_test = np.array([0, 0, 1, 1])

# With metrics=['accuracy'], evaluate() returns [loss, accuracy].
score = model.evaluate(x_test, y_test, batch_size=128)
print('score:', score)

# Predict: one row of two softmax outputs per test sample.
predict = model.predict(x_test)
print('predict:', predict)
# The predicted class of each row is the index of its largest output.
print('predict class:', *np.argmax(predict, axis=1))


score: [0.4138142466545105, 1.0]
predict: [[0.5835376  0.4164624 ]
 [0.5835376  0.4164624 ]
 [0.2675742  0.73242587]
 [0.23399842 0.7660016 ]]
predict class: 0 0 1 1


## One-hot 編碼方式
### 注意：使用 categorical_crossentropy

In [10]:
# Compile: the labels passed to fit() below are one-hot vectors
# (y_train_onehot), so categorical cross-entropy is used as the loss.
# Only this one compile() call is kept: a second compile() with
# 'binary_crossentropy' would replace this loss before training, and that
# variant is demonstrated separately in the notebook.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# Alternative: pass the loss function object instead of its string name.
# model.compile(optimizer='adam', loss=tf.keras.losses.categorical_crossentropy, metrics=['accuracy'])

# Train on the one-hot labels.
# NOTE: `model` is the same object used earlier in this notebook, not a newly
# built one, so this fit() continues from whatever weights it currently holds.
model.fit(x_train, y_train_onehot, epochs=20, batch_size=128, verbose=0)

# Test: the integer test labels are one-hot encoded the same way as the training labels.
x_test = np.array([[0.22], [0.31], [1.22], [1.33]])
y_test = np.array([0, 0, 1, 1])
y_test_onehot = tf.keras.utils.to_categorical(y_test, num_classes=2)

# With metrics=['accuracy'], evaluate() returns [loss, accuracy].
score = model.evaluate(x_test, y_test_onehot, batch_size=128)
print('score:', score)

# Predict: one row of two softmax outputs per test sample; the predicted
# class is the index of the larger output.
predict = model.predict(x_test)
print('predict:', predict)
print('predict class:', np.argmax(predict[0]), np.argmax(predict[1]), np.argmax(predict[2]), np.argmax(predict[3]))









score: [0.3088674545288086, 1.0]
predict: [[0.65673876 0.34326127]
 [0.65673876 0.34326127]
 [0.20619489 0.79380506]
 [0.1509314  0.8490686 ]]
predict class: 0 0 1 1


### 注意：此處二元分類直接設定 binary_crossentropy

In [11]:
# Compile: binary cross-entropy as the loss; the targets passed to fit()
# below are the one-hot label vectors.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train on the one-hot labels.
model.fit(x_train, y_train_onehot, epochs=20, batch_size=128, verbose=0)

# Test: one-hot encode the integer test labels to match the training targets.
x_test = np.array([[0.22], [0.31], [1.22], [1.33]])
y_test = np.array([0, 0, 1, 1])
y_test_onehot = tf.keras.utils.to_categorical(y_test, num_classes=2)

# With metrics=['accuracy'], evaluate() returns [loss, accuracy].
score = model.evaluate(x_test, y_test_onehot, batch_size=128)
print('score:', score)

# Predict, then report the index of the largest output in each row.
predict = model.predict(x_test)
print('predict:', predict)
print('predict class:', *np.argmax(predict, axis=1))








score: [0.22474655508995056, 1.0]
predict: [[0.7219402  0.27805978]
 [0.7219402  0.27805978]
 [0.14952156 0.8504784 ]
 [0.08185929 0.9181407 ]]
predict class: 0 0 1 1


In [12]:
# Unbind the name `model`; the object can be garbage-collected once no other
# references to it remain.
del model