In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

tf.__version__

'2.15.0'

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# ZOO classification

### Data list

1. 동물 이름  animal name:     (deleted)
2. 털  hair     Boolean
3. 깃털  feathers     Boolean
4. 알  eggs     Boolean
5. 우유 milk     Boolean
6. 날 수있는지  airborne     Boolean
7. 수중 생물  aquatic      Boolean
8. 포식자  predator     Boolean
9. 이빨이 있는지 toothed      Boolean
10. 척추 동물  backbone     Boolean
11. 호흡 방법  breathes     Boolean
12. 독  venomous     Boolean
13. 물갈퀴  fins     Boolean
14. 다리  legs     Numeric (set of values: {0",2,4,5,6,8})
15. 꼬리  tail     Boolean
16. 사육 가능한 지 domestic     Boolean
17. 고양이 크기인지 catsize      Boolean
18. 동물 타입 type     Numeric (integer values in range [0",6])

In [3]:
xy = np.loadtxt('/content/drive/MyDrive/dataset/example_data/data-04-zoo.csv',
                delimiter=',',
                dtype=np.int32)
x_train = xy[0:-10, 0:-1]
y_train = xy[0:-10, [-1]]

x_train = tf.cast(x_train, tf.float32)

x_test = xy[-10:, 0:-1]
y_test = xy[-10:, [-1]]

print(y_test.shape)

x_test = tf.cast(x_test, tf.float32)

nb_classes = 7  # 0 ~ 6

# [0, 1, 2] 총 class가 3개일때,
# label : 0, 0, 1, 2, 0, 1, 2 ....
# 1 => [0, 1, 0]
# 0 => [1, 0, 0]
# 2 => [0, 0, 1]

print(y_train[15])
y_train = tf.one_hot(list(y_train), nb_classes)
y_train = tf.reshape(y_train, [-1, nb_classes])
print(y_train[15])

y_test = tf.one_hot(list(y_test), nb_classes)
y_test = tf.reshape(y_test, [-1, nb_classes])

print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

print(x_train.dtype, y_train.dtype)
print(x_test.dtype, y_test.dtype)


(10, 1)
[6]
tf.Tensor([0. 0. 0. 0. 0. 0. 1.], shape=(7,), dtype=float32)
(91, 16) (91, 7)
(10, 16) (10, 7)
<dtype: 'float32'> <dtype: 'float32'>
<dtype: 'float32'> <dtype: 'float32'>


In [4]:
dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(len(x_train))

W = tf.Variable(tf.random.normal([16, nb_classes]), name='weight')
b = tf.Variable(tf.random.normal([nb_classes]), name='bias')

print(W.shape, b.shape)

(16, 7) (7,)


# 가설 설정

* 주어진 동물의 데이터들로 분류하는 가설 모델을 생성한다

## $$ y_k = \frac{exp(H(x_k))}{\sum_{i=1}^{n}exp(H(x_i))}  $$

In [5]:
def logistic_regression(features): # hypothesis_softmax
    return tf.nn.softmax(tf.matmul(features, W) + b)

print(logistic_regression(x_train))

tf.Tensor(
[[9.97862458e-01 3.65469077e-07 4.96618391e-04 7.14177673e-04
  9.11449199e-04 2.57889127e-07 1.46230377e-05]
 [9.81055617e-01 1.33461581e-04 3.40082590e-03 5.14679682e-03
  1.02063948e-02 7.01570525e-06 4.99651651e-05]
 [3.87531608e-01 3.37165222e-02 6.77116513e-02 4.53361899e-01
  5.41772582e-02 2.06448767e-05 3.48041789e-03]
 [9.97862458e-01 3.65469077e-07 4.96618391e-04 7.14177673e-04
  9.11449199e-04 2.57889127e-07 1.46230377e-05]
 [9.92906570e-01 5.59150203e-06 8.37069645e-04 4.88271937e-04
  5.70859993e-03 6.31319381e-07 5.33554812e-05]
 [9.81055617e-01 1.33461581e-04 3.40082590e-03 5.14679682e-03
  1.02063948e-02 7.01570525e-06 4.99651651e-05]
 [9.65767562e-01 2.04976968e-04 1.21739146e-03 1.53823812e-02
  1.73420217e-02 1.95928769e-05 6.60387377e-05]
 [2.33203713e-02 7.64685795e-02 6.09250460e-03 8.83630037e-01
  1.01824170e-02 3.96392352e-05 2.66510906e-04]
 [3.87531608e-01 3.37165222e-02 6.77116513e-02 4.53361899e-01
  5.41772582e-02 2.06448767e-05 3.48041789e-03]

## Loss Function

##$$
\begin{align}
cost(H(x),y) & = −\sum_{n=1}^{n} Y log(H(x))
\end{align}
$$

In [6]:
def loss_fn(labels, hypothesis):
    # loss = -tf.reduce_mean(labels * tf.math.log(hypothesis) + (1 - labels) * tf.math.log(1 - hypothesis))
    loss = tf.keras.losses.categorical_crossentropy(labels, hypothesis)
    return loss

optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.001)

In [7]:
epochs = 5000

for step in range(epochs):
  for features, labels in dataset:
    with tf.GradientTape() as tape:
      pred = logistic_regression(features)
      loss_value = loss_fn(labels, pred)
      grads = tape.gradient(loss_value, [W, b])
      optimizer.apply_gradients(grads_and_vars=zip(grads, [W, b]))
      if step % 100 == 0:
            print("Iter: {}, Loss: {:.4f}".format(step, tf.reduce_mean(loss_fn(logistic_regression(features),labels))))


Iter: 0, Loss: 9.0271
Iter: 100, Loss: 5.0552
Iter: 200, Loss: 3.5679
Iter: 300, Loss: 2.9066
Iter: 400, Loss: 2.4791
Iter: 500, Loss: 2.1719
Iter: 600, Loss: 1.9414
Iter: 700, Loss: 1.7617
Iter: 800, Loss: 1.6164
Iter: 900, Loss: 1.4953
Iter: 1000, Loss: 1.3917
Iter: 1100, Loss: 1.3014
Iter: 1200, Loss: 1.2217
Iter: 1300, Loss: 1.1506
Iter: 1400, Loss: 1.0868
Iter: 1500, Loss: 1.0291
Iter: 1600, Loss: 0.9768
Iter: 1700, Loss: 0.9292
Iter: 1800, Loss: 0.8857
Iter: 1900, Loss: 0.8459
Iter: 2000, Loss: 0.8092
Iter: 2100, Loss: 0.7755
Iter: 2200, Loss: 0.7443
Iter: 2300, Loss: 0.7154
Iter: 2400, Loss: 0.6885
Iter: 2500, Loss: 0.6636
Iter: 2600, Loss: 0.6403
Iter: 2700, Loss: 0.6185
Iter: 2800, Loss: 0.5982
Iter: 2900, Loss: 0.5791
Iter: 3000, Loss: 0.5612
Iter: 3100, Loss: 0.5443
Iter: 3200, Loss: 0.5284
Iter: 3300, Loss: 0.5134
Iter: 3400, Loss: 0.4992
Iter: 3500, Loss: 0.4857
Iter: 3600, Loss: 0.4730
Iter: 3700, Loss: 0.4609
Iter: 3800, Loss: 0.4494
Iter: 3900, Loss: 0.4385
Iter: 4000, 

In [8]:
def accuracy_fn(hypothesis, labels):
    print(hypothesis)
    hypothesis = tf.argmax(hypothesis, 1)
    print(hypothesis)
    predicted = tf.cast(hypothesis, dtype=tf.float32)
    print(predicted)

    labels = tf.argmax(labels, 1)
    labels = tf.cast(labels, dtype=tf.float32)
    print(labels)

    accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, labels), dtype=tf.float32))

    return accuracy

In [9]:
test_acc = accuracy_fn(logistic_regression(x_test),y_test)
print("Testset Accuracy: {:.4f}".format(test_acc))

tf.Tensor(
[[1.4980710e-02 4.7399080e-04 2.9511353e-01 1.0696776e-05 6.7130738e-01
  1.7277369e-03 1.6385794e-02]
 [1.4926172e-04 1.7036715e-03 3.2305950e-03 9.9445492e-01 3.1943966e-04
  3.1903300e-09 1.4194271e-04]
 [9.8849458e-01 8.3978572e-05 1.0792634e-02 8.6890662e-07 3.6666761e-04
  2.6062367e-04 5.0833711e-07]
 [9.9731064e-01 3.2374072e-07 1.0958459e-03 3.0753895e-09 1.5019949e-03
  9.0157344e-05 1.0496885e-06]
 [1.5826343e-04 9.9496746e-01 4.6042968e-03 1.6650058e-05 6.6462449e-06
  9.0878711e-05 1.5597645e-04]
 [9.9510717e-01 5.1606007e-06 4.7814455e-03 7.8439621e-07 1.0347663e-04
  1.4630670e-06 4.8094125e-07]
 [1.4826478e-04 1.1907207e-06 1.5148593e-04 6.9320910e-11 3.1387087e-04
  9.9767226e-01 1.7129480e-03]
 [9.9873692e-01 2.0567980e-07 9.6327550e-04 1.5219209e-09 2.9110129e-04
  2.1411449e-06 6.2852814e-06]
 [1.3202489e-02 4.1597545e-02 1.0007971e-01 5.7198316e-02 1.5653022e-02
  1.2964607e-02 7.5930429e-01]
 [1.0031571e-04 9.9408603e-01 3.3248444e-03 2.1356653e-05 2.17