In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

tf.__version__

'2.0.0'

# ZOO classification

### Data list

1. 동물 이름  animal name:     (deleted)
2. 털  hair     Boolean
3. 깃털  feathers     Boolean
4. 알  eggs     Boolean
5. 우유 milk     Boolean
6. 날 수있는지  airborne     Boolean
7. 수중 생물  aquatic      Boolean
8. 포식자  predator     Boolean
9. 이빨이 있는지 toothed      Boolean
10. 척추 동물  backbone     Boolean
11. 호흡 방법  breathes     Boolean
12. 독  venomous     Boolean
13. 물갈퀴  fins     Boolean
14. 다리  legs     Numeric (set of values: {0",2,4,5,6,8})
15. 꼬리  tail     Boolean
16. 사육 가능한 지 domestic     Boolean
17. 고양이 크기인지 catsize      Boolean
18. 동물 타입 type     Numeric (integer values in range [0",6])

In [3]:
xy = np.loadtxt('../Day_1/data-04-zoo.csv', delimiter=',', dtype=np.int32)
x_train = xy[0:-10, 0:-1]
y_train = xy[0:-10, [-1]]

x_train = tf.cast(x_train, tf.float32)

x_test = xy[-10:, 0:-1]
y_test = xy[-10:, [-1]]

x_test = tf.cast(x_test, tf.float32)

nb_classes = 7  # 0 ~ 6

y_train = tf.one_hot(list(y_train), nb_classes)
y_train = tf.reshape(y_train, [-1, nb_classes])


y_test = tf.one_hot(list(y_test), nb_classes)
y_test = tf.reshape(y_test, [-1, nb_classes])

print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

print(x_train.dtype, y_train.dtype)
print(x_test.dtype, y_test.dtype)


(91, 16) (91, 7)
(10, 16) (10, 7)
<dtype: 'float32'> <dtype: 'float32'>
<dtype: 'float32'> <dtype: 'float32'>


In [5]:
dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(len(x_train))

W = tf.Variable(tf.random.normal([16, nb_classes]), name='weight')
b = tf.Variable(tf.random.normal([nb_classes]), name='bias')

print(W.shape, b.shape)

(16, 7) (7,)


# 가설 설정

* 주어진 동물의 데이터들로 분류하는 가설 모델을 생성한다

## $$ y_k = \frac{exp(x_k)}{\sum_{i=1}^{n}(x_i)}  $$

In [6]:
def logistic_regression(features):
    return tf.nn.softmax(tf.matmul(features, W) + b)
  
print(logistic_regression(x_train))

tf.Tensor(
[[7.91488419e-05 9.99198377e-01 8.68481814e-11 1.66612212e-04
  5.55863488e-04 7.59046506e-11 7.15354673e-08]
 [3.01785721e-03 9.94526803e-01 3.25311023e-09 9.93358844e-05
  2.35555833e-03 5.71062797e-09 3.71824541e-07]
 [5.58176786e-02 8.89892280e-01 2.77300887e-02 7.36197550e-03
  1.40492106e-02 4.82901512e-03 3.19739454e-04]
 [7.91488419e-05 9.99198377e-01 8.68481814e-11 1.66612212e-04
  5.55863488e-04 7.59046506e-11 7.15354673e-08]
 [4.61328658e-04 9.98578668e-01 2.94308244e-10 3.89773013e-05
  9.21102182e-04 6.43180287e-10 7.00154601e-08]
 [3.01785721e-03 9.94526803e-01 3.25311023e-09 9.93358844e-05
  2.35555833e-03 5.71062797e-09 3.71824541e-07]
 [2.23961496e-03 9.90158737e-01 2.11060414e-08 1.45423727e-03
  6.14508661e-03 5.31497912e-09 2.36799315e-06]
 [7.60934800e-02 2.47783765e-01 5.58429122e-01 7.71313310e-02
  2.63199285e-02 1.12057198e-02 3.03665595e-03]
 [5.58176786e-02 8.89892280e-01 2.77300887e-02 7.36197550e-03
  1.40492106e-02 4.82901512e-03 3.19739454e-04]

## Loss Function

## $$
\begin{align}
loss(h(x),y) & = −y log(h(x))−(1−y)log(1−h(x))
\end{align}
$$

In [10]:
def loss_fn(hypothesis, labels):
    cost = -tf.reduce_mean(labels * tf.math.log(hypothesis) + (1 - labels) * tf.math.log(1 - hypothesis))
    return cost

optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.01)

In [11]:
epochs = 5000

for step in range(epochs):
  for features, labels in dataset:
    with tf.GradientTape() as tape:
      loss_value = loss_fn(logistic_regression(features),labels)
      grads = tape.gradient(loss_value, [W,b])
      optimizer.apply_gradients(grads_and_vars=zip(grads,[W,b]))
      if step % 100 == 0:
            print("Iter: {}, Loss: {:.4f}".format(step, loss_fn(logistic_regression(features),labels)))
          

Iter: 0, Loss: 1.7658
Iter: 100, Loss: 1.0729
Iter: 200, Loss: 0.8281
Iter: 300, Loss: 0.7458
Iter: 400, Loss: 0.7019
Iter: 500, Loss: 0.6703
Iter: 600, Loss: 0.6434
Iter: 700, Loss: 0.6188
Iter: 800, Loss: 0.5954
Iter: 900, Loss: 0.5730
Iter: 1000, Loss: 0.5514
Iter: 1100, Loss: 0.5305
Iter: 1200, Loss: 0.5104
Iter: 1300, Loss: 0.4911
Iter: 1400, Loss: 0.4728
Iter: 1500, Loss: 0.4558
Iter: 1600, Loss: 0.4402
Iter: 1700, Loss: 0.4259
Iter: 1800, Loss: 0.4126
Iter: 1900, Loss: 0.4002
Iter: 2000, Loss: 0.3884
Iter: 2100, Loss: 0.3772
Iter: 2200, Loss: 0.3664
Iter: 2300, Loss: 0.3561
Iter: 2400, Loss: 0.3462
Iter: 2500, Loss: 0.3367
Iter: 2600, Loss: 0.3276
Iter: 2700, Loss: 0.3189
Iter: 2800, Loss: 0.3106
Iter: 2900, Loss: 0.3027
Iter: 3000, Loss: 0.2951
Iter: 3100, Loss: 0.2879
Iter: 3200, Loss: 0.2810
Iter: 3300, Loss: 0.2743
Iter: 3400, Loss: 0.2679
Iter: 3500, Loss: 0.2618
Iter: 3600, Loss: 0.2559
Iter: 3700, Loss: 0.2501
Iter: 3800, Loss: 0.2446
Iter: 3900, Loss: 0.2391
Iter: 4000, 

In [12]:
def accuracy_fn(hypothesis, labels):
    predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)    
    accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, labels), dtype=tf.float32))
    return accuracy

In [13]:
test_acc = accuracy_fn(logistic_regression(x_test),y_test)
print("Testset Accuracy: {:.4f}".format(test_acc))

Testset Accuracy: 0.9429
