In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
tf.enable_eager_execution()
tf.__version__

# ZOO classification

### Data list

1. 동물 이름  animal name:     (deleted)
2. 털  hair     Boolean
3. 깃털  feathers     Boolean
4. 알  eggs     Boolean
5. 우유 milk     Boolean
6. 날 수있는지  airborne     Boolean
7. 수중 생물  aquatic      Boolean
8. 포식자  predator     Boolean
9. 이빨이 있는지 toothed      Boolean
10. 척추 동물  backbone     Boolean
11. 호흡 방법  breathes     Boolean
12. 독  venomous     Boolean
13. 물갈퀴  fins     Boolean
14. 다리  legs     Numeric (set of values: {0",2,4,5,6,8})
15. 꼬리  tail     Boolean
16. 사육 가능한 지 domestic     Boolean
17. 고양이 크기인지 catsize      Boolean
18. 동물 타입 type     Numeric (integer values in range [0",6])

In [None]:
xy = np.loadtxt('./data-04-zoo.csv', delimiter=',', dtype=np.int32)
x_train = xy[0:-10, 0:-1]
y_train = xy[0:-10, [-1]]

x_train = tf.cast(x_train, tf.float32)

x_test = xy[-10:, 0:-1]
y_test = xy[-10:, [-1]]

x_test = tf.cast(x_test, tf.float32)

nb_classes = 7  # 0 ~ 6

y_train = tf.one_hot(list(y_train), nb_classes)
y_train = tf.reshape(y_train, [-1, nb_classes])


y_test = tf.one_hot(list(y_test), nb_classes)
y_test = tf.reshape(y_test, [-1, nb_classes])

print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

print(x_train.dtype, y_train.dtype)
print(x_test.dtype, y_test.dtype)


In [3]:
dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(len(x_train))

W = tf.Variable(tf.random_normal([16, nb_classes]), name='weight')
b = tf.Variable(tf.random_normal([nb_classes]), name='bias')

print(W.shape, b.shape)

(16, 7) (7,)


# 가설 설정

* 주어진 동물의 데이터들로 분류하는 가설 모델을 생성한다

## $$ y_k = \frac{exp(x_k)}{\sum_{i=1}^{n}(x_i)}  $$

In [4]:
def logistic_regression(features):
    return tf.nn.softmax(tf.matmul(features, W) + b)
  
print(logistic_regression(x_train))

tf.Tensor(
[[2.59999069e-04 2.27401179e-04 2.78171992e-05 5.33371240e-05
  3.60713638e-02 4.75822002e-01 4.87538040e-01]
 [1.24953394e-05 2.45422943e-05 5.10665268e-06 1.01480655e-05
  1.21735977e-02 3.22862625e-01 6.64911449e-01]
 [4.62341189e-01 1.02376211e-02 5.77087572e-04 2.10983492e-03
  3.95924877e-03 5.10393858e-01 1.03811836e-02]
 [2.59999069e-04 2.27401179e-04 2.78171992e-05 5.33371240e-05
  3.60713638e-02 4.75822002e-01 4.87538040e-01]
 [2.34420077e-05 5.13266714e-05 6.04117940e-06 1.65942656e-05
  2.81189084e-02 3.36358875e-01 6.35424852e-01]
 [1.24953394e-05 2.45422943e-05 5.10665268e-06 1.01480655e-05
  1.21735977e-02 3.22862625e-01 6.64911449e-01]
 [3.16285768e-05 3.63749459e-05 5.49743891e-06 1.38476782e-04
  3.64101082e-02 2.41978437e-01 7.21399486e-01]
 [6.03710115e-01 7.02165067e-03 5.08231635e-04 1.70391593e-02
  4.96154325e-03 3.55353147e-01 1.14061572e-02]
 [4.62341189e-01 1.02376211e-02 5.77087572e-04 2.10983492e-03
  3.95924877e-03 5.10393858e-01 1.03811836e-02]

## Loss Function

## $$
\begin{align}
loss(h(x),y) & = −y log(h(x))−(1−y)log(1−h(x))
\end{align}
$$

In [5]:
def loss_fn(hypothesis, labels):
    cost = -tf.reduce_mean(labels * tf.log(hypothesis) + (1 - labels) * tf.log(1 - hypothesis))
    return cost

optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)

In [6]:
epochs = 5000

for step in range(epochs):
  for features, labels in dataset:
    with tf.GradientTape() as tape:
      loss_value = loss_fn(logistic_regression(features),labels)
      grads = tape.gradient(loss_value, [W,b])
      optimizer.apply_gradients(grads_and_vars=zip(grads,[W,b]))
      if step % 100 == 0:
            print("Iter: {}, Loss: {:.4f}".format(step, loss_fn(logistic_regression(features),labels)))
          

Iter: 0, Loss: 1.1400
Iter: 100, Loss: 0.9999
Iter: 200, Loss: 0.8727
Iter: 300, Loss: 0.7528
Iter: 400, Loss: 0.6395
Iter: 500, Loss: 0.5384
Iter: 600, Loss: 0.4648
Iter: 700, Loss: 0.4232
Iter: 800, Loss: 0.3981
Iter: 900, Loss: 0.3792
Iter: 1000, Loss: 0.3629
Iter: 1100, Loss: 0.3482
Iter: 1200, Loss: 0.3346
Iter: 1300, Loss: 0.3219
Iter: 1400, Loss: 0.3099
Iter: 1500, Loss: 0.2987
Iter: 1600, Loss: 0.2881
Iter: 1700, Loss: 0.2780
Iter: 1800, Loss: 0.2686
Iter: 1900, Loss: 0.2597
Iter: 2000, Loss: 0.2512
Iter: 2100, Loss: 0.2432
Iter: 2200, Loss: 0.2356
Iter: 2300, Loss: 0.2285
Iter: 2400, Loss: 0.2217
Iter: 2500, Loss: 0.2152
Iter: 2600, Loss: 0.2091
Iter: 2700, Loss: 0.2033
Iter: 2800, Loss: 0.1978
Iter: 2900, Loss: 0.1926
Iter: 3000, Loss: 0.1877
Iter: 3100, Loss: 0.1831
Iter: 3200, Loss: 0.1787
Iter: 3300, Loss: 0.1746
Iter: 3400, Loss: 0.1707
Iter: 3500, Loss: 0.1670
Iter: 3600, Loss: 0.1636
Iter: 3700, Loss: 0.1603
Iter: 3800, Loss: 0.1572
Iter: 3900, Loss: 0.1543
Iter: 4000, 

In [7]:
def accuracy_fn(hypothesis, labels):
    predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)    
    accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, labels), dtype=tf.float32))
    return accuracy

In [8]:
test_acc = accuracy_fn(logistic_regression(x_test),y_test)
print("Testset Accuracy: {:.4f}".format(test_acc))

Testset Accuracy: 0.9365
