In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

tf.__version__

'2.15.0'

In [2]:
# Mount Google Drive into the Colab runtime at /content/drive so that files
# under /content/drive/MyDrive can be read like local files.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# 당뇨병을 진단해봅시다.

In [3]:
# Read the whole CSV into one float32 matrix; the last column holds the label.
DATA_PATH = '/content/drive/MyDrive/dataset/example_data/data-03-diabetes.csv'
N_TEST = 100  # number of trailing rows held out as the test set

xy = np.loadtxt(DATA_PATH, delimiter=',', dtype=np.float32)

# Every column except the last is a feature; indexing with [-1] keeps the
# label column two-dimensional (n_rows, 1).
features_all = xy[:, :-1]
labels_all = xy[:, [-1]]

# Train on everything except the last N_TEST rows; test on those last rows.
x_train, x_test = features_all[:-N_TEST], features_all[-N_TEST:]
y_train, y_test = labels_all[:-N_TEST], labels_all[-N_TEST:]

print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

(659, 8) (659, 1)
(100, 8) (100, 1)


In [4]:
# Peek at the first held-out sample: its feature row, then its label.
print(x_test[0])
print(y_test[0])

[-0.294118  -0.0150754 -0.0491803 -0.333333  -0.550827   0.0134128
 -0.699402  -0.266667 ]
[1.]


In [5]:
# Wrap the training arrays in a tf.data pipeline whose batch size equals the
# number of training rows, i.e. each pass over `dataset` yields one full batch.
full_batch_size = len(x_train)
dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
dataset = dataset.batch(full_batch_size)

# Show the first batch only (features, then labels).
for t, l in dataset.take(1):
    print(t, l, sep='\n')

tf.Tensor(
[[-0.294118    0.487437    0.180328   ...  0.00149028 -0.53117
  -0.0333333 ]
 [-0.882353   -0.145729    0.0819672  ... -0.207153   -0.766866
  -0.666667  ]
 [-0.0588235   0.839196    0.0491803  ... -0.305514   -0.492741
  -0.633333  ]
 ...
 [-0.882353    0.125628    0.311475   ...  0.0372578  -0.881298
  -0.9       ]
 [-0.529412    0.457286    0.344262   ... -0.0312965  -0.865927
   0.633333  ]
 [ 0.176471    0.115578    0.147541   ... -0.180328   -0.9462
  -0.366667  ]], shape=(659, 8), dtype=float32)
tf.Tensor(
[[0.]
 [1.]
 [0.]
 [1.]
 [0.]
 [1.]
 [0.]
 [1.]
 [0.]
 [0.]
 [1.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]
 [0.]
 [1.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]
 [1.]
 [1.]
 [1.]
 [0.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [0.]
 [0.]
 [0.]
 [1.]
 [1.]
 [1.]
 [0.]
 [1.]
 [0.]
 [1.]
 [1.]
 [0.]
 [1.]
 [1.]
 [1.]
 [1.]
 [0.]
 [1.]
 [1.]
 [0.]
 [1.]
 [1.]
 [1.]
 [1.]
 [0.]
 [1.]
 [1.]
 [0.]
 [1.]
 [0.]
 [1.]
 [1.]
 [1.]
 [0.]
 [1.]
 [0.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [0.]
 [1.]
 

In [6]:
# Fix the global TensorFlow seed so the random initial weights (and therefore
# the whole training run) are reproducible after Restart & Run All.
tf.random.set_seed(777)

# One weight per input feature, derived from the training data instead of
# being hard-coded, plus a single scalar bias.
n_features = x_train.shape[1]
W = tf.Variable(tf.random.normal([n_features, 1]), name='weight')
b = tf.Variable(tf.random.normal([1]), name='bias')


## 가설 설정
* 병이 있다 / 없다로 분류
* sigmoid classification으로 진행


## $$ h(x) = \frac{1}{1+e^{-(xW+b)}} $$

In [7]:
def logistic_regression(features):
    """Return sigmoid(features @ W + b) using the module-level W and b.

    Args:
        features: 2-D tensor/array whose last dimension matches W's first.

    Returns:
        Tensor of sigmoid outputs in (0, 1), one row per input row.
    """
    logits = tf.matmul(features, W) + b
    return tf.sigmoid(logits)

# Sanity check: run the forward pass on the training features and show the
# resulting sigmoid outputs.
print(logistic_regression(x_train))

tf.Tensor(
[[0.33308595]
 [0.20189965]
 [0.21860813]
 [0.07307252]
 [0.4862488 ]
 [0.18095288]
 [0.10315586]
 [0.37928453]
 [0.29211998]
 [0.25915906]
 [0.2569909 ]
 [0.39881024]
 [0.3260142 ]
 [0.6405014 ]
 [0.30151358]
 [0.30316007]
 [0.24636728]
 [0.20362325]
 [0.09939175]
 [0.17996605]
 [0.32880646]
 [0.327657  ]
 [0.2581547 ]
 [0.13120812]
 [0.36354727]
 [0.06811441]
 [0.08722534]
 [0.24066402]
 [0.3689007 ]
 [0.14725298]
 [0.06416173]
 [0.11882685]
 [0.28873718]
 [0.11703735]
 [0.339647  ]
 [0.36271223]
 [0.33687967]
 [0.23443918]
 [0.08966167]
 [0.38548714]
 [0.1764913 ]
 [0.28364506]
 [0.2403347 ]
 [0.5773987 ]
 [0.23367727]
 [0.23074381]
 [0.42290488]
 [0.3276509 ]
 [0.03785765]
 [0.06062973]
 [0.05666859]
 [0.20755865]
 [0.2996706 ]
 [0.22454424]
 [0.2623888 ]
 [0.23136544]
 [0.5088486 ]
 [0.21506098]
 [0.2913393 ]
 [0.3022272 ]
 [0.24065626]
 [0.07914139]
 [0.31137338]
 [0.24013186]
 [0.33119303]
 [0.3623836 ]
 [0.03688881]
 [0.06264336]
 [0.11421739]
 [0.09909776]
 [0.49709

## Loss Function

* 기존 MSE 대신 Cross Entropy 사용

$$
\begin{align}
\mathrm{loss}(h(x), y) & = -y \log(h(x)) - (1 - y) \log(1 - h(x))
\end{align}
$$

In [8]:
def loss_fn(hypothesis, labels):
    """Mean binary cross-entropy between predicted probabilities and labels.

    Args:
        hypothesis: predicted probabilities (sigmoid outputs).
        labels: target values, same shape as `hypothesis`.

    Returns:
        Scalar tensor: the cross-entropy averaged over all elements.
    """
    # A saturated sigmoid can return exactly 0.0 or 1.0 in float32, which
    # makes log() yield -inf and the loss NaN. Clip away from both ends first.
    eps = 1e-7
    hypothesis = tf.clip_by_value(hypothesis, eps, 1.0 - eps)
    cost = -tf.reduce_mean(labels * tf.math.log(hypothesis) + (1 - labels) * tf.math.log(1 - hypothesis))
    return cost

# Gradient-descent optimizer (TF1 compat API) with a fixed learning rate of 0.005.
optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.005)

## 학습

In [9]:
epochs = 5000

# Full-batch gradient descent: `dataset` yields the training set once per
# epoch, so each epoch performs one parameter update.
for step in range(epochs):
    for features, labels in dataset:
        # Only the forward pass needs to be recorded on the tape.
        with tf.GradientTape() as tape:
            pred = logistic_regression(features)
            loss_value = loss_fn(pred, labels)

        # Differentiate and apply the update outside the tape context.
        grads = tape.gradient(loss_value, [W, b])
        optimizer.apply_gradients(grads_and_vars=zip(grads, [W, b]))

        # Every 100 epochs, report the loss re-evaluated after the update.
        if step % 100 == 0:
            print("Iter: {}, Loss: {:.4f}".format(step, loss_fn(logistic_regression(features), labels)))

Iter: 0, Loss: 1.3138
Iter: 100, Loss: 1.1035
Iter: 200, Loss: 0.9582
Iter: 300, Loss: 0.8630
Iter: 400, Loss: 0.8022
Iter: 500, Loss: 0.7632
Iter: 600, Loss: 0.7376
Iter: 700, Loss: 0.7201
Iter: 800, Loss: 0.7074
Iter: 900, Loss: 0.6977
Iter: 1000, Loss: 0.6899
Iter: 1100, Loss: 0.6832
Iter: 1200, Loss: 0.6773
Iter: 1300, Loss: 0.6719
Iter: 1400, Loss: 0.6670
Iter: 1500, Loss: 0.6622
Iter: 1600, Loss: 0.6578
Iter: 1700, Loss: 0.6535
Iter: 1800, Loss: 0.6493
Iter: 1900, Loss: 0.6453
Iter: 2000, Loss: 0.6415
Iter: 2100, Loss: 0.6378
Iter: 2200, Loss: 0.6342
Iter: 2300, Loss: 0.6307
Iter: 2400, Loss: 0.6273
Iter: 2500, Loss: 0.6240
Iter: 2600, Loss: 0.6208
Iter: 2700, Loss: 0.6177
Iter: 2800, Loss: 0.6147
Iter: 2900, Loss: 0.6118
Iter: 3000, Loss: 0.6089
Iter: 3100, Loss: 0.6062
Iter: 3200, Loss: 0.6035
Iter: 3300, Loss: 0.6009
Iter: 3400, Loss: 0.5984
Iter: 3500, Loss: 0.5960
Iter: 3600, Loss: 0.5936
Iter: 3700, Loss: 0.5913
Iter: 3800, Loss: 0.5891
Iter: 3900, Loss: 0.5869
Iter: 4000, 

## 테스트

In [10]:
def accuracy_fn(hypothesis, labels):
    """Fraction of elements whose thresholded prediction equals the label.

    Args:
        hypothesis: predicted probabilities; values above 0.5 count as 1.0,
            everything else as 0.0.
        labels: float targets compared element-wise with the thresholded
            predictions.

    Returns:
        Scalar float32 tensor with the mean of the element-wise matches.
    """
    hard_predictions = tf.cast(hypothesis > 0.5, dtype=tf.float32)
    matches = tf.cast(tf.equal(hard_predictions, labels), dtype=tf.float32)
    return tf.reduce_mean(matches)

In [11]:
# Score the trained model on the held-out rows and report the accuracy.
test_hypothesis = logistic_regression(x_test)
test_acc = accuracy_fn(test_hypothesis, y_test)
print("Testset Accuracy: {:.4f}".format(test_acc))

Testset Accuracy: 0.6600
