## Logistic Regression : 2진 분류(Binary Classification)
-  2진 분류의 활성화 함수로는 sigmoid가 사용됨

#### sigmoid 함수
- sigmoid : 2진 분류(Binary Classification,Logistic Regression) 모델의 활성화 함수(Activation Function)

In [2]:
import tensorflow as tf
import math
import matplotlib.pyplot as plt
import numpy as np
tf.random.set_seed(5)

def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + e**-z)`` of *z*.

    Args:
        z: A Python number, or any object that supports unary minus and
            being the exponent of a float (the expression is applied to it
            unchanged, exactly as before).

    Returns:
        The sigmoid of *z*. For plain Python numbers the result is a float
        in ``[0.0, 1.0]``.
    """
    try:
        return 1. / (1. + math.e ** -z)
    except OverflowError:
        # For very negative Python scalars, e**-z exceeds the float range and
        # raises instead of yielding inf; the sigmoid's limit there is 0.
        return 0.0

In [14]:
# Training set: six samples, each with two features and a 0/1 label.
x_data = [[1, 2], [2, 3], [3, 1], [4, 3], [5, 3], [6, 2]]
y_data = [[0], [0], [0], [1], [1], [1]]

# float32 arrays built from the lists above; these are what the model consumes.
x_train = np.asarray(x_data, dtype=np.float32)
y_train = np.asarray(y_data, dtype=np.float32)

# Trainable parameters with random-normal initial values:
# a [2, 1] weight matrix (one row per feature) and a [1] bias.
W = tf.Variable(tf.random.normal(shape=[2, 1]), name='weight')
b = tf.Variable(tf.random.normal(shape=[1]), name='bias')

In [15]:
def hypothesis(X):
    """Return the model output ``sigmoid(X @ W + b)`` for input *X*.

    Uses the module-level variables ``W`` and ``b``; *X* must be
    matrix-multipliable with ``W``.
    """
    linear = tf.matmul(X, W) + b
    return tf.sigmoid(linear)

def cost_func():
    """Return the mean binary cross-entropy of the model on the training set.

    Reads the module-level ``x_train`` / ``y_train`` and evaluates
    ``hypothesis(x_train)`` once.

    Returns:
        A scalar tensor: ``-mean(y*log(p) + (1 - y)*log(1 - p))``.
    """
    # Note: a regression model would use mean squared error instead, i.e.
    # tf.reduce_mean(tf.square(hypothesis(x_train) - y_train)).

    # Keep probabilities strictly inside (0, 1). A saturated sigmoid returns
    # exactly 0 or 1 in float32, which would give log(0) = -inf and turn the
    # cost into inf/NaN.
    eps = 1e-7
    prob = tf.clip_by_value(hypothesis(x_train), eps, 1. - eps)

    cost = -tf.reduce_mean(y_train * tf.math.log(prob) +
                           (1 - y_train) * tf.math.log(1 - prob))
    return cost

# Adam optimizer with step size 0.01. `learning_rate` is the supported keyword;
# the old `lr` alias is deprecated and rejected by newer Keras releases.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

In [16]:
print('****** Start Learning!!')
for step in range(5001):
    # One optimizer step: minimise cost_func by updating W and b.
    optimizer.minimize(cost_func, var_list=[W, b])
    if step % 1000 != 0:
        continue
    # Progress report every 1000 steps; the cost is re-evaluated after the update.
    print('{:04d}'.format(step), 'cost:[', cost_func().numpy(), ']',
          ' W:', W.numpy(), ' b:', b.numpy())

print('****** Learning Finished!!')

****** Start Learning!!
0000 cost:[ 0.8277693 ]  W: [[ 1.3406279]
 [-0.1332199]]  b: [-1.1178697]
1000 cost:[ 0.12722206 ]  W: [[1.5852704]
 [0.4354949]]  b: [-6.2488832]
2000 cost:[ 0.05386779 ]  W: [[2.3047826 ]
 [0.98030955]]  b: [-10.019882]
3000 cost:[ 0.027737541 ]  W: [[2.923696 ]
 [1.3474218]]  b: [-12.9787245]
4000 cost:[ 0.015573931 ]  W: [[3.4833386]
 [1.650385 ]]  b: [-15.568397]
5000 cost:[ 0.009132211 ]  W: [[4.009466 ]
 [1.9239582]]  b: [-17.968874]
****** Learning Finished!!


In [17]:
# Show the current values of the weight and bias variables as NumPy arrays.
print('Weight:',W.numpy())
print('bias:', b.numpy())

def predict(X):
    """Return hard 0/1 predictions for *X* as a float32 tensor.

    An entry is 1.0 where ``hypothesis(X)`` is strictly greater than 0.5 and
    0.0 otherwise.
    """
    above_threshold = tf.greater(hypothesis(X), 0.5)
    return tf.cast(above_threshold, tf.float32)

# The training data is reused as the test data, so this is training accuracy.
x_test, y_test = x_train, y_train

preds = predict(x_test)
# Fraction of predictions that equal the labels.
is_correct = tf.cast(tf.equal(preds, y_test), dtype=tf.float32)
accuracy = tf.reduce_mean(is_correct)

print('Accuracy : ', accuracy.numpy())

Weight: [[4.009466 ]
 [1.9239582]]
bias: [-17.968874]
Accuracy :  1.0


## Multi-class Classification (다중 분류)
- multinomial classification(다중 분류) : Y값의 범주가 3개 이상인 분류
- 활성화 함수(Activation Function)로는 softmax 함수가 사용된다



In [21]:
# Training set: x_data has shape [N, 4] = [8, 4] (eight samples, four features).
x_data = [
    [1, 2, 1, 1],
    [2, 1, 3, 2],
    [3, 1, 3, 4],
    [4, 1, 5, 5],
    [1, 7, 5, 5],
    [1, 2, 5, 6],
    [1, 6, 6, 6],
    [1, 7, 7, 7],
]

# One-hot labels: y_data has shape [N, 3] = [8, 3]; the class index is noted per row.
y_data = [
    [0, 0, 1],  # class 2
    [0, 0, 1],  # class 2
    [0, 0, 1],  # class 2
    [0, 1, 0],  # class 1
    [0, 1, 0],  # class 1
    [0, 1, 0],  # class 1
    [1, 0, 0],  # class 0
    [1, 0, 0],  # class 0
]

# float32 arrays built from the lists above; these are what the model consumes.
x_train = np.asarray(x_data, dtype=np.float32)
y_train = np.asarray(y_data, dtype=np.float32)

# Trainable parameters with random-normal initial values:
# a [4, 3] weight matrix (features x classes) and a [3] bias (one per class).
W = tf.Variable(tf.random.normal(shape=[4, 3]), name='weight')
b = tf.Variable(tf.random.normal(shape=[3]), name='bias')


In [24]:
# Cost function, option 1 (not used): write the formula directly with log:
#     tf.reduce_mean(-tf.reduce_sum(y_train * tf.math.log(hypothesis(x_train)),
#                                   axis=1))
# Cost function, option 2 (used below): tf.nn.softmax_cross_entropy_with_logits().
def cost_func():
    """Return the mean softmax cross-entropy of the model on the training set.

    Reads the module-level ``x_train`` / ``y_train`` and passes the raw
    ``logits(x_train)`` scores, not softmax probabilities, to the TensorFlow op.
    """
    per_sample_loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=y_train, logits=logits(x_train))
    return tf.reduce_mean(per_sample_loss)

def logits(X):
    """Return the raw class scores ``X @ W + b`` (before softmax).

    Uses the module-level variables ``W`` and ``b``.
    """
    weighted = tf.matmul(X, W)
    return weighted + b

def hypothesis(X):
    """Return the softmax of ``logits(X)``, i.e. the model's class probabilities."""
    scores = logits(X)
    return tf.nn.softmax(scores)

# Adam optimizer with step size 0.01. `learning_rate` is the supported keyword;
# the old `lr` alias is deprecated and rejected by newer Keras releases.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

In [25]:
print('****** Start Learning!!')
for step in range(5001):
    # One optimizer step: minimise cost_func by updating W and b.
    optimizer.minimize(cost_func, var_list=[W, b])
    if step % 1000 != 0:
        continue
    # Progress report every 1000 steps; the cost is re-evaluated after the update.
    print('{:04d}'.format(step), 'cost:[', cost_func().numpy(), ']',
          ' W:', W.numpy(), ' b:', b.numpy())

print('****** Learning Finished!!')

****** Start Learning!!
0000 cost:[ 9.878908 ]  W: [[-0.70010203  0.36601627 -0.08132568]
 [-0.513514   -0.43817282  0.30525848]
 [ 0.12083036 -1.179445    1.4831539 ]
 [ 0.3534133  -1.5123805  -0.21457517]]  b: [-0.7218484  1.6085751 -1.087273 ]
1000 cost:[ 0.36079222 ]  W: [[-2.2377305   0.2610561   1.4514554 ]
 [-0.00528285 -0.24690555 -0.0131535 ]
 [ 0.92016226  0.33322957 -0.08321315]
 [ 0.09760489  0.02930265 -1.3324299 ]]  b: [-3.2477362  -0.78599024  2.487776  ]
2000 cost:[ 0.19642809 ]  W: [[-3.8626187   0.760477    2.2226121 ]
 [-0.14338577 -0.25901413  0.06708437]
 [ 2.3744597   0.46873924 -0.9336092 ]
 [-0.16464697  0.1332273  -1.2890192 ]]  b: [-6.547855  -1.5583214  4.9002404]
3000 cost:[ 0.1104168 ]  W: [[-5.4746475   1.3234361   2.9324396 ]
 [-0.2666849  -0.25400275  0.12868723]
 [ 3.9054184   0.46343267 -1.7107009 ]
 [-0.56691575  0.2606404  -1.1930344 ]]  b: [-9.6826515 -2.0707116  7.0556283]
4000 cost:[ 0.06373567 ]  W: [[-7.01363     1.883223    3.6       ]
 [-0.389

In [26]:
# 예측
# tf.argmax() : 값이 가장 큰 요소의 인덱스 값을 반환
def predict(X):
    """Return, for each row of *X*, the index of the largest ``hypothesis(X)`` entry."""
    probabilities = hypothesis(X)
    return tf.argmax(probabilities, axis=1)


# The training data is reused as the test data, so this is training accuracy.
x_test = x_train
y_test = y_train

# Compute the predictions and the true class indices once and reuse them for
# both the printout and the accuracy, instead of re-running the model.
preds = predict(x_test)
true_classes = tf.argmax(y_test, 1)

print(preds.numpy())
print(hypothesis(x_test).numpy())
print(true_classes.numpy())

correct_predict = tf.equal(preds, true_classes)
accuracy = tf.reduce_mean(tf.cast(correct_predict, dtype=tf.float32))
print("Accuracy:", accuracy.numpy())

[2 2 2 1 1 1 0 0]
[[3.8040382e-14 1.0293008e-05 9.9998975e-01]
 [7.5317377e-11 1.1370678e-02 9.8862934e-01]
 [6.9941707e-17 3.9894555e-02 9.6010542e-01]
 [2.2848299e-15 9.6675694e-01 3.3243079e-02]
 [7.0714615e-02 9.2695910e-01 2.3262901e-03]
 [3.7234403e-02 9.6271604e-01 4.9614675e-05]
 [9.0706098e-01 9.2937954e-02 1.0319754e-06]
 [9.9858642e-01 1.4136194e-03 1.7703172e-10]]
[2 2 2 1 1 1 0 0]
Accuracy: 1.0
