## Diabetes class version 1
#### 문제점 1 : train() 시간을 보면 거의 몇 시간 (hour) 걸림

In [2]:
import numpy as np
from datetime import datetime

# 수치미분 함수

def numerical_derivative(f, x):
    """Estimate the gradient of ``f`` at ``x`` with central differences.

    Every element of ``x`` is probed in turn: it is shifted by ``+h`` and
    ``-h``, ``f`` is evaluated at both points, and the slope
    ``(f(x + h) - f(x - h)) / (2 * h)`` is stored at the matching position
    of the result.

    Args:
        f: Callable that is handed ``x`` and returns a scalar.
        x: NumPy array of any shape. It is modified in place while an
            element is being probed and set back to its saved value
            afterwards.

    Returns:
        An array created with ``np.zeros_like(x)`` holding one slope
        estimate per element of ``x``.
    """
    h = 1e-4  # 0.0001
    gradient = np.zeros_like(x)

    cursor = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])

    for _ in cursor:
        position = cursor.multi_index
        saved = x[position]

        # Forward probe: f(x + h)
        x[position] = float(saved) + h
        forward = f(x)

        # Backward probe: f(x - h)
        x[position] = saved - h
        backward = f(x)

        gradient[position] = (forward - backward) / (2 * h)

        # Put the probed element back before moving to the next one.
        x[position] = saved

    return gradient

# sigmoid 함수

def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``.

    ``np.exp`` is used, so NumPy arrays are processed element-wise.
    """
    exp_of_negated = np.exp(-x)
    return 1 / (1 + exp_of_negated)

In [3]:
# Diabetes Class

class Diabetes:
    """Binary classifier with one sigmoid hidden layer and a sigmoid output.

    Parameters are fitted by gradient descent on a summed cross-entropy
    loss. Gradients come from ``numerical_derivative`` (central
    differences), which evaluates the loss over the whole training set
    twice for every single weight and bias element, so ``train`` is very
    slow for anything but tiny networks.
    """

    def __init__(self, xdata, tdata, i_nodes, h_nodes, o_nodes, lr, iter_count):
        """Store the training data and randomly initialise the parameters.

        Args:
            xdata: Training inputs as a ``numpy`` array; it is multiplied
                with ``W2``, so its last dimension must equal ``i_nodes``.
            tdata: Target labels as a ``numpy`` array, combined
                element-wise with the network output in the loss.
            i_nodes: Number of input features.
            h_nodes: Number of hidden units.
            o_nodes: Number of output units.
            lr: Learning rate used for every parameter update.
            iter_count: Number of gradient-descent steps run by ``train``.
        """
        # Training inputs and target labels.
        self.xdata = xdata
        self.tdata = tdata

        # Layer 2 (hidden layer): weights and biases drawn from U[0, 1).
        self.W2 = np.random.rand(i_nodes, h_nodes)
        self.b2 = np.random.rand(h_nodes)

        # Layer 3 (output layer): weights and biases drawn from U[0, 1).
        self.W3 = np.random.rand(h_nodes, o_nodes)
        self.b3 = np.random.rand(o_nodes)

        # Learning rate.
        self.learning_rate = lr

        # Number of training iterations.
        self.iteration_count = iter_count

    def _feed_forward(self, data):
        """Return the output-layer activations of the network for ``data``."""
        z2 = np.dot(data, self.W2) + self.b2
        a2 = sigmoid(z2)

        z3 = np.dot(a2, self.W3) + self.b3
        return sigmoid(z3)

    def loss_func(self):
        """Return the summed cross-entropy loss over the training data."""
        delta = 1e-7  # keeps np.log away from log(0) = -inf

        y = self._feed_forward(self.xdata)

        # Cross-entropy
        return -np.sum(
            self.tdata * np.log(y + delta)
            + (1 - self.tdata) * np.log((1 - y) + delta)
        )

    def get_W_b(self):
        """Return the current parameters as ``(W2, b2, W3, b3)``."""
        return self.W2, self.b2, self.W3, self.b3

    def error_val(self):
        """Return the current loss; same value as ``loss_func``."""
        return self.loss_func()

    def predict(self, data):
        """Classify ``data`` by thresholding the network output at 0.5.

        Args:
            data: One sample or a batch of samples whose last dimension
                matches the number of input features.

        Returns:
            ``1`` or ``0`` (a plain ``int``) when the network produces a
            single output value; otherwise an integer array of 0/1 labels
            with the same shape as the network output.
        """
        y = self._feed_forward(data)
        labels = (y >= 0.5).astype(int)

        # A single output keeps the scalar return value; larger outputs
        # are returned element-wise instead of failing in an ``if``.
        if labels.size == 1:
            return int(labels.item())
        return labels

    def train(self):
        """Run ``iteration_count`` gradient-descent steps on all parameters.

        Prints the initial loss, the loss at roughly every 5% of the
        iterations, and the elapsed wall-clock time.
        """

        # numerical_derivative perturbs the array it is given in place.
        # That array is the very object stored on ``self``, so loss_func()
        # already sees the perturbed value and the argument is not needed.
        def f(_):
            return self.loss_func()

        print("Initial error value = ", self.error_val())

        # Report every 5% of the run. The interval is clamped to at least 1
        # so that fewer than 20 iterations no longer divides by zero.
        report_every = max(1, int(0.05 * self.iteration_count))

        start_time = datetime.now()

        for step in range(self.iteration_count):

            self.W2 -= self.learning_rate * numerical_derivative(f, self.W2)

            self.b2 -= self.learning_rate * numerical_derivative(f, self.b2)

            self.W3 -= self.learning_rate * numerical_derivative(f, self.W3)

            self.b3 -= self.learning_rate * numerical_derivative(f, self.b3)

            if step % report_every == 0:
                print("step = ", step, "error value = ", self.error_val())

        end_time = datetime.now()

        print("")
        print("Elapsed Time => ", end_time - start_time)

In [4]:
# Load the comma-separated data set and split it into features and labels.
try:
    loaded_data = np.loadtxt('./(200309)diabetes.csv', delimiter=',')

    # Every column except the last is an input feature. The last column is
    # the label; indexing it with a list keeps it two-dimensional.
    x_data, t_data = loaded_data[:, :-1], loaded_data[:, [-1]]

    print("loaded_data = ", loaded_data.shape)
    print("x_data = ", x_data.shape, ", t_data = ", t_data.shape)

except Exception as err:
    # Only the error message is reported; nothing is re-raised here.
    print(err)

loaded_data =  (759, 9)
x_data =  (759, 8) , t_data =  (759, 1)


In [6]:
# Layer widths: the input and output sizes follow the loaded data, while
# the hidden layer size is chosen by hand.
input_nodes = x_data.shape[-1]
hidden_nodes = 30
output_nodes = t_data.shape[-1]

# Gradient-descent hyperparameters.
lr = 0.01
iter_count = 30001

obj1 = Diabetes(
    xdata=x_data,
    tdata=t_data,
    i_nodes=input_nodes,
    h_nodes=hidden_nodes,
    o_nodes=output_nodes,
    lr=lr,
    iter_count=iter_count,
)

obj1.train()

# NOTE: this run takes a dramatically long time.
# (Original note: "one row at a time".)

Initial error value =  2112.138351210094
step =  0 error value =  493.0014109557683


KeyboardInterrupt: 