<a href="https://colab.research.google.com/github/HIsu1231/AI_INOVATION_SQUARE/blob/master/Diabetes_OneHotEncoding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt

In [None]:
# Numerical differentiation function

def numerical_derivative(f, x):
    """Estimate the gradient of ``f`` at ``x`` using central differences.

    Every element of ``x`` is nudged up and down by a small step in turn, and
    the slope between the two evaluations of ``f`` is stored at the matching
    position of the result.

    Args:
        f: Callable evaluated as ``f(x)`` and expected to return a scalar.
        x: NumPy array of parameters. It is modified in place while probing,
            and each element is restored before the next one is visited.

    Returns:
        Array created with ``np.zeros_like(x)`` holding the estimated partial
        derivatives.
    """
    step = 1e-4  # 0.0001
    gradient = np.zeros_like(x)

    cursor = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])

    # Only the index is needed from the iterator; values are read from x itself.
    for _ in cursor:
        position = cursor.multi_index
        original = x[position]

        x[position] = float(original) + step
        forward = f(x)  # f(x + step)

        x[position] = original - step
        backward = f(x)  # f(x - step)

        gradient[position] = (forward - backward) / (2 * step)

        # Put the probed element back so later evaluations start from the
        # unperturbed parameters.
        x[position] = original

    return gradient

# sigmoid function

def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``.

    Works on scalars and, through NumPy broadcasting, element-wise on arrays.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

In [None]:
class DataGeneration:
  """Load a comma-separated numeric file and split its rows into test/training sets.

  Each row of the file is treated as one sample. The target (label) column is
  either the first column (``target_position=0``) or the last one
  (``target_position=-1``).
  """

  def __init__(self, name, file_path, seperation_rate, target_position=-1):
    """Store the split configuration and announce the start of generation.

    Args:
      name: Label printed in the start-up message.
      file_path: Path of the CSV file that ``generate`` reads.
      seperation_rate: Fraction of rows, between 0 and 1 inclusive, that goes
        into the test set.
      target_position: Column index of the target; must be -1 or 0.

    Raises:
      Exception: If ``seperation_rate`` is outside [0, 1] or
        ``target_position`` is neither -1 nor 0.
    """
    self.name = name

    self.file_path = file_path

    # A negative rate or one above 1 would turn into slice bounds that silently
    # produce the wrong split in generate(), so reject it up front.
    if not 0 <= seperation_rate <= 1:
      raise Exception('seperation_rate must be between 0 and 1')

    self.seperation_rate = seperation_rate

    if target_position == -1 or target_position == 0:
      self.target_position = target_position
    else:
      str_err = 'target_position must be -1 or 0'
      raise Exception(str_err)

    print(name,"data generation start!")

  def __display_target_distribution(self, data, kind_of_data='original data'):
    """Print how often each distinct target value occurs in ``data``.

    Args:
      data: 2-D array whose ``target_position`` column holds the targets.
      kind_of_data: Label inserted into every printed line.
    """
    target_data = data[:,self.target_position]

    unique, counts = np.unique(target_data, return_counts = True)

    print("=======================================================================================")

    # First the absolute counts ...
    for value, count in zip(unique, counts):
      print("[DataGeneration] unique number of",kind_of_data,"=", value, ",count = ",count)

    # ... then the same values as a percentage of all rows.
    for value, count in zip(unique, counts):
      print("[DataGeneration] unique number of",kind_of_data,"=",value, ",ratio = ",np.round(100*count/len(target_data),2),'%')

    print("========================================================================================")

  def generate(self):
    """Read the file, shuffle its rows and split them.

    Returns:
      Tuple ``(test_data, training_data)`` -- note the order. ``test_data``
      holds the first ``int(rows * seperation_rate)`` shuffled rows and
      ``training_data`` the remainder.

    Raises:
      Exception: If the file cannot be loaded; the original error is attached
        as ``__cause__``.
    """
    try:
      # ndmin=2 keeps a single-row file as shape (1, n_columns); without it
      # the lone row comes back 1-D and its features would be shuffled/split.
      loaded_data = np.loadtxt(self.file_path, delimiter=',', dtype=np.float32, ndmin=2)

    except Exception as err:
      print('[DataGeneration::generate()]',str(err))
      # Chain the original error so its type and traceback are not lost.
      raise Exception(str(err)) from err

    total_data_num = len(loaded_data)
    test_data_num = int(total_data_num * self.seperation_rate)

    # Shuffles rows in place using NumPy's global random state.
    np.random.shuffle(loaded_data)

    test_data = loaded_data[:test_data_num]
    training_data = loaded_data[test_data_num:]

    self.__display_target_distribution(training_data, 'training_data')
    self.__display_target_distribution(test_data, 'test_data')

    return test_data, training_data

In [None]:
class Diabetes():
  """Network with one hidden layer and sigmoid activations, trained numerically.

  Parameters are updated by gradient descent where the gradients come from
  ``numerical_derivative`` applied to a cross-entropy loss.
  """

  def __init__(self,name, i_nodes, h_nodes, o_nodes, learning_rate):
    """Create randomly initialised weights and biases.

    Args:
      name: Label printed in the creation message and kept on the instance.
      i_nodes: Number of input features.
      h_nodes: Number of hidden units.
      o_nodes: Number of output units.
      learning_rate: Step size used by ``train``.
    """
    self.name = name

    # Hidden layer parameters, uniform in [0, 1).
    self.W2 = np.random.rand(i_nodes,h_nodes)
    self.b2 = np.random.rand(h_nodes)

    # Output layer parameters, uniform in [0, 1).
    self.W3 = np.random.rand(h_nodes,o_nodes)
    self.b3 = np.random.rand(o_nodes)

    self.learning_rate = learning_rate

    print(name,"object is created!")

  def _forward(self, input_data):
    """Return the output-layer activations for ``input_data``."""
    a2 = sigmoid(np.dot(input_data, self.W2) + self.b2)
    return sigmoid(np.dot(a2, self.W3) + self.b3)

  def feed_forward(self):
    """Return the cross-entropy loss for the sample stored by ``train``.

    Reads ``self.input_data`` and ``self.target_data``; both are only set by
    ``train``, so that method must have been called at least once.
    """
    delta = 1e-7  # keeps the log arguments away from zero

    y = self._forward(self.input_data)

    return -np.sum(self.target_data*np.log(y+delta) + (1-self.target_data)*np.log((1-y)+delta))

  def loss_val(self):
    """Return the current loss; same computation as ``feed_forward``."""
    return self.feed_forward()

  def predict(self,input_data):
    """Return the index of the strongest output unit.

    Args:
      input_data: One sample (1-D) or a batch of samples (2-D, one per row).

    Returns:
      A single index for a 1-D sample, or an array with one index per row for
      a 2-D batch.
    """
    # axis=-1 gives the same scalar as a plain argmax for a single sample and
    # a per-row result for a batch (a plain argmax would flatten the batch).
    return np.argmax(self._forward(input_data), axis=-1)

  def accuracy(self,input_data,target_data):
    """Compare predictions against labels sample by sample.

    Args:
      input_data: Sequence of samples, indexed along the first axis.
      target_data: Sequence of labels; each is converted with ``int``.

    Returns:
      Tuple ``(unmatched_list, accuracy_result)``: the indices of the wrongly
      predicted samples and the fraction predicted correctly. For empty input
      this is ``([], 0.0)``.
    """
    total = len(input_data)

    # No samples: report zero accuracy rather than dividing by zero.
    if total == 0:
      return [], 0.0

    unmatched_list = [
        i for i in range(total)
        if self.predict(input_data[i]) != int(target_data[i])
    ]

    accuracy_result = (total - len(unmatched_list))/total

    return unmatched_list, accuracy_result

  def train(self, input_data, target_data):
    """Run one gradient-descent step on a single sample.

    Args:
      input_data: Input features of the sample.
      target_data: Target vector with one entry per output unit.
    """
    self.input_data = input_data
    self.target_data = target_data

    # numerical_derivative perturbs the array it is handed in place, and that
    # array is the very parameter feed_forward reads, so the loss closure can
    # ignore its argument.
    f = lambda x : self.feed_forward()

    # Each parameter is updated before the next gradient is estimated.
    self.W2 -= self.learning_rate * numerical_derivative(f, self.W2)
    self.b2 -= self.learning_rate * numerical_derivative(f, self.b2)

    self.W3 -= self.learning_rate * numerical_derivative(f, self.W3)
    self.b3 -= self.learning_rate * numerical_derivative(f, self.b3)

In [None]:
# Fraction of the rows held out for testing.
test_seperation_rate = 0.3

data_obj1 = DataGeneration('Diabetes','drive/My Drive/AI_INOVATION_SQUARE/prac/(200309)diabetes.csv',test_seperation_rate)

# generate() returns (test_data, training_data) in that order. Unpacking it
# the other way round trained the network on the held-out 30% and evaluated
# it on the remaining 70%.
(test_data, training_data) = data_obj1.generate()

print("=====================================================")
print("training data.shape = ",training_data.shape)
print("test data.shape = ",test_data.shape)
print("=====================================================")

Diabetes data generation start!
[DataGeneration] unique number of training_data = 0.0 ,count =  180
[DataGeneration] unique number of training_data = 1.0 ,count =  352
[DataGeneration] unique number of training_data = 0.0 ,ratio =  33.83 %
[DataGeneration] unique number of training_data = 1.0 ,ratio =  66.17 %
[DataGeneration] unique number of test_data = 0.0 ,count =  83
[DataGeneration] unique number of test_data = 1.0 ,count =  144
[DataGeneration] unique number of test_data = 0.0 ,ratio =  36.56 %
[DataGeneration] unique number of test_data = 1.0 ,ratio =  63.44 %
training data.shape =  (227, 9)
test data.shape =  (532, 9)


In [None]:
# Hyper-parameters
i_nodes = training_data.shape[1] - 1   # every column except the label
h_nodes = 30
o_nodes = 2
lr = 1e-2
epochs = 20

loss_val_list = []

obj1 = Diabetes('Diabetes', i_nodes, h_nodes, o_nodes, lr)

print("Neural Network Learning using Numerical Derivative...")

start_time = datetime.now()

for step in range(epochs):
  for i in range(len(training_data)):
    input_data = training_data[i, :-1]

    # Soft one-hot target: 0.99 at the label's index, 0.01 elsewhere.
    target_data = np.full(o_nodes, 0.01)
    target_data[int(training_data[i, -1])] = 0.99

    obj1.train(input_data, target_data)

  # NOTE(review): loss_val() reads the input/target stored by the most recent
  # train() call, so this is the loss of the epoch's last sample only, not of
  # the whole training set.
  cur_loss_val = obj1.loss_val()
  loss_val_list.append(cur_loss_val)

  print("step = ",step,", current loss value = ",cur_loss_val)

end_time = datetime.now()

print("")
print("Elapsed time =>",end_time - start_time)

Diabetes object is created!
Neural Network Learning using Numerical Derivative...
step =  0 , current loss value =  0.8906855543369553
step =  1 , current loss value =  0.9411611374681328
step =  2 , current loss value =  0.8652654797973749
step =  3 , current loss value =  0.7913810356519624
step =  4 , current loss value =  0.726144793014809
step =  5 , current loss value =  0.668713818195945
step =  6 , current loss value =  0.6181847740998516
step =  7 , current loss value =  0.5737511052919824
step =  8 , current loss value =  0.5346841623070474
step =  9 , current loss value =  0.5003218786015455
step =  10 , current loss value =  0.4700642677813519
step =  11 , current loss value =  0.4433718661076985
step =  12 , current loss value =  0.41976457035275205
step =  13 , current loss value =  0.3988196096111263
step =  14 , current loss value =  0.3801683143402142
step =  15 , current loss value =  0.36349186626159513
step =  16 , current loss value =  0.34851641688015733
step =  1

In [None]:
# Separate the feature columns from the label (last) column of the test set.
test_input_data, test_target_data = test_data[:, :-1], test_data[:, -1]

# accuracy() returns the indices of the misclassified rows and the hit ratio.
false_list, accuracy_ret = obj1.accuracy(test_input_data, test_target_data)

print('Test Data Accuracy = ', np.round(accuracy_ret, 3))

Test Data Accuracy =  0.724
