<a href="https://colab.research.google.com/github/Temple2001/ML_practice/blob/main/codes/DeepLearning_practice_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Example 5
**numpy.random.shuffle 기능을 이용해 training data와 test data를 분리비율에 맞게 생성**

In [3]:
import numpy as np

class DataGeneration:

    def __init__(self, name, file_path, seperation_rate):
        self.name = name
        self.file_path = file_path
        self.seperation_rate = seperation_rate
    
    def generate(self):
        try:
            loaded_data = np.loadtxt(self.file_path, delimiter=',', dtype=np.float32)
        except Exception as err:
            print('[DataGeneration::generate()', str(err))
            raise Exception(str(err))
        print('[DataGeneration] loaded_data.shape = ', loaded_data.shape)

        total_data_num = len(loaded_data)
        test_data_num = int(len(loaded_data) * self.seperation_rate)

        np.random.shuffle(loaded_data)

        test_data = loaded_data[0:test_data_num]

        training_data = loaded_data[test_data_num:]

        return training_data, test_data


**DataGeneration ver1 테스트, 분리비율 30%**

In [5]:
seperation_rate = 0.3

try:
    data_obj1 = DataGeneration('Diabetes', 'diabetes.csv', seperation_rate)

    (generated_training_data, generated_test_data) = data_obj1.generate()

    print('generated_training_data.shape = ', generated_training_data.shape)
    print('generated_test_data.shape = ', generated_test_data.shape)
except Exception as err:
    print('Exception Occur !!')
    print(str(err))

[DataGeneration] loaded_data.shape =  (759, 9)
generated_training_data.shape =  (532, 9)
generated_test_data.shape =  (227, 9)


**DataGeneration ver1 테스트, 분리비율 50%**

In [7]:
seperation_rate = 0.5

try:
    data_obj2 = DataGeneration('Diabetes', 'diabetes.csv', seperation_rate)

    (generated_training_data, generated_test_data) = data_obj2.generate()

    print('generated_training_data.shape = ', generated_training_data.shape)
    print('generated_test_data.shape = ', generated_test_data.shape)
except Exception as err:
    print('Exception Occur !! ')
    print(str(err))

[DataGeneration] loaded_data.shape =  (759, 9)
generated_training_data.shape =  (380, 9)
generated_test_data.shape =  (379, 9)


# Example 6
**DataGeneration class를 이용해 Diabetes 구현하기**

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime

np.random.seed(0)

def numerical_derivative(f, x):
    delta_x = 1e-4 # 0.0001
    grad = np.zeros_like(x)
    
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    
    while not it.finished:
        idx = it.multi_index        
        tmp_val = x[idx]
        x[idx] = float(tmp_val) + delta_x
        fx1 = f(x) # f(x+delta_x)
        
        x[idx] = float(tmp_val) - delta_x 
        fx2 = f(x) # f(x-delta_x)
        grad[idx] = (fx1 - fx2) / (2*delta_x)
        
        x[idx] = tmp_val 
        it.iternext()   
        
    return grad

# sigmoid 함수

def sigmoid(x):
    return 1 / (1+np.exp(-x))

In [7]:
class Diabetes:
    
    # 생성자
    # xdata, tdata => numpy.array(...)
    def __init__(self, name, input_nodes, hidden1_nodes, hidden2_nodes, output_nodes, learning_rate):
        
        self.name = name
        
        # 2층 hidden layer unit 
        # 가중치 W, 바이어스 b 초기화
        self.W2 = np.random.rand(input_nodes, hidden1_nodes)  
        self.b2 = np.random.rand(hidden1_nodes)
        
        # 3층 hidden layer unit  
        self.W3 = np.random.rand(hidden1_nodes, hidden2_nodes)
        self.b3 = np.random.rand(hidden2_nodes)
        
        # 4층 output layer unit  
        self.W4 = np.random.rand(hidden2_nodes, output_nodes)
        self.b4 = np.random.rand(output_nodes)
                        
        # 학습률 learning rate 초기화
        self.learning_rate = learning_rate
        
        print(self.name, " is created !!!")
        
    # 손실함수
    def feed_forward(self):
        
        delta = 1e-7    # log 무한대 발산 방지
    
        z2 = np.dot(self.input_data, self.W2) + self.b2
        a2 = sigmoid(z2)
        
        z3 = np.dot(a2, self.W3) + self.b3
        a3 = sigmoid(z3)
        
        z4 = np.dot(a3, self.W4) + self.b4
        y = a4 = sigmoid(z4)
    
        # cross-entropy 
        return  -np.sum( self.target_data*np.log(y + delta) + (1-self.target_data)*np.log((1 - y)+delta ) )
    
    # 손실 값 계산
    def loss_val(self):
        
        delta = 1e-7    # log 무한대 발산 방지
    
        z2 = np.dot(self.input_data, self.W2) + self.b2
        a2 = sigmoid(z2)
        
        z3 = np.dot(a2, self.W3) + self.b3
        a3 = sigmoid(z3)
        
        z4 = np.dot(a3, self.W4) + self.b4
        y = a4 = sigmoid(z4)
    
        # cross-entropy 
        return  -np.sum( self.target_data*np.log(y + delta) + (1-self.target_data)*np.log((1 - y)+delta ) )
    
    # query, 즉 미래 값 예측 함수
    def predict(self, input_data):    
        
        z2 = np.dot(input_data, self.W2) + self.b2
        a2 = sigmoid(z2)
        
        z3 = np.dot(a2, self.W3) + self.b3
        a3 = sigmoid(z3)
        
        z4 = np.dot(a3, self.W4) + self.b4
        y = a4 = sigmoid(z4)
    
        if y >= 0.5:
            result = 1  # True
        else:
            result = 0  # False
    
        return y, result
    
    def accuracy(self, input_data, target_data):
        
        matched_list = []
        not_matched_list = []
        
        for index in range(len(input_data)):
            
            (real_val, logical_val) = self.predict(input_data[index])
            
            if logical_val == target_data[index]:
                matched_list.append(index)
            else:
                not_matched_list.append(index)
                
        accuracy_val = len(matched_list) / len(input_data)
        
        return accuracy_val
    
        
    # 수치미분을 이용하여 손실함수가 최소가 될때 까지 학습하는 함수
    def train(self, input_data, target_data):
        
        self.input_data = input_data
        self.target_data = target_data
        
        f = lambda x : self.feed_forward()
        
        self.W2 -= self.learning_rate * numerical_derivative(f, self.W2)
    
        self.b2 -= self.learning_rate * numerical_derivative(f, self.b2)
        
        self.W3 -= self.learning_rate * numerical_derivative(f, self.W3)
    
        self.b3 -= self.learning_rate * numerical_derivative(f, self.b3)
        
        self.W4 -= self.learning_rate * numerical_derivative(f, self.W4)
    
        self.b4 -= self.learning_rate * numerical_derivative(f, self.b4)

In [3]:
class DataGeneration:

    def __init__(self, name, file_path, seperation_rate):
        self.name = name
        self.file_path = file_path
        self.seperation_rate = seperation_rate

    def generate(self):
        try:
            loaded_data = np.loadtxt(self.file_path, delimiter=',', dtype=np.float32)
        except Exception as err:
            print('[DataGeneration::generate()] ', str(err))
            raise Exception(str(err))
        
        print('[DataGeneration] loaded_data.shape = ', loaded_data.shape)

        total_data_num = len(loaded_data)
        test_data_num = int(len(loaded_data)*self.seperation_rate)

        np.random.shuffle(loaded_data)

        test_data = loaded_data[0:test_data_num]
        training_data = loaded_data[test_data_num:]

        return training_data, test_data

In [4]:
seperation_rate = 0.3

data_obj = DataGeneration('Diabetes', 'diabetes.csv', seperation_rate)

(training_data, test_data) = data_obj.generate()

print('training_data.shape = ', training_data.shape, ', test_data.shape = ', test_data.shape)

[DataGeneration] loaded_data.shape =  (759, 9)
training_data.shape =  (532, 9) , test_data.shape =  (227, 9)


In [12]:
i_nodes = training_data.shape[1] - 1
h1_nodes = 3
h2_nodes = 2
o_nodes = 1
lr = 1e-1
epochs = 20

obj = Diabetes('Diabetes', i_nodes, h1_nodes, h2_nodes, o_nodes, lr)

print('Neural Network Learning using Numerical Derivative...')

start_time = datetime.now()

for step in range(epochs):
    for index in range(len(training_data)):
        input_data = training_data[index, 0:-1]
        target_data = training_data[index, [-1]]

        obj.train(input_data, target_data)
    
    cur_loss_val = obj.loss_val()
    print('epochs = ', step, ', loss value = ', cur_loss_val)

end_time = datetime.now()

print('')
print('Elapsed time => ', end_time - start_time)

Diabetes  is created !!!
Neural Network Learning using Numerical Derivative...
epochs =  0 , loss value =  0.9467978307140607
epochs =  1 , loss value =  0.9532895367523144
epochs =  2 , loss value =  0.9586855788711828
epochs =  3 , loss value =  0.9617075333967966
epochs =  4 , loss value =  0.9606223170204341
epochs =  5 , loss value =  0.9501682798523977
epochs =  6 , loss value =  0.9132378919148948
epochs =  7 , loss value =  0.8292359404341895
epochs =  8 , loss value =  0.7288373257663119
epochs =  9 , loss value =  0.6431064080379508
epochs =  10 , loss value =  0.5832377882460598
epochs =  11 , loss value =  0.5477698975471286
epochs =  12 , loss value =  0.525798720039755
epochs =  13 , loss value =  0.5077574751895141
epochs =  14 , loss value =  0.48764582703055603
epochs =  15 , loss value =  0.46556978143579175
epochs =  16 , loss value =  0.4453632112909887
epochs =  17 , loss value =  0.43138257761455057
epochs =  18 , loss value =  0.42462935485353764
epochs =  19 , l

In [11]:
test_input_data = test_data[:, 0:-1]
test_target_data = test_data[:,-1]

accuracy_ret = obj.accuracy(test_input_data, test_target_data)

print('Accuracy => ', accuracy_ret)

Accuracy =>  0.7577092511013216
