<a href="https://colab.research.google.com/github/Temple2001/ML_practice/blob/main/codes/DeepLearning_practice_5_final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# DataGeneration class 를 이용한 Diabetes 구현


In [1]:
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt

def numerical_derivative(f, x):
    delta_x = 1e-4 # 0.0001
    grad = np.zeros_like(x)
    
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    
    while not it.finished:
        idx = it.multi_index        
        tmp_val = x[idx]
        x[idx] = float(tmp_val) + delta_x
        fx1 = f(x) # f(x+delta_x)
        
        x[idx] = float(tmp_val) - delta_x 
        fx2 = f(x) # f(x-delta_x)
        grad[idx] = (fx1 - fx2) / (2*delta_x)
        
        x[idx] = tmp_val 
        it.iternext()   
        
    return grad


def sigmoid(x):
    return 1 / (1+np.exp(-x))

In [2]:
class Diabetes:

    def __init__(self, name, input_nodes, hidden1_nodes, output_nodes, learning_rate):
        self.name = name
        
        self.W2 = np.random.rand(input_nodes, hidden1_nodes)
        self.b2 = np.random.rand(hidden1_nodes)

        self.W3 = np.random.rand(hidden1_nodes, output_nodes)
        self.b3 = np.random.rand(output_nodes)

        self.learning_rate = learning_rate

        print(self.name, ' is created !!!')
    
    def feed_forward(self):
        delta = 1e-7

        z2 = np.dot(self.input_data, self.W2) + self.b2
        a2 = sigmoid(z2)

        z3 = np.dot(a2, self.W3) + self.b3
        y = a3 = sigmoid(z3)

        return -np.sum(self.target_data*np.log(y+delta)+(1-self.target_data)*np.log((1-y)+delta))
    
    def loss_val(self):
        delta = 1e-7

        z2 = np.dot(self.input_data, self.W2) + self.b2
        a2 = sigmoid(z2)

        z3 = np.dot(a2, self.W3) + self.b3
        y = a3 = sigmoid(z3)

        return -np.sum(self.target_data*np.log(y+delta)+(1-self.target_data)*np.log((1-y)+delta))

    def predict(self, input_data):
        z2 = np.dot(self.input_data, self.W2) + self.b2
        a2 = sigmoid(z2)

        z3 = np.dot(a2, self.W3) + self.b3
        y = a3 = sigmoid(z3)

        if y >= 0.5:
            result = 1
        else:
            result = 0
        
        return y, result
    
    def accuracy(self, input_data, target_data):
        matched_list = []
        not_matched_list = []

        for index in range(len(input_data)):
            (real_val, logical_val) = predict(input_data[index])

            if logical_val == target_data[index]:
                matched_list.append(index)
            else:
                not_matched_list.append(index)
        
        accuracy_val = len(matched_list) / len(input_data)

        return accuracy_val
    
    def train(self, input_data, target_data):
        self.input_data = input_data
        self.target_data = target_data

        f = lambda x : feed_forward()

        self.W2 -= self.learning_rate * numerical_derivative(f, self.W2)
    
        self.b2 -= self.learning_rate * numerical_derivative(f, self.b2)
        
        self.W3 -= self.learning_rate * numerical_derivative(f, self.W3)
    
        self.b3 -= self.learning_rate * numerical_derivative(f, self.b3)

In [3]:
class DataGeneration:

    def __init__(self, name, file_path, seperation_rate, target_position=-1):
        self.name = name
        self.file_path = file_path
        self.seperation_rate = seperation_rate

        if (target_position == -1 or target_position == 0):
            self.target_position = target_position
        else:
            err_str = 'target_position must be -1 or 0'
            raise Exception(err_str)
    
    def __display_target_distribution(self, str_of_kind='original data'):
        print('=========================================================================================')

        target_data = data[:, self.target_position]

        unique, counts = np.unique(target_data, return_counts='True')

        unique_target = []

        for index in range(len(unique)):
            print('[DataGeneration] unique number of ' + str_of_kind + ' = ', unique[index], ', count = ', counts[index])
            unique_target.append(unique[index])
        
        for index in range(len(unique_target)):
            print('[DataGeneration] unique number of ' + str_of_kind + ' = ', unique_target[index], ', ratio = ', np.round(100*counts[index] / (target_data.shape[0]), 2), ' %')

        print('=========================================================================================')
    
    def generate(self):
        try:
            loaded_data = np.loadtxt(self.file_path, delimiter=',', dtype=np.float32)
        except Exception as err:
            print('[DataGeneration::generate()]  ', str(err))
            raise Exception(str(err))
        
        print('[DataGeneration] loaded_data.shape = ', loaded_data.shape)

        total_data_num = len(loaded_data)
        test_data_num = int(len(loaded_data)*self.seperation_rate)

        np.random.shuffle(loaded_data)

        test_data = loaded_data[0:test_data_num]

        training_data = loaded_data[test_data_num:]

        self.__display_target_distribution(training_data, 'training data')
        self.__display_target_distribution(test_data, 'test data')

        return training_data, test_data

In [6]:
test_seperation_rate = 0.4

try:
    data_obj = DataGeneration('Diabetes', 'diabetes.csv', test_seperation_rate)
    (training_data, test_data) = data_obj.generate()

    print('================================================')
    print('training data.shape = ', training_data.shape)
    print('test data.shape = ', test_data.shape)
    print('================================================')
    
except Exception as err:
    print('Exception occur !!')

[DataGeneration] loaded_data.shape =  (759, 9)
Exception occur !!
