# MLP Gradient Descent
* Without Error Backpropagation

In [1]:
import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides numerical RuntimeWarnings raised by NumPy
# (e.g. overflow inside exp); consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

# I. 수치미분(Numerical Derivative)

> ## 1) Import numpy

In [2]:
import numpy as np 

> ## 2) gradient( ) 함수 정의

* 다변수 함수의 수치미분

In [3]:
def gradient(machine, param):
    """Estimate the gradient of ``machine`` with respect to ``param`` numerically.

    A central difference ``(f(p + delta) - f(p - delta)) / (2 * delta)`` is
    evaluated for every element of ``param``, whatever its dimensionality.

    The perturbation is applied *in place* on ``param`` and undone afterwards.
    This is intentional: a caller may pass a ``machine`` that ignores its
    argument and reads the very same array through another reference, so the
    perturbed value has to be visible through ``param`` itself, not a copy.

    Parameters
    ----------
    machine : callable
        Called as ``machine(param)``; must return a scalar.
    param : numpy.ndarray
        Array to differentiate with respect to. It should have a floating
        dtype: assigning the perturbed value into an integer array would
        truncate it.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``param`` holding the estimated
        partial derivatives.
    """
    delta = 0.00005
    learned_param = np.zeros(param.shape)

    # np.ndindex walks every index tuple, so 0-D, 1-D, 2-D and higher-rank
    # arrays are all handled by the same loop.
    for index in np.ndindex(param.shape):
        target_param = float(param[index])
        try:
            param[index] = target_param + delta
            param_plus_delta = machine(param)
            param[index] = target_param - delta
            param_minus_delta = machine(param)
        finally:
            # Always put the original value back, even if machine() raises,
            # so the caller's array is never left perturbed.
            param[index] = target_param

        learned_param[index] = (param_plus_delta - param_minus_delta) / (2 * delta)

    return learned_param

> ## 3) Activation - sigmoid( )

In [4]:
def sigmoid(x):
    """Apply the logistic function ``1 / (1 + exp(-x))`` element-wise.

    ``x`` may be a scalar or a NumPy array; the result has the same shape.
    """
    exp_of_negated = np.exp(-x)
    denominator = 1 + exp_of_negated
    return 1 / denominator

# II. Data Preprocessing

> ## 1) 실습용 데이터 설정
* iris.csv

In [5]:
import seaborn as sns

# Load seaborn's bundled "iris" example dataset into DF.
# NOTE(review): seaborn fetches example datasets from its online repository on
# first use and caches them, so a fresh environment needs network access here.
DF = sns.load_dataset('iris')

* pandas DataFrame

In [6]:
# Summarise the frame: column names, non-null counts and dtypes.
DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [7]:
# Preview the first three rows.
DF.head(3)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa


> ## 2) Data Set Slicing

* X = ['petal_length', 'petal_width']

In [8]:
# Features: the first 100 rows, positional columns 2 and 3
# (petal_length and petal_width according to the DF.info() listing).
X = DF.iloc[:100, 2:4]

# Labels: positional column 4 (species) for the same 100 rows.
y = DF.iloc[:100, 4]

* DataFrame to Array

In [9]:
# Pull the underlying NumPy arrays out of the pandas objects; the rest of the
# notebook works on plain arrays.
X_input = X.values
y_output = y.values

> ## 3) Integer Encoding

* LabelEncoder( )
 - ['setosa', 'versicolor'] to [0, 1]

In [10]:
from sklearn.preprocessing import LabelEncoder

# Replace the string class labels with integer codes.
# y_output is rebound from the string array to the encoded integer array.
encoder =  LabelEncoder()
y_output = encoder.fit_transform(y_output)

# Display the encoded labels.
y_output

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

* Array Shape

In [11]:
# Confirm the shapes of the feature matrix and the label vector.
X_input.shape, y_output.shape

((100, 2), (100,))

# III. Model Training(Learning)

> ## 1) MLP_Classifier Class

In [12]:
class MLP_Classifier:
    """Small multi-layer perceptron for binary classification.

    Architecture: input -> 2 sigmoid hidden units -> 1 sigmoid output unit.
    The parameters are fitted by plain gradient descent on the summed binary
    cross-entropy, with gradients obtained from the module-level numerical
    ``gradient`` function (no error backpropagation).

    Parameters
    ----------
    problem_Type : str
        Free-form description of the task; stored in ``self.Type`` and not
        otherwise used by this class.
    X_input : numpy.ndarray
        Feature matrix. A 2-D array keeps its own number of columns; any other
        layout is reshaped to two columns.
    y_output : numpy.ndarray
        Binary labels (0 / 1), one per sample.
    learning_rate : float, optional
        Gradient-descent step size (default 0.01).

    Raises
    ------
    ValueError
        If the number of samples and the number of labels differ.
    """

    def __init__(self, problem_Type, X_input, y_output, learning_rate=0.01):

        # Task description string.
        self.Type = problem_Type

        # Training data. The sample count is inferred from the data instead of
        # being fixed at 100 rows.
        X_input = np.asarray(X_input)
        y_output = np.asarray(y_output)
        n_features = X_input.shape[1] if X_input.ndim == 2 else 2
        self.X_input = X_input.reshape(-1, n_features)
        self.y_output = y_output.reshape(-1, 1)

        if len(self.X_input) != len(self.y_output):
            raise ValueError(
                'X_input has %d samples but y_output has %d labels'
                % (len(self.X_input), len(self.y_output))
            )

        # Hidden-layer weights and biases (two hidden units), uniform in [0, 1).
        self.W_1 = np.random.rand(n_features, 2)
        self.b_1 = np.random.rand(2)

        # Output-layer weights and bias (one output unit).
        self.W_2 = np.random.rand(2, 1)
        self.b_2 = np.random.rand(1)

        # Gradient-descent step size.
        self.learning_rate = learning_rate

    def _forward(self, input_data):
        """Return the output-layer sigmoid activation for ``input_data``."""
        z_1 = np.dot(input_data, self.W_1) + self.b_1     # Hidden Layer
        a_1 = sigmoid(z_1)

        z_2 = np.dot(a_1, self.W_2) + self.b_2            # Output Layer
        return sigmoid(z_2)

    def _cost_wrt(self, name):
        """Build a function mapping a candidate value of ``self.<name>`` to the cost.

        The candidate is installed on the instance only for the duration of the
        cost evaluation, so the result does not depend on whether the numerical
        differentiator perturbs the stored array in place or works on a copy.
        """
        def machine(value):
            saved = getattr(self, name)
            setattr(self, name, value)
            try:
                return self.cost_func()
            finally:
                setattr(self, name, saved)

        return machine

    # Cost function: cross-entropy error (CEE) summed over all training samples.
    def cost_func(self):
        """Return the summed binary cross-entropy on the stored training data."""
        y_hat = self._forward(self.X_input)

        # Small offset that keeps log() away from log(0).
        delta = 0.00001
        return -np.sum(self.y_output * np.log(y_hat + delta) + (1 - self.y_output) * np.log((1 - y_hat) + delta))

    # Learning method.
    def learn(self, steps=10001, report_every=1000):
        """Run gradient descent, printing the cost as training progresses.

        Parameters
        ----------
        steps : int, optional
            Number of update steps (default 10001).
        report_every : int, optional
            Print the cost every this many steps (default 1000); a falsy value
            disables the per-step reports.
        """
        print('Initial Cost = ', self.cost_func())

        for step in range(steps):
            # Parameters are updated one after another, so each gradient
            # already sees the parameters updated earlier in the same step.
            for name in ('W_1', 'b_1', 'W_2', 'b_2'):
                current = getattr(self, name)
                grad = gradient(self._cost_wrt(name), current)
                setattr(self, name, current - self.learning_rate * grad)

            if report_every and step % report_every == 0:
                print('Step = ', step, 'Cost = ', self.cost_func())

    # Predict method.
    def predict(self, input_data):
        """Return ``(probability, label)`` for one sample or a batch of samples.

        For a single sample the label is a plain ``int`` (0 or 1). For a batch
        the label is an integer array with the same shape as the probabilities.
        The label is 1 where the probability exceeds 0.5.
        """
        y_prob = self._forward(np.asarray(input_data))

        labels = (y_prob > 0.5).astype(int)
        # A lone prediction is unwrapped to a Python int; batches stay arrays.
        result = int(labels.item()) if labels.size == 1 else labels

        return y_prob, result

> ## 2) IRIS.learn( )

In [13]:
%%time

IRIS = MLP_Classifier('Binary_Classification', X_input, y_output)

IRIS.learn()

Initial Cost =  105.20400338324318
Step =  0 Cost =  81.17955991580791
Step =  1000 Cost =  0.38242994361762234
Step =  2000 Cost =  0.1772011108755237
Step =  3000 Cost =  0.11440939272028285
Step =  4000 Cost =  0.08414814809267498
Step =  5000 Cost =  0.06638142216595866
Step =  6000 Cost =  0.054710124519299214
Step =  7000 Cost =  0.04646326464646997
Step =  8000 Cost =  0.040329797630184745
Step =  9000 Cost =  0.03559140863745613
Step =  10000 Cost =  0.03182179567899971
CPU times: user 14.2 s, sys: 907 ms, total: 15.1 s
Wall time: 14 s


> ## 3) IRIS.predict( )

In [14]:
# Predicted class label for every sample, in the same order as X_input.
y_hat = []

# predict() is called one sample at a time; it returns the sigmoid output and
# the thresholded 0/1 label, and only the label is collected.
for input_data in X_input:
    (sigmoid_val, logical_val) = IRIS.predict(input_data) 
    print(input_data, ' = ', logical_val)

    y_hat.append(logical_val)

[1.4 0.2]  =  0
[1.4 0.2]  =  0
[1.3 0.2]  =  0
[1.5 0.2]  =  0
[1.4 0.2]  =  0
[1.7 0.4]  =  0
[1.4 0.3]  =  0
[1.5 0.2]  =  0
[1.4 0.2]  =  0
[1.5 0.1]  =  0
[1.5 0.2]  =  0
[1.6 0.2]  =  0
[1.4 0.1]  =  0
[1.1 0.1]  =  0
[1.2 0.2]  =  0
[1.5 0.4]  =  0
[1.3 0.4]  =  0
[1.4 0.3]  =  0
[1.7 0.3]  =  0
[1.5 0.3]  =  0
[1.7 0.2]  =  0
[1.5 0.4]  =  0
[1.  0.2]  =  0
[1.7 0.5]  =  0
[1.9 0.2]  =  0
[1.6 0.2]  =  0
[1.6 0.4]  =  0
[1.5 0.2]  =  0
[1.4 0.2]  =  0
[1.6 0.2]  =  0
[1.6 0.2]  =  0
[1.5 0.4]  =  0
[1.5 0.1]  =  0
[1.4 0.2]  =  0
[1.5 0.2]  =  0
[1.2 0.2]  =  0
[1.3 0.2]  =  0
[1.4 0.1]  =  0
[1.3 0.2]  =  0
[1.5 0.2]  =  0
[1.3 0.3]  =  0
[1.3 0.3]  =  0
[1.3 0.2]  =  0
[1.6 0.6]  =  0
[1.9 0.4]  =  0
[1.4 0.3]  =  0
[1.6 0.2]  =  0
[1.4 0.2]  =  0
[1.5 0.2]  =  0
[1.4 0.2]  =  0
[4.7 1.4]  =  1
[4.5 1.5]  =  1
[4.9 1.5]  =  1
[4.  1.3]  =  1
[4.6 1.5]  =  1
[4.5 1.3]  =  1
[4.7 1.6]  =  1
[3.3 1. ]  =  1
[4.6 1.3]  =  1
[3.9 1.4]  =  1
[3.5 1. ]  =  1
[4.2 1.5]  =  1
[4. 1.] 

> ## 4) Confusion Matrix

In [15]:
from sklearn.metrics import confusion_matrix

# Compare the true labels with the predictions. Both come from the same data
# the model was fitted on, so this reflects training fit, not held-out accuracy.
confusion_matrix(y_output, y_hat)

array([[50,  0],
       [ 0, 50]])

# 
# 
# 
# The End
# 
# 
# 