> sklearn 없이 numpy만 사용하여  
미니배치 경사 하강법 + 조기 종료 소프트맥스 회귀 구현하기

# Data Preparing

In [98]:
import numpy as np
from sklearn import datasets

# Load the iris dataset and pull out the feature matrix and the class labels.
iris = datasets.load_iris()
X = iris['data']
y = iris['target']

In [99]:
def data_split(X, y, test_ratio = 0.2, seed = 42):
    """Shuffle X and y with one shared permutation and split them in two.

    Note that this seeds NumPy's *global* random generator with ``seed``.

    Parameters
    ----------
    X, y : arrays with the same number of rows (first axis).
    test_ratio : fraction of rows assigned to the test part; the train part
        receives the first ``int(n_rows * (1 - test_ratio))`` shuffled rows.
    seed : value passed to ``np.random.seed`` before shuffling.

    Returns
    -------
    X_train, X_test, y_train, y_test
    """
    np.random.seed(seed)

    n_rows = X.shape[0]
    shuffled = np.random.permutation(n_rows)

    # Everything before the cut goes to train, the remainder to test.
    cut = int(n_rows * (1-test_ratio))
    train_idx, test_idx = shuffled[:cut], shuffled[cut:]

    return X[train_idx], X[test_idx], y[train_idx], y[test_idx]

# Shuffle and split the data; the keyword values restate data_split's defaults.
X_train_full, X_test, y_train_full, y_test = data_split(X, y, test_ratio=0.2, seed=42)

In [100]:
# Hold out the last 20 rows of the training data as the validation set.
X_train, y_train = X_train_full[:-20], y_train_full[:-20]
X_valid, y_valid = X_train_full[-20:], y_train_full[-20:]

### sub component : one hot

In [90]:
# One-hot encode the training labels as an (n_class, data_len) matrix:
# column j holds a single 1 in the row given by y_train[j].
n_class = np.unique(y_train).size
data_len = X_train.shape[0]

# Row y_train[j] of the identity matrix is the one-hot vector of sample j;
# transposing puts samples on the columns.
onehot_matrix = np.eye(n_class)[y_train].T

# Model

### sub component : softmax

In [32]:
def softmax(x):
    """Return exp(x) normalized so that all elements of the result sum to 1.

    The normalization runs over every element of ``x``; call it on 1-D slices
    (e.g. through ``np.apply_along_axis``) to normalize per row or column.

    Parameters
    ----------
    x : array-like of scores.

    Returns
    -------
    numpy.ndarray with the same shape as ``x``.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalize; max() below would raise on an empty array.
        return np.exp(x)

    # Subtracting the maximum leaves the result mathematically unchanged but
    # keeps exp() from overflowing to inf (which would yield NaN) on large scores.
    e_x = np.exp(x - x.max())
    return e_x / e_x.sum()

# np.apply_along_axis(softmax, 1, X_train)

### sub component : initialization

In [91]:
# Weight initialization (Xavier/Glorot normal): one row per class, one column
# per feature plus a leading bias column.
n_feature = X_train.shape[1]  # was never defined before being used here
fan_in = n_feature
fan_out = n_class

# Glorot normal draws from N(0, sigma^2) with sigma = sqrt(2 / (fan_in + fan_out)).
sigma_W = np.sqrt(2 / (fan_in + fan_out))
W = sigma_W * np.random.randn(n_class, n_feature + 1)

### sub component : forward pass calculation

In [92]:
# Forward pass over the whole training set: scores -> probabilities -> labels -> loss.
data_len = X_train.shape[0]

# Prepend a column of ones so that the first column of W acts as the bias.
X_train_with_bias = np.hstack((np.ones((data_len, 1)), X_train))

# Rows are classes, columns are samples.
class_score_matrix = W.dot(X_train_with_bias.T)
class_probability = np.apply_along_axis(softmax, 0, class_score_matrix)
prediction = np.argmax(class_probability, axis=0)

# Mean cross-entropy between the one-hot targets and the predicted probabilities.
loss = -np.sum(onehot_matrix * np.log(class_probability)) / data_len

### Training Algorithm

In [121]:
# Gradient-descent hyperparameters and early-stopping state.
data_len = X_train.shape[0]

eta, n_iter, eps = 0.01, 5001, 1e-7

min_loss, patience, terminate = float("inf"), 0, False

# Containers for the recorded training history.
history = dict()
epochs, losses, weights = [], [], []

def _with_bias_column(x):
    """Prepend a column of ones to the 2-D array x."""
    return np.concatenate((np.ones((len(x), 1)), x), axis=1)


def _one_hot_columns(y, n_class):
    """Return an (n_class, len(y)) matrix whose column j is the one-hot code of y[j]."""
    encoded = np.zeros((n_class, len(y)))
    encoded[y, np.arange(len(y))] = 1
    return encoded


def _column_softmax(scores):
    """Softmax over each column of scores, shifted by the column max to avoid overflow."""
    exp_scores = np.exp(scores - scores.max(axis=0, keepdims=True))
    return exp_scores / exp_scores.sum(axis=0, keepdims=True)


def _mean_cross_entropy(onehot, probability, eps):
    """Mean cross-entropy over the columns; eps keeps log() finite at probability 0."""
    return -(onehot * np.log(probability + eps)).sum() / onehot.shape[1]


def fit(X_train, y_train, X_valid, y_valid, n_iter, patience_upper=20,
        eta=0.01, eps=1e-7, W_init=None):
    """Train softmax regression by full-batch gradient descent with early stopping.

    The function is self-contained: it builds the bias-augmented inputs and
    one-hot targets from its arguments and keeps all training state local, so
    it neither reads nor mutates module-level variables.

    Parameters
    ----------
    X_train, X_valid : 2-D arrays, one row per sample.
    y_train, y_valid : integer class labels, used as row indices of the
        one-hot matrices; the number of classes is the number of distinct
        values in ``y_train``.
    n_iter : maximum number of gradient-descent iterations.
    patience_upper : stop after this many consecutive iterations without a
        new best validation loss.
    eta : learning rate.
    eps : constant added inside ``log`` when computing the losses.
    W_init : optional initial weights of shape (n_class, n_feature + 1); when
        omitted, weights are drawn with Xavier/Glorot normal scaling.

    Returns
    -------
    dict with keys ``'epochs'``, ``'val_losses'`` and ``'weights'``: one entry
    per iteration that reached a new best validation loss, so the last entry
    of ``'weights'`` is the best weight matrix found.
    """
    X_train_with_bias = _with_bias_column(X_train)
    X_valid_with_bias = _with_bias_column(X_valid)

    n_feature = X_train_with_bias.shape[1] - 1
    n_class = len(np.unique(y_train))
    onehot_matrix = _one_hot_columns(y_train, n_class)
    val_onehot_matrix = _one_hot_columns(y_valid, n_class)

    if W_init is None:
        sigma_W = np.sqrt(2 / (n_feature + n_class))
        W = sigma_W * np.random.randn(n_class, n_feature + 1)
    else:
        # Work on a float copy so the caller's array is never modified.
        W = np.array(W_init, dtype=float)

    history = {'epochs': [], 'val_losses': [], 'weights': []}
    min_loss = float("inf")
    patience = 0

    for iteration in range(n_iter):
        # train loss (measured before this iteration's update)
        class_probability = _column_softmax(np.dot(W, X_train_with_bias.T))
        train_loss = _mean_cross_entropy(onehot_matrix, class_probability, eps)

        # parameters update
        gradient = np.dot(class_probability - onehot_matrix, X_train_with_bias) / len(X_train_with_bias)
        W = W - eta * gradient

        # validation loss (measured with the updated weights)
        val_class_probability = _column_softmax(np.dot(W, X_valid_with_bias.T))
        val_loss = _mean_cross_entropy(val_onehot_matrix, val_class_probability, eps)

        # monitoring
        if iteration % 500 == 0:
            print("iter{}, train_loss : {}, valid_loss : {}".format(iteration, train_loss, val_loss))

        if val_loss < min_loss:
            min_loss = val_loss
            patience = 0  # a new best resets the early-stopping counter

            # saving history
            history['epochs'].append(iteration)
            history['val_losses'].append(min_loss)
            history['weights'].append(W)

        # stopping rule
        else:
            patience += 1
            if patience >= patience_upper:
                print('** Early Stopped **')
                break

    return history

# Softmax Regressor (순한맛)

In [116]:
import numpy as np

def softmax(x):
    """Return exp(x) normalized so that all elements of the result sum to 1.

    The normalization runs over every element of ``x``; call it on 1-D slices
    (e.g. through ``np.apply_along_axis``) to normalize per row or column.

    Parameters
    ----------
    x : array-like of scores.

    Returns
    -------
    numpy.ndarray with the same shape as ``x``.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalize; max() below would raise on an empty array.
        return np.exp(x)

    # Subtracting the maximum leaves the result mathematically unchanged but
    # keeps exp() from overflowing to inf (which would yield NaN) on large scores.
    e_x = np.exp(x - x.max())
    return e_x / e_x.sum()

def y_to_onehot(y, n_class):
    """Encode integer labels as an (n_class, len(y)) one-hot matrix.

    Column j of the result contains a single 1, in row ``y[j]``; every other
    entry is 0.
    """
    sample_columns = np.arange(len(y))
    encoded = np.zeros((n_class, sample_columns.size))
    encoded[y, sample_columns] = 1
    return encoded

def add_bias(x):
    """Return x with a column of ones prepended as its first column."""
    ones_column = np.ones((len(x), 1))
    return np.hstack((ones_column, x))


class SoftmaxRegressor(object):
    """Softmax regression trained by full-batch gradient descent with early stopping.

    The weight matrix has one row per class and one column per feature plus a
    leading bias column. The class relies on the module-level helpers
    ``softmax``, ``y_to_onehot`` and ``add_bias``.
    """

    def __init__(self, W = None, eta = 0.01, max_iter = 5001, history = None):
        """Store the hyperparameters.

        Parameters
        ----------
        W : optional weight matrix. ``fit`` replaces it with a fresh random
            initialization; ``predict`` uses it only when ``history`` holds
            no recorded weights.
        eta : learning rate of the gradient-descent update.
        max_iter : maximum number of gradient-descent iterations.
        history : optional history dict as returned by ``fit``.
        """
        self.W = W
        self.eta = eta
        self.max_iter = max_iter
        # Added inside log() so the loss stays finite when a probability is 0.
        self.eps = 1e-7
        self.history = history

    def _probabilities(self, W, X_with_bias):
        """Return the (n_class, n_samples) class-probability matrix for weights W."""
        class_score_matrix = np.dot(W, X_with_bias.T)
        return np.apply_along_axis(softmax, 0, class_score_matrix)

    def _cross_entropy(self, onehot_matrix, class_probability):
        """Return the mean cross-entropy over the columns (samples)."""
        log_probability = np.log(class_probability + self.eps)
        return -(onehot_matrix * log_probability).sum() / onehot_matrix.shape[1]

    def fit(self, X_train, y_train, X_valid, y_valid, patience_upper = 20):
        """Fit the weights on the training set, early-stopping on the validation set.

        Parameters
        ----------
        X_train, X_valid : 2-D arrays, one row per sample.
        y_train, y_valid : integer class labels, used as row indices of the
            one-hot matrices; the number of classes is the number of distinct
            values in ``y_train``.
        patience_upper : stop after this many consecutive iterations without
            a new best validation loss.

        Returns
        -------
        dict with keys ``'epochs'``, ``'val_losses'`` and ``'weights'``: one
        entry per iteration that reached a new best validation loss, so the
        last entry of ``'weights'`` is the best weight matrix found. The same
        dict is stored in ``self.history``.
        """
        self.history = {}
        epochs = []
        losses = []
        weights = []

        n_feature = X_train.shape[1]
        n_class = len(np.unique(y_train))
        min_loss = float("inf")
        # Must exist before the loop: the stopping rule increments it.
        patience = 0

        X_train_with_bias, X_valid_with_bias = add_bias(X_train), add_bias(X_valid)
        onehot_matrix = y_to_onehot(y_train, n_class)
        val_onehot_matrix = y_to_onehot(y_valid, n_class)

        # Weight initialization (Xavier/Glorot normal):
        # sigma = sqrt(2 / (fan_in + fan_out)).
        fan_in = n_feature
        fan_out = n_class

        sigma_W = np.sqrt(2 / (fan_in + fan_out))
        self.W = sigma_W * np.random.randn(n_class, n_feature + 1)

        # learning algorithm
        for iteration in range(self.max_iter):
            # train loss (measured before this iteration's update)
            class_probability = self._probabilities(self.W, X_train_with_bias)
            train_loss = self._cross_entropy(onehot_matrix, class_probability)

            # parameters update
            gradient = np.dot((class_probability - onehot_matrix), X_train_with_bias) / len(X_train)
            self.W = self.W - self.eta * gradient

            # validation loss (measured with the updated weights)
            val_class_probability = self._probabilities(self.W, X_valid_with_bias)
            val_loss = self._cross_entropy(val_onehot_matrix, val_class_probability)

            # monitoring
            if iteration % 500 == 0:
                print("iter : {}, train_loss : {}, valid_loss : {}".format(iteration,
                                                                           round(train_loss,4),
                                                                           round(val_loss,4)))

            if val_loss < min_loss:
                min_loss = val_loss
                patience = 0  # a new best resets the early-stopping counter

                # saving history; self.W is rebound (not mutated) on every
                # update, so the stored matrix is never changed afterwards
                epochs.append(iteration)
                losses.append(min_loss)
                weights.append(self.W)

            # stopping rule
            else:
                patience += 1
                if patience >= patience_upper:
                    print('** Early Stopped **')
                    break

        # Recorded on every exit path: early stop or max_iter reached.
        self.history['epochs'] = epochs
        self.history['val_losses'] = losses
        self.history['weights'] = weights

        return self.history

    def predict(self, X, return_prob = False):
        """Predict class labels (or class probabilities) for the rows of X.

        Uses the last weights recorded in ``self.history`` (the ones with the
        best validation loss); when none are recorded it falls back to
        ``self.W``.

        Parameters
        ----------
        X : 2-D array, one row per sample.
        return_prob : when true, return the (n_class, n_samples) probability
            matrix instead of the predicted labels.

        Raises
        ------
        RuntimeError : if neither recorded weights nor ``self.W`` are available.
        """
        recorded = self.history.get('weights') if self.history else None
        if recorded is not None and len(recorded) > 0:
            final_W = recorded[-1]
        elif self.W is not None:
            final_W = self.W
        else:
            raise RuntimeError("SoftmaxRegressor has no weights; call fit() first.")

        class_probability = self._probabilities(final_W, add_bias(X))

        if return_prob:
            return class_probability

        return class_probability.argmax(axis = 0)

In [117]:
# Train with the default hyperparameters, monitoring the loss on the validation split.
sr = SoftmaxRegressor()

history = sr.fit(X_train=X_train, y_train=y_train, X_valid=X_valid, y_valid=y_valid)

iter : 0, train_loss : 2.7599, valid_loss : 2.7676
iter : 500, train_loss : 0.448, valid_loss : 0.4775
iter : 1000, train_loss : 0.3593, valid_loss : 0.3865
iter : 1500, train_loss : 0.3093, valid_loss : 0.3351
iter : 2000, train_loss : 0.2748, valid_loss : 0.2998
iter : 2500, train_loss : 0.2492, valid_loss : 0.2736
iter : 3000, train_loss : 0.2295, valid_loss : 0.2533
iter : 3500, train_loss : 0.2138, valid_loss : 0.237
iter : 4000, train_loss : 0.201, valid_loss : 0.2236
iter : 4500, train_loss : 0.1904, valid_loss : 0.2124
iter : 5000, train_loss : 0.1814, valid_loss : 0.2028


In [120]:
# Show the predicted and the true test labels on consecutive lines for comparison.
print(sr.predict(X_test), y_test, sep="\n")

[1 0 1 1 0 1 2 2 0 1 2 2 0 2 0 1 2 2 1 2 1 1 2 2 0 1 2 0 1 2]
[1 0 1 1 0 1 2 2 0 1 2 2 0 2 0 1 2 2 1 2 1 1 2 2 0 1 2 0 1 2]
