In [1]:
import numpy as np

def softmax(a):
    """Return the softmax of ``a``, normalised over all of its elements.

    The largest element is subtracted before exponentiating. The shift
    cancels out in the ratio, so the result is mathematically unchanged,
    but it keeps ``np.exp`` from overflowing to ``inf`` (which would turn
    the output into NaN) when the scores are large.

    Parameters
    ----------
    a : array_like
        Raw scores. The normalisation runs over every element of ``a``,
        not over a particular axis.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``a`` whose elements sum to 1.
        An empty input yields an empty array.
    """
    a = np.asarray(a)
    if a.size == 0:
        # np.max raises on empty input; return an empty float array instead.
        return np.exp(a)

    exp_a = np.exp(a - np.max(a))
    return exp_a / np.sum(exp_a)

In [2]:
# Worked example: softmax of three raw scores, computed step by step so
# that every intermediate value stays available for inspection.
a = np.array([0.3, 2.9, 4.0])
exp_a = np.exp(a)            # element-wise exponentials
sum_exp_a = exp_a.sum()      # normalising constant
y = np.divide(exp_a, sum_exp_a)  # resulting probabilities

In [3]:
# Display each intermediate of the step-by-step softmax, one value per line;
# the final entry is the sum of the probabilities.
for value in (a, exp_a, sum_exp_a, y, sum(y)):
    print(value)

[0.3 2.9 4. ]
[ 1.34985881 18.17414537 54.59815003]
74.1221542101633
[0.01821127 0.24519181 0.73659691]
1.0


## class로 softmax 구현

In [4]:
import numpy as np


class SoftmaxRegression:
    """Softmax (multinomial logistic) regression trained by batch gradient descent.

    Labels are passed as one-hot rows of shape (n_samples, n_classes); the
    true class of a row is taken as the arg-max of that row. The cost is the
    mean cross-entropy plus an L2 penalty on the weights.
    """

    def __init__(self, learning_rate=0.01, threshold=0.01, max_iterations=100000, verbose=False, reg_strength=1e-5):
        self._learning_rate = learning_rate  # gradient-descent step size
        self._max_iterations = max_iterations  # upper bound on the number of iterations
        self._threshold = threshold  # training stops once the cost drops below this value
        self._verbose = verbose  # whether to print the cost every 100 iterations
        self._reg_strength = reg_strength  # L2 regularisation coefficient

    def get_coeff(self):
        """Return the weight matrix W (only available after fit() has run)."""
        return self._W

    def softmax_func(self, x_data):
        """Apply softmax to every row of a 2-D score matrix.

        Each row is shifted by its own maximum before exponentiating so that
        np.exp cannot overflow. Example result, one probability row per sample:

            [[0.01821127 0.24519181 0.73659691]
             [0.87279747 0.0791784  0.04802413]
             [0.05280815 0.86841135 0.0787805 ]]
        """
        shifted = x_data - (x_data.max(axis=1).reshape([-1, 1]))
        probs = np.exp(shifted)
        probs /= probs.sum(axis=1).reshape([-1, 1])
        return probs

    def cost_func(self, softmax, y_data):
        """Return the mean cross-entropy plus the L2 penalty.

        Parameters
        ----------
        softmax : ndarray of shape (n_samples, n_classes)
            Predicted class probabilities.
        y_data : ndarray of shape (n_samples, n_classes)
            One-hot encoded labels.
        """
        sample_size = y_data.shape[0]

        # For every row, pick the predicted probability of the class marked
        # by the one-hot label; the result has one entry per sample.
        true_class = np.argmax(y_data, axis=1)
        cost = -np.log(softmax[np.arange(len(softmax)), true_class]).sum()
        cost /= sample_size
        cost += (self._reg_strength * (self._W**2).sum()) / 2
        return cost

    def gradient_func(self, softmax, x_data, y_data):
        """Return the regularised gradient of the cost with respect to W.

        The derivative of the softmax cross-entropy with respect to the scores
        is (p_i - y_i), so 1 is subtracted from the probability at each
        sample's true class. This is done on a copy, leaving the caller's
        ``softmax`` array untouched.
        """
        # Use the labels that were passed in, not a notebook-level global.
        sample_size = y_data.shape[0]

        delta = np.array(softmax, dtype=float)  # np.array copies by default
        delta[np.arange(len(delta)), np.argmax(y_data, axis=1)] -= 1
        gradient = np.dot(x_data.transpose(), delta) / sample_size
        gradient += self._reg_strength * self._W
        return gradient

    def fit(self, x_data, y_data):
        """Learn W from features x_data and one-hot labels y_data.

        Parameters
        ----------
        x_data : array_like of shape (n_samples, n_features)
        y_data : array_like of shape (n_samples, n_classes)
            One-hot encoded labels.

        Returns
        -------
        False when the cost falls below the threshold (early stop);
        None when all max_iterations iterations have been used.
        """
        x_data = np.asarray(x_data)
        y_data = np.asarray(y_data)
        _, num_features = np.shape(x_data)
        # The class count comes from the labels passed in, not from a global.
        num_classes = y_data.shape[1]

        # Random initial weights, scaled by sqrt(2 / num_features).
        self._W = np.random.randn(num_features, num_classes) / np.sqrt(num_features / 2)

        for i in range(self._max_iterations):

            # Predicted class probabilities.
            z = np.dot(x_data, self._W)
            probs = self.softmax_func(z)

            # Cost at the current weights (before this iteration's update).
            cost = self.cost_func(probs, y_data)

            # Regularised gradient of the cost.
            gradient = self.gradient_func(probs, x_data, y_data)

            # Gradient-descent step.
            self._W -= self._learning_rate * gradient

            # Stop as soon as the cost is below the threshold.
            if cost < self._threshold:
                return False

            # Report the cost every 100 iterations.
            if self._verbose and i % 100 == 0:
                print ("Iter(Epoch): %s, Loss: %s" % (i, cost))

    def predict(self, x_data):
        """Return, for each row of x_data, the index of the highest-scoring class."""
        return np.argmax(x_data.dot(self._W), 1)

In [5]:
import numpy as np
#from sklearn import datasets


class LogisticRegression:
    """Binary logistic regression trained by batch gradient descent."""

    # Bound on |z| inside sigmoid(): np.exp(700) is still finite in float64,
    # so clipping here prevents overflow in np.exp.
    _SIGMOID_CLIP = 700.0
    # Probabilities are kept this far away from 0 and 1 before taking logs.
    _LOG_EPS = 1e-15

    def __init__(self, learning_rate=0.01, threshold=0.01, max_iterations=100000, fit_intercept=True, verbose=False):
        self._learning_rate = learning_rate  # gradient-descent step size
        self._max_iterations = max_iterations  # upper bound on the number of iterations
        self._threshold = threshold  # training stops once the cost drops below this value
        self._fit_intercept = fit_intercept  # whether a constant column of ones is prepended
        self._verbose = verbose  # whether to print the cost every 100 iterations

    def get_coeff(self):
        """Return the weight vector W (only available after fit() has run)."""
        return self._W

    def add_intercept(self, x_data):
        """Return x_data with a leading column of ones (the intercept term)."""
        x_data = np.asarray(x_data)
        intercept = np.ones((x_data.shape[0], 1))
        return np.concatenate((intercept, x_data), axis=1)

    def sigmoid(self, z):
        """Logistic function 1 / (1 + exp(-z)).

        z is clipped to [-700, 700] first so that np.exp cannot overflow;
        inside that range the result is the same as the unclipped formula.
        """
        z = np.clip(z, -self._SIGMOID_CLIP, self._SIGMOID_CLIP)
        return 1 / (1 + np.exp(-z))

    def cost(self, h, y):
        """Mean binary cross-entropy between predictions h and labels y.

        h is clipped into [eps, 1 - eps] before the logarithm. Without this,
        a prediction of exactly 0 or 1 produces 0 * log(0) = NaN and the
        whole cost becomes NaN.
        """
        h = np.clip(h, self._LOG_EPS, 1 - self._LOG_EPS)
        return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()

    def fit(self, x_data, y_data):
        """Learn W from features x_data and 0/1 labels y_data.

        Parameters
        ----------
        x_data : array_like of shape (n_samples, n_features)
        y_data : array_like with n_samples entries
            Labels; flattened to 1-D so that an (n_samples, 1) column is
            accepted as well.

        Returns
        -------
        False when the cost falls below the threshold (early stop);
        None when all max_iterations iterations have been used.
        """
        x_data = np.asarray(x_data)
        # A column vector would broadcast against the 1-D hypothesis into an
        # (n, n) matrix, so the labels are flattened up front.
        y_data = np.asarray(y_data).reshape(-1)
        num_examples, _ = np.shape(x_data)

        if self._fit_intercept:
            x_data = self.add_intercept(x_data)

        # Weights start at zero.
        self._W = np.zeros(x_data.shape[1])

        for i in range(self._max_iterations):
            z = np.dot(x_data, self._W)
            hypothesis = self.sigmoid(z)

            # Difference between prediction and target.
            diff = hypothesis - y_data

            # Cost at the current weights (before this iteration's update).
            cost = self.cost(hypothesis, y_data)

            # Partial derivative of the cost: X^T * diff / n
            # Derivation: https://stats.stackexchange.com/questions/278771/how-is-the-cost-function-from-logistic-regression-derivated
            gradient = np.dot(x_data.transpose(), diff) / num_examples

            # Gradient-descent step.
            self._W -= self._learning_rate * gradient

            # Stop as soon as the cost is below the threshold.
            if cost < self._threshold:
                return False

            # Report the cost every 100 iterations.
            if self._verbose and i % 100 == 0:
                print('cost :', cost)

    def predict_prob(self, x_data):
        """Return sigmoid(x_data . W), adding the intercept column if configured."""
        if self._fit_intercept:
            x_data = self.add_intercept(x_data)

        return self.sigmoid(np.dot(x_data, self._W))

    def predict(self, x_data):
        """Return predict_prob(x_data) rounded with ndarray.round(), i.e. a 0.5 cut-off."""
        return self.predict_prob(x_data).round()