softmax 예제

In [1]:
import numpy as np
a = np.array([0.3, 2.9, 4.0])

# Step 1: exponentiate every element.
exp_a = np.exp(a)
print(exp_a)

# Step 2: the normalising constant is the sum of all exponentials.
sum_exp_a = exp_a.sum()
print(sum_exp_a)

# Step 3: divide each exponential by that sum.
y = exp_a / sum_exp_a
print(y)

[ 1.34985881 18.17414537 54.59815003]
74.1221542101633
[0.01821127 0.24519181 0.73659691]


softmax 함수

In [2]:
def softmax(a):
    """Naive softmax: exp of each element divided by the sum of all exps.

    No shift is applied before exponentiating, so large inputs overflow in
    ``np.exp`` (the following cell demonstrates this).
    """
    exponentials = np.exp(a)
    return exponentials / np.sum(exponentials)

softmax: 지수 함수는 입력이 조금만 커져도 값이 급격히 커져서 overflow가 발생할 수 있음  
그래서 입력 신호 중 최댓값을 모든 입력에서 빼서 값을 조정  
softmax의 지수 함수를 계산할 때 모든 입력에 같은 상수(정수가 아니어도 됨)를 더하거나 빼도 결과는 같음!!

In [3]:
a = np.array([1010, 1000, 990])
# Naive form: exp of inputs this large overflows, so this result is unusable.
np.exp(a)/np.sum(np.exp(a))

# Shift every input by the maximum so the largest exponent becomes 0.
c = a.max()
shifted = a - c
print(shifted)

exp_shifted = np.exp(shifted)
exp_shifted / np.sum(exp_shifted)

[  0 -10 -20]


  np.exp(a)/np.sum(np.exp(a))
  np.exp(a)/np.sum(np.exp(a))


array([9.99954600e-01, 4.53978686e-05, 2.06106005e-09])

조정된 softmax 함수

In [4]:
def softmax(a):
    """Softmax with the maximum input subtracted before exponentiating.

    Subtracting the maximum makes the largest exponent 0, which avoids
    overflow in ``np.exp`` while leaving the result unchanged.
    """
    shifted = a - np.max(a)
    exponentials = np.exp(shifted)
    return exponentials / np.sum(exponentials)

In [5]:
# Same inputs as the first example, now through the max-shifted softmax.
a = np.array([0.3, 2.9, 4.0])
y = softmax(a)
print(y)
# The outputs should add up to 1.
y.sum()

[0.01821127 0.24519181 0.73659691]


1.0

In [6]:
import sys, os
sys.path.append(os.pardir)
from dataset.mnist import load_mnist

# Load MNIST with flatten=True and normalize=False, as passed below.
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, normalize=False)

# Print the shape of each returned array: train images/labels, then test images/labels.
for split in (x_train, t_train, x_test, t_test):
    print(split.shape)

(60000, 784)
(60000,)
(10000, 784)
(10000,)


In [7]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist
# PIL.Image: 이미지로 변환하고 화면에 띄우기 위해 사용
from PIL import Image

def img_show(img):
    """Display an array as an image via PIL.

    The array is cast to uint8 (the 0-255 integer range used for image
    data) before being handed to ``Image.fromarray``.
    """
    as_bytes = np.uint8(img)
    Image.fromarray(as_bytes).show()

(x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, normalize=False)

# label: the first training answer; img: the first training image.
label = t_train[0]
print(label)

img = x_train[0]
# Prints (784,) because the data was loaded with flatten=True.
print(img.shape)

# Restore the 1-D vector to a 28x28 two-dimensional image.
img = img.reshape((28, 28))
print(img.shape)

# Finally, show the digit image on screen.
img_show(img)

5
(784,)
(28, 28)


In [5]:
import sys, os
import importlib
sys.path.append(os.path.join(os.getcwd(), '..'))  # 부모 디렉터리의 파일을 가져올 수 있도록 설정
import numpy as np
import pickle
import common.functions
importlib.reload(common.functions)
from dataset.mnist import load_mnist

def sigmoid(x):
    """Logistic sigmoid, 1 / (1 + exp(-x)), applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def softmax(x):
    """Numerically stable softmax for a single vector or a batch of rows.

    Args:
        x: Array-like input. A 2-D input is treated as a batch and each row
            is normalised independently (axis=1). Any other dimensionality
            is normalised over the whole array, as before.

    Returns:
        An ndarray with the same shape as the input. Each row (2-D) or the
        whole array (otherwise) sums to 1.
    """
    # Accept lists/tuples as well as ndarrays; the input is never modified.
    x = np.asarray(x)
    if x.ndim == 2:
        # Subtract each row's maximum so the largest exponent is 0 (no overflow).
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)
    # Compute the exponentials once and reuse them for the normaliser.
    exp_x = np.exp(x - np.max(x))
    return exp_x / np.sum(exp_x)

def get_data():
    """Load MNIST and return only the test split as (x_test, t_test).

    The loader is called with normalize=True, flatten=True and
    one_hot_label=False; the training split it returns is discarded.
    """
    _train, test = load_mnist(normalize=True, flatten=True, one_hot_label=False)
    x_test, t_test = test
    return x_test, t_test


def init_network():
    """Unpickle and return the contents of ``sample_weight.pkl``.

    The file is looked up in the current working directory.
    NOTE(review): pickle.load can execute arbitrary code; only use a
    weight file from a trusted source.
    """
    weight_file = os.path.join(os.getcwd(), "sample_weight.pkl")
    with open(weight_file, 'rb') as handle:
        return pickle.load(handle)


def predict(network, x):
    """Forward pass: two sigmoid layers followed by a softmax output layer.

    Args:
        network: Mapping holding weights 'W1'..'W3' and biases 'b1'..'b3'.
        x: Input passed to the first ``np.dot``.

    Returns:
        The softmax of the third layer's affine output.
    """
    weights = [network[key] for key in ('W1', 'W2', 'W3')]
    biases = [network[key] for key in ('b1', 'b2', 'b3')]

    # Hidden layers: affine transform followed by sigmoid.
    activation = x
    for weight, bias in zip(weights[:-1], biases[:-1]):
        activation = sigmoid(np.dot(activation, weight) + bias)

    # Output layer: affine transform followed by softmax.
    return softmax(np.dot(activation, weights[-1]) + biases[-1])


x, t = get_data()
network = init_network()

# Classify one sample at a time and count the correct predictions.
accuracy_cnt = 0
for i, sample in enumerate(x):
    y = predict(network, sample)
    p = np.argmax(y)  # index of the element with the highest probability
    accuracy_cnt += int(p == t[i])

print("Accuracy:" + str(float(accuracy_cnt) / len(x)))

Accuracy:0.9352


In [10]:
x, _ = get_data()
network = init_network()
w1, w2, w3 = (network[key] for key in ('W1', 'W2', 'W3'))

# Shapes of the full input, a single sample, and each weight matrix.
for arr in (x, x[0], w1, w2, w3):
    print(arr.shape)

(10000, 784)
(784,)
(784, 50)
(50, 100)
(100, 10)


In [12]:
x, t = get_data()
network = init_network()

batch_size = 100
accuracy_cnt = 0

# Predict batch_size samples per call instead of one at a time.
for i in range(0, len(x), batch_size):
    stop = i + batch_size
    x_batch, t_batch = x[i:stop], t[i:stop]
    y_batch = predict(network, x_batch)
    # axis=1: pick the most probable class within each row (sample).
    p = y_batch.argmax(axis=1)
    accuracy_cnt += np.sum(p == t_batch)

print("Accuracy:" + str(float(accuracy_cnt / len(x))))

Accuracy:0.9352


In [14]:
# range(start, stop): integers from 0 up to, but not including, 10.
list(range(0,10))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [15]:
# range(start, stop, step): every 3rd integer from 0, stopping before 10.
list(range(0,10,3))

[0, 3, 6, 9]

In [16]:
x = np.array([
    [0.1, 0.8, 0.1],
    [0.3, 0.1, 0.6],
    [0.2, 0.5, 0.3],
    [0.8, 0.1, 0.1],
])
# argmax returns the index of the maximum; axis=1 takes it per row.
y = x.argmax(axis=1)
print(y)

[1 2 1 0]


In [17]:
y = np.array([1, 2, 1, 0])
t = np.array([1, 2, 0, 0])
# Element-wise comparison gives a boolean array.
matches = y == t
print(matches)
# Summing the booleans counts the True entries.
np.sum(matches)

[ True  True False  True]


3