In [3]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
import seaborn as sns

In [4]:
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1, cache=True)
mnist.keys()

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

In [5]:
X, y = mnist["data"], mnist["target"]

In [6]:
y = y.astype(np.uint8)

In [7]:
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder()

In [8]:
enc.fit(y[:,np.newaxis])

  """Entry point for launching an IPython kernel.


OneHotEncoder()

In [9]:
Y = enc.transform(y[:,np.newaxis]).toarray()

  """Entry point for launching an IPython kernel.


In [10]:
X_train, X_test, y_train, y_test = X[:60000], X[60000:], Y[:60000], Y[60000:]

In [11]:
X_train = X_train / 255
X_test = X_test / 255

In [12]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [13]:
def softmax(X, W):
    '''
    X = np.size((N,M))
    W = np.zeros((M,K))
    B = np.size((N,N))
    Y = np.size((N,K))
    '''
    # A.shape = (N,K)
    A = np.exp(X @ W)
    # B.shape = (N,N)
    B = np.diag(1 / (np.reshape(A @ np.ones((K,1)), -1)))
    # Y.shape = (N,K)
    Y = B @ A
    return Y

In [22]:
def compute_cost(X, T, W, lamb=0):
    '''
    X = np.size((N,M))
    W = np.zeros((M,K))
    T = np.size((N,K))
    softmax(X,W).shape = Y.shape = (N,K)
    '''

    epsilon = 1e-5
    N = len(T)
    K = np.size(T, 1)
    cost = - (1/N) * np.ones((1,N)) @ (np.multiply(np.log(softmax(X, W) + epsilon), T)) @ np.ones((K,1))
    if lamb : 
        regularization =  lamb * np.ones((1,M)) @ W**2 @ np.ones((K,1))
        cost += regularization
    return cost 

In [15]:
def predict(X, W):
    return np.argmax((X @ W), axis=1)

In [41]:
def batch_gd(X, T, W, learning_rate, iterations, batch_size, lamb = 0):
    N = len(T)
    cost_history = np.zeros((iterations,1))
    shuffled_indices = np.random.permutation(N)
    X_shuffled = X[shuffled_indices]
    T_shuffled = T[shuffled_indices]

    print(f"start learing, lambda : {lamb} ")
    for i in range(iterations):
        j = i % N
        X_batch = X_shuffled[j:j+batch_size]
        T_batch = T_shuffled[j:j+batch_size]
        # batch가 epoch 경계를 넘어가는 경우, 앞 부분으로 채워줌
        if X_batch.shape[0] < batch_size:
            X_batch = np.vstack((X_batch, X_shuffled[:(batch_size - X_batch.shape[0])]))
            T_batch = np.vstack((T_batch, T_shuffled[:(batch_size - T_batch.shape[0])]))
        W = W - (learning_rate/batch_size) * (X_batch.T @ (softmax(X_batch, W) - T_batch))
        if lamb :
            regularization = 2* lamb * W 
            W -= regularization 
        cost_history[i] = compute_cost(X_batch, T_batch, W, lamb)
        if i % 10000 == 0:
            print(cost_history[i][0])
    print(f"end learing, lambda : {lamb} ")
    print(f"============================= ")

    return (cost_history, W)

In [17]:
X_train.shape, y_train.shape,

((60000, 784), (60000, 10))

In [57]:
X = np.hstack((np.ones((np.size(X_train, 0),1)),X_train))
T = y_train

K = np.size(T, 1)
M = np.size(X, 1)
W = np.zeros((M,K))

iterations = 10000
learning_rate = 0.01

# initial_cost = compute_cost(X, T, W)

# print("Initial Cost is: {} \n".format(initial_cost[0][0]))
lambdas = np.log(np.logspace(1e-6 , 0,num = 10, base = 10))
# (cost_history, W_optimal) = batch_gd(X, T, W, learning_rate, iterations, 64)
histories = []
scores = []
for lamb in lambdas:
    (cost_history_regularization, W_optimal) = batch_gd(X, T, W, learning_rate, iterations, 64, lamb =lamb)
    histories.append((cost_history_regularization, W_optimal))
    scores.append(Accuracy())

start learing, lambda : 2.3025850930786036e-06 
2.280811458384061
end learing, lambda : 2.3025850930786036e-06 
0.8972
start learing, lambda : 2.0467423049413348e-06 
2.2826700214624074
end learing, lambda : 2.0467423049413348e-06 
0.9036
start learing, lambda : 1.7908995168515582e-06 
2.2632631716708302
end learing, lambda : 1.7908995168515582e-06 
0.9018
start learing, lambda : 1.5350567285872463e-06 
2.277018539846715
end learing, lambda : 1.5350567285872463e-06 
0.8941
start learing, lambda : 1.2792139405925043e-06 
2.2832038403134196
end learing, lambda : 1.2792139405925043e-06 
0.901
start learing, lambda : 1.0233711524232604e-06 
2.282281628459328
end learing, lambda : 1.0233711524232604e-06 
0.904
start learing, lambda : 7.675283643015758e-07 
2.2772275507245827
end learing, lambda : 7.675283643015758e-07 
0.9027
start learing, lambda : 5.116855762274671e-07 
2.276344339267964
end learing, lambda : 5.116855762274671e-07 
0.8951
start learing, lambda : 2.558427882009511e-07 
2.2

In [58]:
index = scores.index(max(scores))
print(f'best score {scores[index]} , lambda : {lambdas[index]}')

TypeError: ignored

In [44]:
def Accuracy():

    X_ = np.hstack((np.ones((np.size(X_test, 0),1)),X_test))
    T_ = y_test
    y_pred = predict(X_, W_optimal)
    score = float(sum(y_pred == np.argmax(T_, axis=1)))/ float(len(y_test))

    print(score)
    print('===================')
    return score