In [25]:
from tensorflow.examples.tutorials.mnist import input_data

In [26]:
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [28]:
import numpy as np
import math

In [29]:
def sigma(x):
    return 1. / (1. + np.exp(-x)) 

def sigma_prime(x):
    return sigma(x) * (1. - sigma(x))

In [30]:
def log_softmax(x):
    xdev = x - x.max(1, keepdims=True)
    return xdev - np.log(np.sum(np.exp(xdev), axis=1, keepdims=True))

In [39]:
class NN:
    def __init__(self, input_size=784, output_size=10, hidden_size=500, model_type='normal', momentum_intensity=0.1, noise_intensity=0.01, reg_coef = [0.01, 0.01]):
        init_range_first_layer = math.sqrt(6.0 / (input_size + hidden_size))
        self.W1 = np.array([np.random.uniform(-init_range_first_layer, init_range_first_layer, hidden_size) for i in range(input_size)])
        self.b1 = [np.zeros(500)]
        init_range_second_layer = math.sqrt(6.0 / (500 + output_size))
        self.W2 = np.array([np.random.uniform(-init_range_second_layer, init_range_second_layer, output_size) for i in range(hidden_size)])
        self.b2 = [np.zeros(10)]
        self.model_type = model_type
        
        if self.model_type == 'past_momentum':
            self.past_momentums = []
            self.momentum_intensity = momentum_intensity
            
        if self.model_type == 'noise':
            self.noise_intensity = noise_intensity
            
        if self.model_type == 'regularization':
            self.reg_coef_1 = reg_coef[0]
            self.reg_coef_2 = reg_coef[1]
    
    def train(self, X, Y, learning_rate=0.05):
        num_examples = X.shape[0]
        layer_1 = np.matmul(X, self.W1) + self.b1
        activated_1 = sigma(layer_1)
        
        layer_2 = np.matmul(layer_1, self.W2) + self.b2
        exp_scores = np.exp(layer_2)
        y_pred = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        
        loss =  Y * -log_softmax(layer_2)
        loss = np.mean(np.sum(loss, axis=1))

        d_layer_2 = (y_pred - Y) / num_examples
        d_b2 = np.sum(d_layer_2, axis=0, keepdims=True)
        d_W2 = np.matmul(np.transpose(activated_1), d_layer_2)

        d_loss_1 = np.matmul(d_layer_2, np.transpose(self.W2))
        d_sigma_1 = sigma_prime(layer_1)
        d_layer_1 = d_loss_1 * d_sigma_1
        d_b1 = np.sum(d_layer_1, axis=0, keepdims=True)
        d_W1 = np.matmul(np.transpose(X), d_layer_1)

        
        self.W1 -= learning_rate * d_W1
        self.b1 -= learning_rate * d_b1
        self.W2 -= learning_rate * d_W2
        self.b2 -= learning_rate * d_b2
        
        if self.model_type == 'past_momentum':
            if len(self.past_momentums) > 0:
                self.W1 += self.momentum_intensity * self.past_momentums[0]
                self.b1 += self.momentum_intensity * self.past_momentums[1]
                self.W2 += self.momentum_intensity * self.past_momentums[2]
                self.b2 += self.momentum_intensity * self.past_momentums[3]
            self.past_momentums = [d_W1, d_b1, d_W2, d_b2]
            
        if self.model_type == 'noise':
            self.W1 += np.random.normal(0,self.noise_intensity)
            self.b1 += np.random.normal(0,self.noise_intensity)
            self.W2 += np.random.normal(0,self.noise_intensity)
            self.b2 += np.random.normal(0,self.noise_intensity)
                
        return loss
    
    
    def predict(self, X):
        layer_1 = np.matmul(X, self.W1) + self.b1
        activated_1 = sigma(layer_1)

        layer_2 = np.matmul(layer_1, self.W2) + self.b2
        exp_scores = np.exp(layer_2)
        y_pred = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    
        return np.argmax(y_pred, axis=1)
    
    def calculate_accuracy(self, X, Y):
        prediction = self.predict(X)
        return np.mean(prediction == np.argmax(Y, axis=1)) * 100
    
#     def calculate_regulation_term(order):
#         weight_list = 
#         if order == 1:
#             return [ for ]
        

In [40]:
model = NN()
TRAINING_EPOCHS = 50
BATCH_SIZE = 500

In [41]:
for epoch in range(TRAINING_EPOCHS):
    avg_cost = 0
    total_batch = int(mnist.train.num_examples / BATCH_SIZE)
    
    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(BATCH_SIZE)
        c = model.train(batch_xs, batch_ys)
        avg_cost += c / total_batch

    print('Epoch: ' + str((epoch + 1)) + ' cost: ' + str((avg_cost)))
    print('Precision: ' + str(model.calculate_accuracy(mnist.test.images, mnist.test.labels)))

print('Learning Finished!')


Epoch: 1 cost: 1.40441422388
Precision: 80.62
Epoch: 2 cost: 0.764362547611
Precision: 85.06
Epoch: 3 cost: 0.603306835619
Precision: 86.64
Epoch: 4 cost: 0.527544852211
Precision: 87.59
Epoch: 5 cost: 0.482944734145
Precision: 88.39
Epoch: 6 cost: 0.452747029397
Precision: 88.98
Epoch: 7 cost: 0.430847841442
Precision: 89.16
Epoch: 8 cost: 0.413731829362
Precision: 89.53
Epoch: 9 cost: 0.400174752907
Precision: 89.81
Epoch: 10 cost: 0.388903523361
Precision: 89.86
Epoch: 11 cost: 0.379757916481
Precision: 90.18
Epoch: 12 cost: 0.371843931334
Precision: 90.14
Epoch: 13 cost: 0.36500006586
Precision: 90.41
Epoch: 14 cost: 0.359152875081
Precision: 90.55
Epoch: 15 cost: 0.353860027858
Precision: 90.63
Epoch: 16 cost: 0.349219820837
Precision: 90.71
Epoch: 17 cost: 0.345106263302
Precision: 90.75
Epoch: 18 cost: 0.341333116451
Precision: 90.96
Epoch: 19 cost: 0.33801470679
Precision: 90.86
Epoch: 20 cost: 0.334803230794
Precision: 90.98
Epoch: 21 cost: 0.331977984456
Precision: 91.09
Epoc

In [12]:
a = np.array([1,2])

In [13]:
a - np.random.normal(0,0.01)

array([ 0.99256517,  1.99256517])