In [5]:
import gzip
import numpy as np
import matplotlib
import time
%pylab inline

Populating the interactive namespace from numpy and matplotlib


## Simple Keras 92% 16s For Comparison

In [6]:
import keras
from keras.datasets import mnist
from keras.layers import Dense, Dropout, Activation, Flatten, Convolution2D, MaxPooling2D  
from keras.models import Sequential

# Load MNIST, then apply the same preprocessing to the train and test splits.
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
# Append a trailing axis (index 3) to the image arrays.
X_train, X_test = (np.expand_dims(images, axis=3) for images in (X_train, X_test))
# Normalize with keras.utils.normalize along axis 1.
X_train, X_test = (keras.utils.normalize(images, axis=1) for images in (X_train, X_test))
# One-hot encode the integer labels.
Y_train, Y_test = (keras.utils.to_categorical(labels) for labels in (Y_train, Y_test))

# Baseline model: Dense(28, relu) -> Flatten -> Dense(10, softmax).
model = Sequential([
    Dense(28, name='dense_in', activation='relu', input_shape=(28,28,1)),
    Flatten(name='flat'),
    Dense(10, name='dense_last', activation='softmax'),
])

# Train for a single epoch with Adam, then report loss/accuracy on the test split.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, Y_train, epochs=1, batch_size=32, verbose=1)
results = model.evaluate(X_test, Y_test, batch_size=32)
print('test loss, test acc:', results)

Epoch 1/1
test loss, test acc: [0.28741244918704034, 0.921999990940094]


## Issues
- Try cross-entropy loss
- Find out why the algorithm is so slow
- ***Working*** - Batching
- Dropout
- Max pooling
- Each pass is fast, but the algorithm is still slow overall; batching might help

## NUMPY ATTEMPT 90% 45s

In [7]:
# Setup
start = time.time()  # reference point for the elapsed-time figures printed during training
epochs, lr, batch = 10000, 0.01, 16

# Data
print('Importing MNIST Data')
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
# Reshape every image into a row of 784 values and divide by 255.
X_train, X_test = (images.reshape(-1, 784)/255 for images in (X_train, X_test))
from keras.utils.np_utils import to_categorical
# One-hot encode the labels.
Y_train, Y_test = (to_categorical(labels) for labels in (Y_train, Y_test))

# Layers
print('Setup')

def _scaled_randn(n_out, n_in):
    '''
    Draw an (n_out, n_in) standard-normal matrix scaled by sqrt(1/(n_out+n_in)).
    '''
    return np.random.randn(n_out, n_in)*np.sqrt(1/(n_out+n_in)) # Xavier Initialization

# Weight matrices are stored as (outputs, inputs); the draw order w0, w1, out is kept.
w0 = _scaled_randn(64, 784)
w1 = _scaled_randn(32, 64)
out = _scaled_randn(10, 32)

def shuffl3(x, y):
    '''
    Shuffle the order of incoming images.

    x and y are index-able arrays of equal length (images and their labels).
    One random permutation is drawn and applied to both, so every image stays
    paired with its label. The inputs are not modified; the shuffled copies
    are returned as the tuple (x_shuffled, y_shuffled).
    '''
    assert len(x) == len(y)
    # Use the explicitly imported `np` alias instead of the bare `numpy` name,
    # which only exists when `%pylab inline` has populated the namespace.
    ids = np.random.permutation(len(x))
    return x[ids], y[ids]

def for_back_pass(x, y, backpass=True):
    '''
    Run one sample through the network and optionally compute weight gradients.

    The weight matrices w0, w1 and out are read from module scope; this
    function never modifies them.

    x is the incoming singular image (a 1-D vector that w0 can be dotted with)
    y is the label such as [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
    backpass is True by default. Set as True if you want gradients for a weight
    update. Set as False to skip the backward pass (the three gradient slots
    are then returned as 0).

    Returns the tuple (dx_out, dx_w0, dx_w1, guess, loss, correct):
    dx_out, dx_w0, dx_w1 are the gradients for out, w0 and w1,
    guess is the softmax output,
    loss is the mean absolute difference between guess and y,
    correct is True when argmax(guess) equals argmax(y).
    '''
    # Forward pass: two bias-free ReLU layers followed by a softmax output.
    res_w0 = np.dot(w0, x)
    res_rel0 = np.maximum(res_w0, 0)
    res_w1 = np.dot(w1, res_rel0)
    res_rel1 = np.maximum(res_w1, 0)
    res_out = np.dot(out, res_rel1)
    #https://www.youtube.com/watch?v=mlaLLQofmR8 softmax video
    # Softmax; subtracting the max keeps np.exp from overflowing. The
    # exponentials are computed once and reused for numerator and denominator.
    exps = np.exp(res_out - res_out.max())
    guess = exps / np.sum(exps, axis=0)
    error = guess - y
    loss = abs(error).mean(axis=0)
    correct = (np.argmax(y) == np.argmax(guess))

    # Backward Prop
    if backpass:
        # NOTE(review): guess*(1-guess) is only the diagonal of the softmax
        # Jacobian, so this is an approximate gradient. Kept as-is so training
        # behaviour does not change.
        dd = guess*(1-guess)
        error = error * dd
        dx_out = np.outer(error, res_rel1)
        # Propagate through each ReLU: units that were inactive get no gradient.
        error = np.dot(out.T, error) * (res_rel1 > 0)
        dx_w1 = np.outer(error, res_rel0)
        error = np.dot(w1.T, error) * (res_rel0 > 0)
        dx_w0 = np.outer(error, x)
    else:
        dx_out, dx_w0, dx_w1 = 0, 0, 0

    return dx_out, dx_w0, dx_w1, guess, loss, correct

# Loop
# Trains w0, w1 and out in place (the module-level names are rebound on every update).
# NOTE: with batch == 1 an "epoch" is a full shuffled pass over X_train; with any
# other batch size an "epoch" is a single update on `batch` randomly drawn samples.
loss_list = []
print('Running {} epochs'.format(epochs))
# Gradients from the previous two updates: old_* is one step back, vold_* two steps back.
vold_dx_out = 0
vold_dx_w0 = 0
vold_dx_w1 = 0
old_dx_out = 0
old_dx_w0 = 0
old_dx_w1 = 0
backpass = True
validate = True
solver = 'my_momentum_v2'
if solver != 'my_momentum_v2':
    # Previously an unimplemented solver (e.g. 'adam') silently skipped every update.
    raise NotImplementedError('solver {!r} is not implemented'.format(solver))

# How often to print progress. The final `else` also covers epochs == 100,
# which the old `> 100` / `< 100` chain skipped entirely.
if epochs > 10000:
    report_every = 10000
elif epochs > 1000:
    report_every = 1000
elif epochs > 100:
    report_every = 100
else:
    report_every = 1


def _momentum_step(grad_out, grad_w0, grad_w1):
    '''
    Apply one 'my_momentum_v2' update to the global weights out, w0 and w1.

    Each weight moves by lr times the current gradient, plus 0.5*lr times the
    previous gradient and 0.25*lr times the one before that. The gradient
    history is then shifted by one step.
    '''
    global out, w0, w1
    global old_dx_out, old_dx_w0, old_dx_w1
    global vold_dx_out, vold_dx_w0, vold_dx_w1
    out = out - lr*grad_out - 0.5*lr*old_dx_out - 0.25*lr*vold_dx_out
    w0 = w0 - lr*grad_w0 - 0.5*lr*old_dx_w0 - 0.25*lr*vold_dx_w0
    w1 = w1 - lr*grad_w1 - 0.5*lr*old_dx_w1 - 0.25*lr*vold_dx_w1
    # Trying Momentum
    vold_dx_out, vold_dx_w0, vold_dx_w1 = old_dx_out, old_dx_w0, old_dx_w1
    old_dx_out, old_dx_w0, old_dx_w1 = grad_out, grad_w0, grad_w1


for epoch in range(epochs):
    correct = []
    if batch == 1:
        # Per-sample updates over one shuffled pass of the training set.
        X, Y = shuffl3(X_train, Y_train)
        for x, y in zip(X, Y):
            dx_out, dx_w0, dx_w1, guess, loss, correcti = for_back_pass(x, y, backpass=backpass)
            if backpass:
                _momentum_step(dx_out, dx_w0, dx_w1)
            correct.append(correcti)

    else: # batching will require more epochs
        # Draw `batch` indices with replacement; the upper bound is exclusive.
        # Uses np.random directly instead of the `randint` name injected by %pylab.
        ids = np.random.randint(0, X_train.shape[0], size=batch)
        X = X_train[ids]
        Y = Y_train[ids]
        dx_out_l = np.zeros_like(out)
        dx_w0_l = np.zeros_like(w0)
        dx_w1_l = np.zeros_like(w1)
        loss_l = []
        correcti_l = []
        for x, y in zip(X, Y):
            dx_out, dx_w0, dx_w1, guess, loss, correcti = for_back_pass(x, y, backpass=backpass)
            dx_out_l += dx_out
            dx_w0_l += dx_w0
            dx_w1_l += dx_w1
            loss_l.append(loss)
            correcti_l.append(correcti)
        # Average gradients, loss and accuracy over the mini-batch.
        dx_out = dx_out_l / batch
        dx_w0 = dx_w0_l /batch
        dx_w1 = dx_w1_l /batch
        loss = sum(loss_l)/batch
        correcti = sum(correcti_l)/batch
        if backpass:
            _momentum_step(dx_out, dx_w0, dx_w1)

        correct.append(correcti)

    correct_percent = sum(correct) / len(correct)
    loss_list.append(loss)
    if epoch % report_every == 0:
        print('Epoch{} Time = {}s loss={} accuracy = {}'.format(epoch, time.time() - start, loss, correct_percent))
print('Final Epoch Result')
print('Epoch{} Time = {}s loss={} accuracy = {}'.format(epoch, time.time() - start, loss, correct_percent))
        
if validate:
    print()
    print('Validating...')
    # Score the test set in shuffled order; backpass=False means no gradients
    # are computed and the weights are left alone.
    X, Y = shuffl3(X_test, Y_test)
    correct_l = []
    for x, y in zip(X, Y):
        dx_out, dx_w0, dx_w1, guess, loss, correcti = for_back_pass(x, y, backpass=False)
        correct_l.append(correcti)
    # Fraction of test samples whose arg-max prediction matched the label.
    correct_percent = sum(correct_l) / len(correct_l)
    # Three blank lines, the banner, then two blank lines.
    print(
        '',
        '',
        '',
        '######################################',
        'VALIDATION CORRECT = {}'.format(correct_percent),
        '######################################',
        '',
        '',
        sep='\n',
    )

Importing MNIST Data
Setup
Running 10000 epochs
Epoch0 Time = 0.5733880996704102s loss=0.17997360085112396 accuracy = 0.0625
Epoch1000 Time = 4.913451433181763s loss=0.17586762949243145 accuracy = 0.25
Epoch2000 Time = 9.274529457092285s loss=0.1192390267613016 accuracy = 0.625
Epoch3000 Time = 13.661634922027588s loss=0.08462221409664564 accuracy = 0.6875
Epoch4000 Time = 18.089749574661255s loss=0.08985055064347795 accuracy = 0.6875
Epoch5000 Time = 22.486852407455444s loss=0.06234810262591403 accuracy = 0.875
Epoch6000 Time = 26.902969360351562s loss=0.07767183326828908 accuracy = 0.75
Epoch7000 Time = 31.22402787208557s loss=0.03166464271126266 accuracy = 0.9375
Epoch8000 Time = 35.65115547180176s loss=0.05187182093977273 accuracy = 0.8125
Epoch9000 Time = 40.06325602531433s loss=0.04865865544904874 accuracy = 0.875
Final Epoch Result
Epoch9999 Time = 44.80560278892517s loss=0.0185971299321406 accuracy = 0.9375

Validating...



######################################
VALIDATION COR

## Testing Area Getting Gradients Working

In [9]:
# Scratch check of the softmax output and its element-wise derivative on a toy example.
# One-hot target, kept 2-D so `error` below comes out with shape (1, 5).
actual = np.array([[0, 1, 0, 0, 0]])
print('actual = {}'.format(actual))
# Raw output-layer activations as a 1-D vector.
res_out = np.array([[0, .8, .5, .25, .7]])[0]
print('res_out = {}'.format(res_out))
# Earlier sigmoid variant, kept for reference: guess = 1/(1+np.exp(-res_out))
# Softmax with the max subtracted before exponentiating.
shifted_exp = np.exp(res_out - res_out.max())
guess = shifted_exp / np.sum(shifted_exp, axis=0)
print('guess = {}'.format(guess))
error = actual - guess
print('error = {}'.format(error))
# Element-wise term guess*(1-guess), as used in the backward pass.
dx_guess = guess*(1-guess)
print('dx_guess = {}'.format(dx_guess))

# True when the predicted class matches the target class.
print(np.argmax(actual) == np.argmax(guess))

actual = [[0 1 0 0 0]]
res_out = [0.   0.8  0.5  0.25 0.7 ]
guess = [0.12236846 0.27233602 0.20175148 0.15712421 0.24641982]
error = [[-0.12236846  0.72766398 -0.20175148 -0.15712421 -0.24641982]]
dx_guess = [0.10739442 0.19816911 0.16104782 0.1324362  0.18569709]
True
