In [None]:
import sys, numpy as np
from keras.datasets import mnist
# Fetch MNIST as (train, test) pairs of image and digit arrays.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Training subset: the first 1000 images, each flattened to 28*28 values and
# divided by 255.
train_digits = y_train[:1000]
images = x_train[:1000].reshape(1000, 28 * 28) / 255

# One-hot targets: row n holds a single 1 in the column given by digit n.
one_hot_labels = np.zeros((len(train_digits), 10))
one_hot_labels[np.arange(len(train_digits)), train_digits] = 1
labels = one_hot_labels

# The complete test split gets the same flattening, scaling and encoding.
test_images = x_test.reshape(len(x_test), 28 * 28) / 255
test_labels = np.zeros((len(y_test), 10))
test_labels[np.arange(len(y_test)), y_test] = 1

    
# Fix the global NumPy RNG so the weight initialisation below is repeatable.
np.random.seed(1)


def relu(x):
    """Keep the non-negative entries of x and zero out the negative ones."""
    return (x >= 0) * x


def relu2deriv(output):
    """Boolean mask marking the strictly positive entries of output."""
    return output > 0


# Hyperparameters and layer sizes.
alpha = 0.005
iterations = 350
hidden_size = 40
pixels_per_image = 28 * 28
num_labels = 10

# Both weight matrices start uniformly in [-0.1, 0.1).
weights_0_1 = 0.2 * np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

for j in range(iterations):
    error = 0.0
    correct_cnt = 0

    # One weight update per training example.
    for i in range(len(images)):
        target = labels[i:i + 1]

        # Forward pass: input -> ReLU hidden layer -> linear output.
        layer_0 = images[i:i + 1]
        layer_1 = relu(layer_0.dot(weights_0_1))
        layer_2 = layer_1.dot(weights_1_2)

        error += np.sum((target - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == np.argmax(target))

        # Backward pass.
        layer_2_delta = target - layer_2
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)

        # (n, 1) * (1, m) broadcasts to the (n, m) outer product.
        weights_1_2 += alpha * (layer_1.T * layer_2_delta)
        weights_0_1 += alpha * (layer_0.T * layer_1_delta)

    # Every 10th iteration (and the last one) score the test set; the
    # accumulators are reused, so the training totals are discarded here.
    if j % 10 == 0 or j == iterations - 1:
        error = 0.0
        correct_cnt = 0
        for i in range(len(test_images)):
            expected = test_labels[i:i + 1]
            layer_0 = test_images[i:i + 1]
            layer_1 = relu(layer_0.dot(weights_0_1))
            layer_2 = layer_1.dot(weights_1_2)
            error += np.sum((layer_2 - expected) ** 2)
            correct_cnt += (np.argmax(layer_2) == np.argmax(expected))

        test_count = float(len(test_images))
        report = "".join([
            "\r",
            " I: ", str(j),
            " Test-Err: ", str(error / test_count)[0:5],
            " Test-Acc: ", str(correct_cnt / test_count),
        ])
        sys.stdout.write(report)
        print()
#     if j % 10 == 9:
#         print(
#         "\r" + 
#         " I: " + str(j) +
#         " Error: " + str(error / float(len(images)))[0:5] +
#         " Correct: " + str(correct_cnt / float(len(images))))
    


In [4]:
# With dropout
import sys, numpy as np
from keras.datasets import mnist
# Fetch MNIST as (train, test) pairs of image and digit arrays.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Training subset: the first 1000 images, each flattened to 28*28 values and
# divided by 255.
train_digits = y_train[:1000]
images = x_train[:1000].reshape(1000, 28 * 28) / 255

# One-hot targets: row n holds a single 1 in the column given by digit n.
one_hot_labels = np.zeros((len(train_digits), 10))
one_hot_labels[np.arange(len(train_digits)), train_digits] = 1
labels = one_hot_labels

# The complete test split gets the same flattening, scaling and encoding.
test_images = x_test.reshape(len(x_test), 28 * 28) / 255
test_labels = np.zeros((len(y_test), 10))
test_labels[np.arange(len(y_test)), y_test] = 1

    
# Fix the global NumPy RNG so weight init and dropout masks are repeatable.
np.random.seed(1)


def relu(x):
    """Keep the non-negative entries of x and zero out the negative ones."""
    return (x >= 0) * x


def relu2deriv(output):
    """Boolean mask marking the strictly positive entries of output."""
    return output > 0


# Hyperparameters and layer sizes.
alpha = 0.005
iterations = 350
hidden_size = 40
pixels_per_image = 28 * 28
num_labels = 10

# Both weight matrices start uniformly in [-0.1, 0.1).
weights_0_1 = 0.2 * np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

for j in range(iterations):
    error = 0.0
    correct_cnt = 0

    # One weight update per training example.
    for i in range(len(images)):
        target = labels[i:i + 1]

        layer_0 = images[i:i + 1]
        layer_1 = relu(layer_0.dot(weights_0_1))

        # Dropout: a random 0/1 mask silences hidden units; the survivors
        # are doubled.
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask * 2

        layer_2 = layer_1.dot(weights_1_2)
        error += np.sum((target - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == np.argmax(target))

        # Backward pass; dropped units receive no gradient.
        layer_2_delta = target - layer_2
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
        layer_1_delta *= dropout_mask

        # (n, 1) * (1, m) broadcasts to the (n, m) outer product.
        weights_1_2 += alpha * (layer_1.T * layer_2_delta)
        weights_0_1 += alpha * (layer_0.T * layer_1_delta)

    # Every 10th iteration (and the last one): score the test set with no
    # dropout applied, then print test and training figures side by side.
    if j % 10 == 0 or j == iterations - 1:
        new_error = 0.0
        new_correct_cnt = 0
        for i in range(len(test_images)):
            expected = test_labels[i:i + 1]
            layer_0 = test_images[i:i + 1]
            layer_1 = relu(layer_0.dot(weights_0_1))
            layer_2 = layer_1.dot(weights_1_2)
            new_error += np.sum((layer_2 - expected) ** 2)
            new_correct_cnt += (np.argmax(layer_2) == np.argmax(expected))

        test_count = float(len(test_images))
        train_count = float(len(images))
        print("".join([
            "\r",
            " I: ", str(j),
            " Test-Err: ", str(new_error / test_count)[0:5],
            " Test-Acc: ", str(new_correct_cnt / test_count),
        ]))
        print("".join([
            "\r",
            " I: ", str(j),
            " Error: ", str(error / train_count)[0:5],
            " Correct: ", str(correct_cnt / train_count),
        ]))
        print("----------------------------------------")
    


 I: 0 Test-Err: 0.728 Test-Acc: 0.5118
 I: 0 Error: 0.895 Correct: 0.279
----------------------------------------
 I: 10 Test-Err: 0.436 Test-Acc: 0.7854
 I: 10 Error: 0.489 Correct: 0.72
----------------------------------------


KeyboardInterrupt: 

In [156]:
# Print the fraction of ones among np.random.randint(2, ...) draws for
# sample sizes 1, 101, 201, ... below 1,000,000.
for sample_size in range(1, 1000000, 100):
    draws = np.random.randint(2, size=sample_size)
    print(draws.sum() / draws.size)

1.0
0.46534653465346537
0.5422885572139303
0.4983388704318937
0.5635910224438903
0.4930139720558882
0.5074875207986689
0.4964336661911555
0.5056179775280899
0.532741398446171
0.5254745254745254
0.4904632152588556
0.48376353039134057
0.4796310530361261
0.5017844396859387
0.49100599600266487
0.5190505933791381
0.4991181657848324
0.4719600222098834
0.5060494476591267
0.5067466266866567
0.5145168967158495
0.5170377101317583
0.4784876140808344
0.5039566847147022
0.4950019992003199
0.5101883890811226
0.5027767493520918
0.5069617993573724
0.500517063081696
0.5091636121292903
0.5037084811351177
0.5048422368009997
0.4874280521054226
0.5116142311084975
0.49785775492716366
0.515690086087198
0.5139151580653877
0.500394632991318
0.47987695462701874
0.493376655836041
0.4908558888076079
0.5010711735301119
0.5063938618925832
0.5155646443990002
0.5012219506776272
0.5085850901977831
0.5060625398851308
0.5001041449697979
0.5092838196286472
0.4963007398520296
0.4893158204273672
0.5016343010959431
0.504433

0.4997888312148336
0.49987075005782233
0.49766986861591556
0.49987110079917507
0.5015921193479763
0.5010216370549789
0.5009932298212186
0.4995209241440736
0.5003436611366424
0.500370116149177
0.503420652948213
0.49973825854686516
0.49904156780740205
0.5029785411172542
0.5014772529779014
0.4979105752927197
0.501259983200224
0.4997936112701562
0.4992353825082113
0.5017197646777599
0.5020092571716556
0.500125826147998
0.5023875345564212
0.4991083341039088
0.4994525138190789
0.4968182237388177
0.4993618505019671
0.4957096490190668
0.49683075025262136
0.498446940407072
0.4974018664677164
0.4997581730957765
0.4994451769559144
0.5008800406774357
0.5012304527284801
0.5002405690433155
0.49998051973350993
0.5007328050219841
0.5001360086009249
0.5009120192494275
0.4984560922985491
0.501683849240655
0.5017847708148091
0.49825613569966926
0.4980912841737253
0.5004813802133478
0.5002115357495417
0.5010691284362556
0.49847188654876534
0.49915071327313826
0.5008354485274422
0.5024267206787175
0.500400

0.501471620768647
0.4990751305802227
0.5013703868952769
0.5009023290705656
0.4996852921832236
0.5022579345729006
0.4984357455102718
0.4995589304239208
0.5012447020824959
0.4968816287411646
0.4986021225202639
0.5006157592135172
0.4999050639587626
0.4997645835236576
0.49935744833252577
0.5005578908218188
0.5023807687183119
0.4999264195765972
0.49723041155173986
0.5001364581073611
0.502716764218432
0.5009049552414029
0.500436449466135
0.4973238149070837
0.4983716989421273
0.5011742078452415
0.5008391306467225
0.4956750474944503
0.5013386555030911
0.499683810397426
0.4996493079909167
0.4989278353377145
0.501002073494636
0.49829869508873814
0.5010907126681948
0.500176469366994
0.5001763473281651
0.49895992425760705
0.49880180385494577
0.4999137342047329
0.5004517210226136
0.5001343891496268
0.49994834746317174
0.4988128092717875
0.500890640366985
0.5012611597171154
0.49941277875838763
0.4999279346058023
0.5005178290958224
0.49933859260731595
0.5001335607290361
0.5011943792308061
0.499244191

0.5008216742174115
0.5003469777984134
0.49961826537791726
0.5008403948109984
0.5009596163528597
0.49855755576703653
0.5005253957898615
0.5013066137052579
0.5012014865597684
0.5002312270948429
0.49823808032763256
0.5005489292154535
0.4983639604569987
0.49959553550602726
0.5001810506892327
0.5000966777556879
0.5027824440909608
0.4995269959039331
0.5022103850971035
0.5005962365351978
0.5002893160765772
0.5016188748449093
0.4987920020158003
0.49993827190976836
0.49862044116267934
0.4994992624604713
0.5012056153569262
0.5010867368815334
0.49983005009827536
0.5011792162520126
0.4982209733219817
0.5007501192812628
0.5017723610011751
0.4993980373560818
0.4992558975643538
0.49932990019685713
0.4985402426877199
0.5001593910770423
0.5002328419958726
0.4973371027089529
0.5007174303749736
0.4988277100944195
0.5010983312214715
0.5017139280492516
0.5007746785206328
0.4999975574130073
0.5006030243992949
0.500651534155519
0.49997317086258114
0.4990565623765852
0.5005774825658744
0.4983755558911062
0.49

0.4995363581031123
0.5004144531872062
0.5000510201999175
0.4997246033267918
0.4986725176487861
0.49890542639451124
0.5009800286062404
0.5008492837096387
0.5009588723172951
0.49958689626822844
0.5006570355694241
0.49961976567805744
0.5006077211068247
0.4981816408710245
0.5008224986900947
0.5010008079544948
0.5008502400558439
0.4982373296660054
0.5008089991524771
0.5002452361360513
0.5003950551253844
0.500589305025091
0.49998178145027755
0.4997389731324438
0.4984203138336819
0.4998483629261507
0.499456348195844
0.5002202011305005
0.5004624375507368
0.4991905563562521
0.5002118635518017
0.501155703284779
0.49929637380494435
0.5000100765414085
0.499635376166897
0.499828836774721
0.498790262519072
0.5009838994611692
0.5004927574708067
0.49823683861343543
0.5009385010510408
0.5006046580769061
0.5004236930775379
0.4995002027290135
0.5007042507855105
0.5009767309397074
0.5011086563405921
0.49955310800357516
0.5000340543507438
0.5000340407126924
0.5003222565161869
0.4977411054777692
0.500541997

0.5012639929962912
0.5001373808094274
0.5004967768844459
0.5001067792990532
0.4999373095990864
0.4991175504148021
0.497573662127795
0.5000930937945369
0.499670051877997
0.4998765227451869
0.500410888025404
0.49967376716103057
0.49973132905938134
0.500653714007723
0.49841101516036757
0.5000928423604242
0.501911907148474
0.49984986555375993
0.4986290096829353
0.4996948762816039
0.4994556809717527
0.5007732453731625
0.5000825190888545
0.5001969690337743
0.5003988542616821
0.5006914512400699
0.4983770656674549
0.4997595838615203
0.5001764699950588
0.4989364955090877
0.49957507700679543
0.4995349243286624
0.4999211147327468
0.501471471572243
0.5002264333229341
0.5001492282051369
0.49937814489391585
0.5008997959122121
0.5019078663053055
0.5004001995974562
0.4995764995764996
0.49995147271930146
0.5017012321805547
0.5004297644489484
0.4989083954918238
0.4998980618380286
0.4980370930935747
0.5019188312664287
0.5005592635750798
0.5003154195079456
0.5009692994017371
0.5011857865717593
0.499634879

KeyboardInterrupt: 

In [None]:
# with using batch gradient descent
import numpy as np
from keras.datasets import mnist

# Fetch MNIST as (train, test) pairs of image and digit arrays.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Training subset: the first 1000 images, each flattened to 28*28 values and
# divided by 255.
train_digits = y_train[:1000]
images = x_train[:1000].reshape(1000, 28 * 28) / 255

# One-hot targets: row n holds a single 1 in the column given by digit n.
one_hot_labels = np.zeros((len(train_digits), 10))
one_hot_labels[np.arange(len(train_digits)), train_digits] = 1
labels = one_hot_labels

# The complete test split gets the same flattening, scaling and encoding.
test_images = x_test.reshape(len(x_test), 28 * 28) / 255
test_labels = np.zeros((len(y_test), 10))
test_labels[np.arange(len(y_test)), y_test] = 1

# Seed the global NumPy RNG so the weight initialisation and the dropout
# masks repeat from run to run (same seed as the other training cells).
np.random.seed(1)


def relu(x):
    """Keep the non-negative entries of x and zero out the negative ones."""
    return (x >= 0) * x


def relu2deriv(output):
    """Boolean mask marking the strictly positive entries of output."""
    return output > 0


batch_size = 200
alpha, iterations = (0.001, 300)
pixels_per_image, num_labels, hidden_size = (28 * 28, 10, 40)

# Both weight matrices start uniformly in [-0.1, 0.1).
weights_0_1 = 0.2 * np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

# Only whole batches are used; a trailing partial batch is skipped.
num_batches = len(images) // batch_size
num_trained = num_batches * batch_size

for j in range(iterations):
    error, correct_cnt = (0.0, 0)

    for i in range(num_batches):
        batch_st, batch_end = (i * batch_size, (i + 1) * batch_size)
        batch_labels = labels[batch_st:batch_end]

        # Forward pass with dropout on the hidden layer: a random 0/1 mask
        # silences units and the survivors are doubled.
        layer_0 = images[batch_st:batch_end]
        layer_1 = relu(layer_0.dot(weights_0_1))
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = layer_1.dot(weights_1_2)

        error += np.sum((layer_2 - batch_labels) ** 2)
        correct_cnt += int(np.sum(
            np.argmax(layer_2, axis=1) == np.argmax(batch_labels, axis=1)))

        # layer_2_delta is (target - prediction) averaged over the batch,
        # i.e. it already points in the direction that reduces the error.
        layer_2_delta = (batch_labels - layer_2) / batch_size
        # Back-propagate the *delta* (not the raw output) through weights_1_2.
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
        layer_1_delta *= dropout_mask

        # Because the deltas are (target - prediction), the step is added;
        # subtracting it would increase the error.
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)

    # Every 10th iteration (and the last one) score the whole test set in a
    # single forward pass, without dropout, and report both splits.
    if j % 10 == 0 or j == iterations - 1:
        test_layer_1 = relu(test_images.dot(weights_0_1))
        test_layer_2 = test_layer_1.dot(weights_1_2)
        test_error = np.sum((test_layer_2 - test_labels) ** 2)
        test_correct_cnt = int(np.sum(
            np.argmax(test_layer_2, axis=1) == np.argmax(test_labels, axis=1)))

        print("I:", j)
        print("Error: ", error / num_trained,
              " test_Error: ", test_error / len(test_images))
        print("Correct: ", correct_cnt / num_trained,
              " test_Correct: ", test_correct_cnt / len(test_images))
        print("------------------------------")

In [1]:
import numpy as np
from keras.datasets import mnist

# Fetch MNIST as (train, test) pairs of image and digit arrays.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Training subset: the first 1000 images, each flattened to 28*28 values and
# divided by 255, together with their digit classes.
images, labels = (x_train[:1000].reshape(1000, 28 * 28) / 255, y_train[:1000])

# One-hot encode the training digits (loop names chosen so the builtin
# `iter` is not shadowed).
one_hot_labels = np.zeros((len(labels), 10))
for row, digit in enumerate(labels):
    one_hot_labels[row][digit] = 1
labels = one_hot_labels

# Test subset: the first 1000 images and the *same* first 1000 digits, so
# test_images and test_labels have matching row counts.
test_images = x_test[:1000].reshape(1000, 28 * 28) / 255
test_digits = y_test[:1000]
test_labels = np.zeros((len(test_digits), 10))
for row, digit in enumerate(test_digits):
    test_labels[row][digit] = 1


def tanh(x):
    """Element-wise hyperbolic tangent of x, delegated to np.tanh."""
    activated = np.tanh(x)
    return activated


def tanh2deriv(output):
    """Return 1 - output**2, element-wise.

    When output is already tanh(x), this equals the derivative of tanh at x.
    """
    squared = output * output
    return 1 - squared


def softmax(x):
    """Row-wise softmax of a 2-D array.

    Each row is exponentiated and divided by its own sum, so every row of
    the result is non-negative and sums to 1.

    Args:
        x: array with at least two dimensions; normalisation runs along
            axis 1.

    Returns:
        Array of the same shape as x.
    """
    # Subtracting the row maximum leaves the result mathematically unchanged
    # (the factor cancels in the ratio) but keeps np.exp from overflowing to
    # inf, which would otherwise turn the row into nan.
    shifted = x - np.max(x, axis=1, keepdims=True)
    temp = np.exp(shifted)
    return temp / np.sum(temp, axis=1, keepdims=True)

alpha, iterations, hidden_size = (2, 300, 40)
pixels_per_image, num_labels = (28 * 28, 10)
batch_size = 100

# Seed the global NumPy RNG so weight init and dropout masks are repeatable.
np.random.seed(1)
# Both weight matrices start uniformly in [-0.01, 0.01).
weights_0_1 = 0.02 * np.random.random((pixels_per_image, hidden_size)) - 0.01
weights_1_2 = 0.02 * np.random.random((hidden_size, num_labels)) - 0.01

for j in range(iterations):
    correct_cnt = 0
    for i in range(int(len(images) / batch_size)):
        batch_st, batch_end = (i * batch_size, (i + 1) * batch_size)
        batch_labels = labels[batch_st:batch_end]

        layer_0 = images[batch_st:batch_end]
        layer_1 = tanh(layer_0.dot(weights_0_1))

        # Draw the 0/1 dropout mask WITHOUT rebinding np.random: assigning the
        # array to np.random would replace the module and break every later
        # np.random.* call in the kernel.
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        # Survivors are doubled, as in the other dropout cells of this notebook.
        layer_1 *= dropout_mask * 2
        layer_2 = layer_1.dot(weights_1_2)

        # Training accuracy is measured against the training labels of this
        # batch (not the test labels).
        correct_cnt += int(np.sum(
            np.argmax(layer_2, axis=1) == np.argmax(batch_labels, axis=1)))

        # Work-in-progress scaffolding: show the batch dimension and stop
        # after the first batch of the first iteration.
        print(layer_2.shape[0])
        break
    break
# TODO: the output activation and the backward pass (layer_2_delta,
# layer_1_delta, weight updates) are not implemented yet.



100
