In [21]:
%matplotlib inline

import numpy as np
import sys
from random import shuffle
from sklearn.datasets import fetch_mldata

In [3]:
mnist = fetch_mldata('MNIST original')
mnist

{'DESCR': 'mldata.org dataset: mnist-original',
 'COL_NAMES': ['label', 'data'],
 'target': array([0., 0., 0., ..., 9., 9., 9.]),
 'data': array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)}

In [4]:
mnist.target.shape, mnist.data.shape

((70000,), (70000, 784))

In [5]:
784 ** (1/2)

28.0

In [6]:
%run /Users/dimatomp/Documents/cnn-mnist/layer.py

In [23]:
baseLayer = InitialLayer((28, 28))
conv1 = ConvolutionLayer(baseLayer, (7, 7))
conv2 = ConvolutionLayer(baseLayer, (7, 7))
conv3 = ConvolutionLayer(baseLayer, (7, 7))
conv4 = ConvolutionLayer(baseLayer, (7, 7))
sub1 = SubsampleLayer(conv1, (2, 2))
sub2 = SubsampleLayer(conv2, (2, 2))
sub3 = SubsampleLayer(conv3, (2, 2))
sub4 = SubsampleLayer(conv4, (2, 2))
comb = CombineLayer(sub1, sub2, sub3, sub4)
full1 = FullLayer(comb, (10, 10))
result = FullLayer(full1, (1, 10), activation='linear')

In [25]:
a = list(zip(mnist.data, mnist.target))
shuffle(a)
lossFunc = None
prevLossFunc = None
learnRate = 0.01
for i, (data, target) in enumerate(a[:60000]):
    if i > 0 and i % 5000 == 0:
        print(chr(13) + str(i), lossFunc, learnRate, file=sys.stderr)
        #if prevLossFunc is not None and lossFunc > prevLossFunc:
        #    learnRate *= 0.5
        #prevLossFunc = lossFunc
    result.set_input(data.reshape((28, 28)) / 256)
    result.forward()
    exp = np.exp(result.output)
    sumExp = exp.sum()
    probs = exp / sumExp
    sumExpMinus = sumExp - exp
    label = np.where(np.arange(0, 10) == int(target), 1, 0)
    sumSecond = (1 - label) / sumExpMinus
    sumSecond = sumSecond.sum() - sumSecond
    cLoss = 10 * np.log(sumExp) - (label * result.output + (1 - label) * np.log(sumExpMinus)).sum()
    lossFunc = cLoss if lossFunc is None else lossFunc * 0.999 + cLoss * 0.001
    lossDeriv = 10 * probs - label - exp * sumSecond
    result.backward(lossDeriv, learnRate)
    
# Print the training log loss
lossFunc

5000 0.4748706772164419 0.01


10000 0.5331561820441284 0.01


15000 0.508029485985544 0.01


20000 0.45772253465530005 0.01


25000 0.4864931226002865 0.01


30000 0.42060607659104143 0.01


35000 0.4817470543688176 0.01


40000 0.4248827896179317 0.01


45000 0.39173160092144743 0.01


50000 0.40354486893605723 0.01


55000 0.390698589703921 0.01


0.35868258460331404

In [27]:
lossFunc = 0
nHits = 0
for data, target in a[60000:]:
    result.set_input(data.reshape((28, 28)) / 256)
    result.forward()
    if np.argmax(result.output) == int(target):
        nHits += 1
    exp = np.exp(result.output)
    sumExp = exp.sum()
    sumExpMinus = sumExp - exp
    label = np.where(np.arange(0, 10) == int(target), 1, 0)
    lossFunc += 10 * np.log(sumExp) - (label * result.output + (1 - label) * np.log(sumExpMinus)).sum()
    
# Print the test log loss and accuracy
lossFunc / (len(a) - 60000), nHits / (len(a) - 60000)

(0.35795235755533156, 0.938)

In [30]:
import pickle

with open('testmodel.dat', 'wb') as f:
    pickle.dump(result, f)