In [1]:
import time

import numpy as np

import matplotlib.pyplot as plt

from keras.datasets import mnist

from keras.utils import to_categorical

Using TensorFlow backend.


In [2]:
# Load MNIST as (images, labels) pairs for the train and test splits.
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

# Report the shape of each of the four arrays, one per line.
print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape, sep='\n')

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz
(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)


In [3]:
# Flatten each image into a single row, keeping one row per sample.
X_train = X_train.reshape(len(X_train), -1)
X_test = X_test.reshape(len(X_test), -1)
print(X_train.shape, X_test.shape, sep='\n')

(60000, 784)
(10000, 784)


In [4]:
# One-hot encode the digit labels.
# The class count is fixed rather than inferred from the largest label seen,
# so both splits always get the same number of columns.
NUM_CLASSES = 10
# Only encode 1-D integer labels: re-running this cell on already-encoded
# labels would otherwise one-hot encode the one-hot matrix again.
if Y_train.ndim == 1:
    Y_train = to_categorical(Y_train, num_classes=NUM_CLASSES)
if Y_test.ndim == 1:
    Y_test = to_categorical(Y_test, num_classes=NUM_CLASSES)
print(Y_train.shape)
print(Y_test.shape)

(60000, 10)
(10000, 10)


In [0]:
def sigmoid(x, func='relu'):
    """Apply the hidden-layer activation element-wise.

    Despite the name, the activation computed here is ReLU, ``max(x, 0)``.

    Parameters
    ----------
    x : array_like
        Pre-activation values.
    func : str, optional
        Accepted for backward compatibility; it is not consulted and ReLU is
        always applied.

    Returns
    -------
    numpy.ndarray
        A new array (or ndarray subclass, following ``x``) holding
        ``max(x, 0)``. ``x`` itself is left untouched.
    """
    # No ``out=`` argument: writing the result back into ``x`` would silently
    # overwrite the caller's data, and fails outright for non-array inputs.
    return np.maximum(x, 0)

def predict(x, W1, W2):
    """Run the network forward and return the raw output scores.

    ``x`` is wrapped with ``np.matrix``, pushed through the hidden layer
    (``sigmoid(x @ W1)``) and then multiplied by the output weights ``W2``.
    """
    inputs = np.matrix(x)
    hidden = sigmoid(inputs @ W1)
    return hidden @ W2

def train(x, y, W1):
    """Fit the output weights in closed form by least squares.

    The hidden activations ``H = sigmoid(X @ W1)`` are computed with the given
    ``W1``, and ``W2`` is set to ``pinv(H) @ Y``, where ``pinv`` is the
    Moore-Penrose pseudo-inverse.

    Parameters
    ----------
    x : array_like
        Inputs; multiplied on the right by ``W1``.
    y : array_like
        Targets, with the same number of rows as ``x``.
    W1 : array_like
        Input-to-hidden weights, used as given.

    Returns
    -------
    residual : numpy.matrix
        ``H @ W2 - Y``, the fitting error on the training data.
    W2 : numpy.matrix
        The fitted hidden-to-output weights.
    """
    X = np.matrix(x)
    Y = np.matrix(y)
    H = np.matrix(sigmoid(X @ W1))
    # np.linalg.pinv is SVD-based as well, but it discards negligible singular
    # values instead of inverting them. Inverting diag(S) directly raises
    # LinAlgError when a singular value is exactly zero (e.g. a hidden unit
    # that never activates) and amplifies round-off when one is merely tiny.
    iH = np.matrix(np.linalg.pinv(H))
    W2 = iH @ Y
    return H @ W2 - Y, W2

In [0]:
# Layer sizes.
N = X_train.shape[1]  # input dimension
M = 1100              # hidden layer dimension
O = 10                # output dimension

# Fixed seed so the random weights below are identical on every run.
np.random.seed(10)
W1 = np.matrix(np.random.normal(size=(N, M)))
# Initial value only; the training cell reassigns W2.
W2 = np.matrix(np.random.normal(size=(M, O)))

In [7]:
# Time the closed-form fit. The first value returned by train() is the
# training residual (H @ W2 - Y); it is stored here under the name H.
t0 = time.time()
H, W2 = train(X_train, Y_train, W1)
elapsed = time.time() - t0
print(elapsed)

16.56381630897522


In [8]:
# Shapes of the value returned by train() and of both weight matrices.
print(H.shape, W1.shape, W2.shape, sep='\n')
# Not the last expression of the cell, so this value is computed and discarded.
X_test[0].shape
# One-hot label of the first test sample.
print(Y_test[0])

(60000, 10)
(784, 1100)
(1100, 10)
[0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]


In [9]:
# Score the whole test set, then show the raw outputs followed by the
# one-hot labels they should match.
pred = predict(X_test, W1, W2)
print(pred)
print(Y_test)

[[ 4.10157983e-02  6.39154304e-02 -2.66201966e-02 ...  8.54596654e-01
  -1.30668376e-01 -2.48093010e-03]
 [ 1.83975443e-01 -1.33513281e-01  7.34451949e-01 ...  3.84744128e-02
   8.59507660e-02  6.65011171e-02]
 [ 2.06442414e-03  8.60770553e-01 -9.88084749e-05 ...  8.77102440e-02
  -5.13171607e-03 -4.74000993e-02]
 ...
 [ 5.53432108e-03 -7.37397380e-02 -7.51237079e-02 ...  1.69944375e-01
   3.51951041e-02  2.10261942e-02]
 [-1.18923091e-01  3.59383344e-02  7.74162757e-02 ... -4.70376489e-02
   2.95182326e-01 -7.42775857e-02]
 [-7.59922905e-02 -1.70102125e-02 -9.56721419e-03 ...  1.26989999e-01
  -7.16600164e-02 -4.58734366e-02]]
[[0. 0. 0. ... 1. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [10]:
# Last expression of the cell, so the shape of the prediction matrix is shown
# as the cell's output.
pred.shape

(10000, 10)

In [11]:
# Count the test samples whose highest-scoring output matches the one-hot label.
total = pred.shape[0]
# np.asarray strips any np.matrix wrapper so argmax yields a flat (total,)
# vector. On a matrix, argmax(axis=1) gives a (total, 1) column, which would
# broadcast against the 1-D label vector instead of comparing element-wise.
predicted = np.argmax(np.asarray(pred), axis=1)
test = np.argmax(np.asarray(Y_test), axis=1)
correct = int(np.count_nonzero(predicted == test))

print(f'Correct: {correct} out of {total} for an accuracy: {correct / total}')

Correct: 9472 out of 10000 for an accuracy: 0.9472
