In [1]:
import numpy as np
import gzip
import struct

from matplotlib import pyplot as plt

In [2]:
def get_labels():
    with gzip.open("train_labels.gz", 'rb') as f:
        zero, data_type, dims = struct.unpack('>HBB', f.read(4))
        shape = tuple(struct.unpack('>I', f.read(4))[0] for d in range(dims))
        return np.fromstring(f.read(), dtype=np.uint8).reshape(shape)
    
def get_features():
    with gzip.open("train_features.gz", 'rb') as f:
        zero, data_type, dims = struct.unpack('>HBB', f.read(4))
        shape = tuple(struct.unpack('>I', f.read(4))[0] for d in range(dims))
        return np.fromstring(f.read(), dtype=np.uint8).reshape(shape)
    
def plot_image(image):
    plt.imshow(image.reshape((28,28)), cmap="gray")
    
labels = get_labels()
features = get_features().reshape((60000, 784))

  """
  # This is added back by InteractiveShellApp.init_path()


In [3]:
def sigmoid(x):
    return 1 / (1 + np.exp(0 - x))

def d_sigmoid(x):
    return sigmoid(x) * (1 - sigmoid(x))

def squared_loss(y, x):
    return (y - x) ** 2

def d_squared_loss(y, x):
    return - 2 * (y - x)

In [48]:
def run(xs, w_1, w_2, b_1, b_2):
    """
    :param x: the matrix of input data, shape (N, D)
    """
    N = xs.shape[1]
    D = xs.shape[0]
    f1 = w_1 @ xs
    f2 = (b_1 + f1.T).T
    f3 = sigmoid(f2)
    f4 = w_2 @ f3
    f5 = (b_2 + f4.T).T
    f6 = sigmoid(f5)
    f7 = squared_loss(xs, f6)
    f8 = np.sum(f7, axis=0)
    f9 = np.sum(f8, axis=0)
    f10 = f9 / N
    
    t10 = 1
    t9 = 1 / N
    t8 = np.full((N), 1 / N)
    t7 = np.full((D, N), 1 / N)
    t6 = d_squared_loss(xs, f6) * t7
    t5 = (d_sigmoid(f5)) * t6
    t4 = t5
    tb2 = np.sum(t5, axis=1)

    t3 = w_2.T @ t4
    print(f3.shape, t4.shape)
    tw2 = (f3 @ t4.T).T

    t2 = d_sigmoid(f2) * t3

    t1 = t2
    tb1 = np.sum(t2, axis=1)
    tw1 = t1 @ xs.T
    print(tw1.shape)
    return tw1, tw2, tb1, tb2

In [54]:
w1 = np.random.normal(0, 1, (30, 784))
w2 = np.random.normal(0, 1, (784, 30))
b1 = np.random.normal(0, 1, (30))
b2 = np.random.normal(0, 1, (784))

features = features / 255

run(features[0:10].T, w1, w2, b1, b2)

(30, 10) (784, 10)
(30, 784)


(array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 array([[0.12983126, 0.13480759, 0.08803568, ..., 0.08283627, 0.11203069,
         0.14511589],
        [0.15083146, 0.1566127 , 0.10227544, ..., 0.09623503, 0.13015164,
         0.16858836],
        [0.01901636, 0.01974524, 0.01289457, ..., 0.01213301, 0.01640912,
         0.0212551 ],
        ...,
        [0.07754306, 0.08051522, 0.05258022, ..., 0.04947481, 0.06691148,
         0.08667196],
        [0.07465204, 0.07751339, 0.05061988, ..., 0.04763026, 0.06441684,
         0.08344059],
        [0.15939853, 0.16550814, 0.10808458, ..., 0.10170108, 0.13754412,
         0.17816401]]),
 array([ 0.99092101,  2.64079968,  2.0926991 ,  2.5576804 ,  1.66809044,
        -0.66285343,  2.96573156,  0.72659597,  0.20792315,  0.12993034,
        -0.274