In [101]:
import scipy.io
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
from softmax_loss import softmax_loss
from feedforward_backprop import feedforward_backprop
from utils import *

warnings.filterwarnings("ignore")
%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load Data

In [105]:
# Read the digit dataset from disk and carve it into train / test partitions
# (first 4/5 of the cases for training, the remainder for testing).
data = scipy.io.loadmat('data/digit_data.mat')
X, y = data['X'], data['y']
num_cases = X.shape[1]
train_num_cases = (4 * num_cases) // 5
# Collapse to 400 rows by num_cases columns, then swap the two axes.
# X has the shape of [number of samples, number of pixels]
X = X.reshape(400, num_cases).T
# Samples are rows of X, while labels are indexed along the second axis of y.
train_data, test_data = X[:train_num_cases], X[train_num_cases:]
train_label, test_label = y[:, :train_num_cases], y[:, train_num_cases:]

# Gradient Check

In [114]:
# Numerical gradient check for the first-layer bias.
#
# Every name bound here is specific to the check (check_weights, X_check, ...)
# so that running this cell never overwrites the dataset arrays `X` / `y` or
# the `weights` / `loss` that the training and test cells rely on.
check_weights = {}
check_weights['fully1_weight'] = np.random.randn(400, 25) / 400
check_weights['fully1_bias'] = np.random.randn(25, 1)
check_weights['fully2_weight'] = np.random.randn(25, 10) / 25
check_weights['fully2_bias'] = np.random.randn(10, 1)

# Step size of the forward finite difference.
EPSILON = 0.00010

# A small slice of the training split keeps the repeated forward passes cheap.
X_check = train_data[:100]
y_check = train_label[:, :100]
# The feedforward and backpropagation processes.
base_loss, _, base_gradients = feedforward_backprop(X_check, y_check, check_weights)

# check correctness of fully1_bias's gradient
# Printed columns: finite-difference estimate, analytic gradient at the
# unperturbed weights, analytic gradient at the perturbed weights.
for c in range(check_weights['fully1_bias'].shape[0]):
    # Remember the exact value: (w + eps) - eps is not guaranteed to return
    # the original float, so restoring by subtraction would let the weights drift.
    original_value = check_weights['fully1_bias'][c, 0]
    check_weights['fully1_bias'][c, 0] = original_value + EPSILON
    perturbed_loss, _, perturbed_gradients = feedforward_backprop(X_check, y_check, check_weights)
    print('%.2e, %.2e, %.2e'%((perturbed_loss - base_loss) / EPSILON, base_gradients['fully1_bias_grad'][c, 0], perturbed_gradients['fully1_bias_grad'][c, 0]))
    check_weights['fully1_bias'][c, 0] = original_value

0.00e+00, 0.00e+00, 0.00e+00
1.48e-02, 1.48e-02, 1.48e-02
1.75e-02, 1.75e-02, 1.75e-02
4.60e-03, 4.60e-03, 4.60e-03
3.19e-02, 3.19e-02, 3.19e-02
0.00e+00, 0.00e+00, 0.00e+00
0.00e+00, 0.00e+00, 0.00e+00
0.00e+00, 0.00e+00, 0.00e+00
0.00e+00, 0.00e+00, 0.00e+00
0.00e+00, 0.00e+00, 0.00e+00
0.00e+00, 0.00e+00, 0.00e+00
7.90e-03, 7.90e-03, 7.90e-03
0.00e+00, 0.00e+00, 0.00e+00
2.25e-02, 2.25e-02, 2.25e-02
0.00e+00, 0.00e+00, 0.00e+00
0.00e+00, 0.00e+00, 0.00e+00
0.00e+00, 0.00e+00, 0.00e+00
8.46e-03, 8.46e-03, 8.46e-03
0.00e+00, 0.00e+00, 0.00e+00
0.00e+00, 0.00e+00, 0.00e+00
2.27e-02, 2.27e-02, 2.27e-02
0.00e+00, 0.00e+00, 0.00e+00
0.00e+00, 0.00e+00, 0.00e+00
1.38e-02, 1.38e-02, 1.38e-02
0.00e+00, 0.00e+00, 0.00e+00


# Train

In [103]:
# Train the two-layer fully connected network with mini-batch updates.
weights = {}
weights['fully1_weight'] = np.random.randn(400, 25) / 400
weights['fully1_bias'] = np.random.randn(25, 1)
weights['fully2_weight'] = np.random.randn(25, 10) / 25
weights['fully2_bias'] = np.random.randn(10, 1)

# Parameters are updated in this fixed order on every batch.
PARAM_NAMES = ('fully1_weight', 'fully1_bias', 'fully2_weight', 'fully2_bias')

# One running increment buffer per parameter, keyed exactly like `weights`,
# each starting at zero with the shape of its parameter.
weight_incs = {name: np.zeros_like(weights[name]) for name in PARAM_NAMES}

batch_size = 100
max_epoch = 10
# The next three values are passed straight to get_new_weight_inc (from utils).
# NOTE(review): presumably momentum, weight-decay coefficient and step size;
# confirm against utils.get_new_weight_inc.
momW = 0.9
wc = 0.0005
learning_rate = 0.1

# Round up so a final, shorter batch is still visited; slicing past the end
# of the arrays simply yields the remaining samples.
num_batches = int(np.ceil(train_num_cases / batch_size))

for epoch in range(max_epoch):
    for i in range(num_batches):
        X_train = train_data[i * batch_size: (i + 1) * batch_size]
        y_train = train_label[:, i * batch_size: (i + 1) * batch_size]
        # The feedforward and backpropagation processes.
        loss, accuracy, gradients = feedforward_backprop(
            X_train, y_train, weights)
        print('%03d.%02d loss:%0.3e, accuracy:%f' % (epoch, i, loss, accuracy))

        # Updating weights: the gradient for parameter `name` is stored under
        # the key `name + '_grad'`; every gradient comes from the single
        # backward pass above, so the per-parameter updates are independent.
        for name in PARAM_NAMES:
            weight_incs[name] = get_new_weight_inc(
                weight_incs[name], weights[name], momW, wc, learning_rate,
                gradients[name + '_grad'])
            weights[name] = weights[name] + weight_incs[name]

000.00 loss:2.742e+00, accuracy:0.070000
000.01 loss:2.588e+00, accuracy:0.120000
000.02 loss:2.456e+00, accuracy:0.070000
000.03 loss:2.361e+00, accuracy:0.150000
000.04 loss:2.303e+00, accuracy:0.110000
000.05 loss:2.343e+00, accuracy:0.100000
000.06 loss:2.339e+00, accuracy:0.120000
000.07 loss:2.305e+00, accuracy:0.130000
000.08 loss:2.198e+00, accuracy:0.150000
000.09 loss:2.133e+00, accuracy:0.290000
000.10 loss:2.166e+00, accuracy:0.260000
000.11 loss:2.102e+00, accuracy:0.290000
000.12 loss:2.174e+00, accuracy:0.260000
000.13 loss:2.170e+00, accuracy:0.210000
000.14 loss:2.052e+00, accuracy:0.210000
000.15 loss:2.074e+00, accuracy:0.210000
000.16 loss:2.014e+00, accuracy:0.240000
000.17 loss:1.975e+00, accuracy:0.370000
000.18 loss:1.900e+00, accuracy:0.350000
000.19 loss:1.827e+00, accuracy:0.350000
000.20 loss:1.781e+00, accuracy:0.420000
000.21 loss:1.789e+00, accuracy:0.420000
000.22 loss:1.622e+00, accuracy:0.530000
000.23 loss:1.610e+00, accuracy:0.520000
000.24 loss:1.47

# Test

In [104]:
# Score the current weights on the held-out split; the third return value
# (the gradients) is not needed for evaluation and is discarded.
loss, accuracy, _ = feedforward_backprop(test_data, test_label, weights)
test_summary = 'loss:%0.3e, accuracy:%f\n' % (loss, accuracy)
print(test_summary)

loss:3.310e-01, accuracy:0.897000

