In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
pd_train = pd.read_csv('fashion-mnist_train.csv')

In [3]:
pd_train.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
pd_test = pd.read_csv('fashion-mnist_test.csv')

In [5]:
x_train = np.array(pd_train.drop('label',axis=1))

In [6]:
x_train = x_train/255

In [7]:
y = np.array(pd_train['label'])
y.shape

(60000,)

In [8]:
y_train = np.zeros((60000,10))

In [9]:
y_train[0][8]

0.0

In [10]:
for i in range(60000):
  classs = y[i]
  y_train[i][classs] = 1

In [11]:
y_train.shape

(60000, 10)

In [12]:
class FCLayer:
    def __init__(self, input_size, output_size):
        self.input_size = input_size
        self.output_size = output_size
        self.weights = np.random.randn(input_size, output_size) / np.sqrt(input_size + output_size)
        self.bias = np.random.randn(1, output_size) / np.sqrt(input_size + output_size)

    def forward(self, input):
        self.input = input
        return np.dot(input, self.weights) + self.bias

    def backward(self, output_error, learning_rate):
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.reshape(-1,1), output_error)
        # bias_error = output_error
        
        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * output_error
        return input_error

In [13]:
class ActivationLayer:
    def __init__(self, activation, activation_prime):
        self.activation = activation
        self.activation_prime = activation_prime
    
    def forward(self, input):
        self.input = input
        return self.activation(input)
    
    def backward(self, output_error, learning_rate):
        return output_error * self.activation_prime(self.input)

In [14]:
class SoftmaxLayer:
    def __init__(self, input_size):
        self.input_size = input_size
    
    def forward(self, input):
        self.input = input
        tmp = np.exp(input)
        self.output = tmp / np.sum(tmp)
        return self.output
    
    def backward(self, output_error, learning_rate):
        input_error = np.zeros(output_error.shape)
        out = np.tile(self.output.T, self.input_size)
        return self.output * np.dot(output_error, np.identity(self.input_size) - out)

In [15]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_prime(x):
    return np.exp(-x) / (1 + np.exp(-x))**2

def tanh(x):
    return np.tanh(x)

def tanh_prime(x):
    return 1 - np.tanh(x)**2

def relu(x):
    return np.maximum(x, 0)

def relu_prime(x):
    return np.array(x >= 0).astype('int')

In [16]:
def mse(y_true, y_pred):
    return np.mean(np.power(y_true - y_pred, 2))

def mse_prime(y_true, y_pred):
    return 2 * (y_pred - y_true) / y_pred.size

def sse(y_true, y_pred):
    return 0.5 * np.sum(np.power(y_true - y_pred, 2))

def sse_prime(y_true, y_pred):
    return y_pred - y_true

In [17]:
x_train.shape

(60000, 784)

In [18]:
y_train.shape

(60000, 10)

In [19]:
y_train = y_train[0:1000,:]
x_train = x_train[0:1000,:]

In [20]:

network = [
    FCLayer(784, 1568),
    ActivationLayer(relu, relu_prime),
    FCLayer(1568,1568),
    ActivationLayer(relu, relu_prime),
    FCLayer(1568, 10),
    SoftmaxLayer(10)
]

epochs = 40
learning_rate = 0.1

# training
for epoch in range(epochs):
    error = 0
    for x, y_true in zip(x_train, y_train):
        # forward
        output = x
        for layer in network:
            output = layer.forward(output)
        
        # error (display purpose only)
        error += mse(y_true, output)

        # backward
        output_error = mse_prime(y_true, output)
        for layer in reversed(network):
            output_error = layer.backward(output_error, learning_rate)
    
    error /= len(x_train)
    print('%d/%d, error=%f' % (epoch + 1, epochs, error))

1/40, error=0.070570
2/40, error=0.045237
3/40, error=0.037643
4/40, error=0.032986
5/40, error=0.030048
6/40, error=0.027708
7/40, error=0.025580
8/40, error=0.023737
9/40, error=0.022247
10/40, error=0.020383
11/40, error=0.018916
12/40, error=0.017939
13/40, error=0.017582
14/40, error=0.015567
15/40, error=0.014616
16/40, error=0.014446
17/40, error=0.013367
18/40, error=0.012831
19/40, error=0.012793
20/40, error=0.012022
21/40, error=0.011406
22/40, error=0.011145
23/40, error=0.010313
24/40, error=0.010286
25/40, error=0.010317
26/40, error=0.009569
27/40, error=0.009240
28/40, error=0.009354
29/40, error=0.007806
30/40, error=0.009024
31/40, error=0.009127
32/40, error=0.008126
33/40, error=0.009021
34/40, error=0.007625
35/40, error=0.006364
36/40, error=0.007381
37/40, error=0.007722
38/40, error=0.006298
39/40, error=0.006043
40/40, error=0.007423


In [21]:
def predict(network, input):
    output = input
    for layer in network:
        output = layer.forward(output)
    return output

ratio = sum([np.argmax(y) == np.argmax(predict(network, x)) for x, y in zip(x_test, y_test)]) / len(x_test)
error = sum([mse(y, predict(network, x)) for x, y in zip(x_test, y_test)]) / len(x_test)
print('ratio: %.2f' % ratio)
print('mse: %.4f' % error)

NameError: name 'x_test' is not defined

In [22]:
import matplotlib.pyplot as plt

samples = 10
for test, true in zip(x_test[:samples], y_test[:samples]):
    image = np.reshape(test, (28, 28))
    plt.imshow(image, cmap='binary')
    plt.show()
    pred = predict(network, test)[0]
    idx = np.argmax(pred)
    idx_true = np.argmax(true)
    print('pred: %s, prob: %.2f, true: %d' % (idx, pred[idx], idx_true))

NameError: name 'x_test' is not defined