In [12]:
import numpy as np
from scipy import signal
from keras.datasets import mnist
from keras.utils import np_utils

In [13]:
#layer.py
class Layer:
    def __init__(self):
        self.input = None
        self.output = None

    def forward(self, input):
        # TODO: return output
        pass

    def backward(self, output_gradient, learning_rate):
        # TODO: update parameters and return input gradient
        pass

In [14]:
#dense.py
class Dense(Layer):
    def __init__(self, input_size, output_size):
        self.weights = np.random.randn(output_size, input_size)
        self.bias = np.random.randn(output_size, 1)

    def forward(self, input):
        self.input = input
        return np.dot(self.weights, self.input) + self.bias

    def backward(self, output_gradient, learning_rate):
        weights_gradient = np.dot(output_gradient, self.input.T)
        input_gradient = np.dot(self.weights.T, output_gradient)
        self.weights -= learning_rate * weights_gradient
        self.bias -= learning_rate * output_gradient
        return input_gradient

In [15]:
#convolutional.py
class Convolutional(Layer):
    def __init__(self, input_shape, kernel_size, depth):
        input_depth, input_height, input_width = input_shape
        self.depth = depth
        self.input_shape = input_shape
        self.input_depth = input_depth
        self.output_shape = (depth, input_height - kernel_size + 1, input_width - kernel_size + 1)
        self.kernels_shape = (depth, input_depth, kernel_size, kernel_size)
        self.kernels = np.random.randn(*self.kernels_shape)
        self.biases = np.random.randn(*self.output_shape)

    def forward(self, input):
        self.input = input
        self.output = np.copy(self.biases)
        for i in range(self.depth):
            for j in range(self.input_depth):
                self.output[i] += signal.correlate2d(self.input[j], self.kernels[i, j], "valid")
        return self.output

    def backward(self, output_gradient, learning_rate):
        kernels_gradient = np.zeros(self.kernels_shape)
        input_gradient = np.zeros(self.input_shape)

        for i in range(self.depth):
            for j in range(self.input_depth):
                kernels_gradient[i, j] = signal.correlate2d(self.input[j], output_gradient[i], "valid")
                input_gradient[j] += signal.convolve2d(output_gradient[i], self.kernels[i, j], "full")

        self.kernels -= learning_rate * kernels_gradient
        self.biases -= learning_rate * output_gradient
        return input_gradient

In [16]:
#reshape.py
class Reshape(Layer):
    def __init__(self, input_shape, output_shape):
        self.input_shape = input_shape
        self.output_shape = output_shape

    def forward(self, input):
        return np.reshape(input, self.output_shape)

    def backward(self, output_gradient, learning_rate):
        return np.reshape(output_gradient, self.input_shape)

In [17]:
#activation.py
class Activation(Layer):
    def __init__(self, activation, activation_prime):
        self.activation = activation
        self.activation_prime = activation_prime

    def forward(self, input):
        self.input = input
        return self.activation(self.input)

    def backward(self, output_gradient, learning_rate):
        return np.multiply(output_gradient, self.activation_prime(self.input))

In [18]:
#activations.py
class Tanh(Activation):
    def __init__(self):
        def tanh(x):
            return np.tanh(x)

        def tanh_prime(x):
            return 1 - np.tanh(x) ** 2

        super().__init__(tanh, tanh_prime)

class Sigmoid(Activation):
    def __init__(self):
        def sigmoid(x):
            return 1 / (1 + np.exp(-x))

        def sigmoid_prime(x):
            s = sigmoid(x)
            return s * (1 - s)

        super().__init__(sigmoid, sigmoid_prime)

class Softmax(Layer):
    def forward(self, input):
        tmp = np.exp(input)
        self.output = tmp / np.sum(tmp)
        return self.output
    
    def backward(self, output_gradient, learning_rate):
        # This version is faster than the one presented in the video
        n = np.size(self.output)
        return np.dot((np.identity(n) - self.output.T) * self.output, output_gradient)
        # Original formula:
        # tmp = np.tile(self.output, n)
        # return np.dot(tmp * (np.identity(n) - np.transpose(tmp)), output_gradient)

class Linear(Activation):
    def __init__(self):
        def linear(x):
            return x

        def linear_prime(x):
            return 1 
        
        super().__init__(linear, linear_prime)

class Activation_ReLU(Activation):
    def __init__(self):
        def relu(x):
            return np.maximum(0, x)

        def relu_prime(x):
            return np.where(x > 0, 1, 0)

        super().__init__(relu, relu_prime)

In [19]:
#losses.py
def mse(y_true, y_pred):
    return np.mean(np.power(y_true - y_pred, 2))

def mse_prime(y_true, y_pred):
    return 2 * (y_pred - y_true) / np.size(y_true)

def binary_cross_entropy(y_true, y_pred):
    return np.mean(-y_true * np.log(y_pred) - (1 - y_true) * np.log(1 - y_pred))

def binary_cross_entropy_prime(y_true, y_pred):
    return ((1 - y_true) / (1 - y_pred) - y_true / y_pred) / np.size(y_true)

In [20]:
#netwwork.py
def predict(network, input):
    output = input
    for layer in network:
        output = layer.forward(output)
    return output

def train(network, loss, loss_prime, x_train, y_train, epochs = 1000, learning_rate = 0.01, verbose = True):
    for e in range(epochs):
        error = 0
        for x, y in zip(x_train, y_train):
            # forward
            output = predict(network, x)

            # error
            error += loss(y, output)

            # backward
            grad = loss_prime(y, output)
            for layer in reversed(network):
                grad = layer.backward(grad, learning_rate)

        error /= len(x_train)
        if verbose:
            print(f"{e + 1}/{epochs}, error={error}")


In [21]:
#preprocess data
def preprocess_data(x, y, limit):
    zero_index = np.where(y == 0)[0][:limit]
    one_index = np.where(y == 1)[0][:limit]
    two_index = np.where(y == 2)[0][:limit]
    three_index = np.where(y == 3)[0][:limit]
    four_index = np.where(y == 4)[0][:limit]
    five_index = np.where(y == 5)[0][:limit]
    six_index = np.where(y == 6)[0][:limit]
    seven_index = np.where(y == 7)[0][:limit]
    eight_index = np.where(y == 8)[0][:limit]
    nine_index = np.where(y == 9)[0][:limit]
    all_indices = np.hstack((zero_index, one_index,two_index,three_index,four_index,five_index,six_index,seven_index,eight_index,nine_index,))
    all_indices = np.random.permutation(all_indices)
    x, y = x[all_indices], y[all_indices]
    x = x.reshape(len(x), 1, 28, 28)
    x = x.astype("float32") / 255
    y = np_utils.to_categorical(y)
    y = y.reshape(len(y), 10, 1)
    return x, y


In [22]:
# driver code1 for mnist binary classification using convolutional
# neural netwwwork
# load MNIST from server, limit to 100 images per class since we're not training on GPU
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, y_train = preprocess_data(x_train, y_train, 100)
x_test, y_test = preprocess_data(x_test, y_test, 100)

# neural network
network = [
    Convolutional((1, 28, 28), 3, 5),
    Sigmoid(),
    Reshape((5, 26, 26), (5 * 26 * 26, 1)),
    Dense(5 * 26 * 26, 100),
    Sigmoid(),
    Dense(100, 2),
    Sigmoid()
]

# train
train(
    network,
    binary_cross_entropy,
    binary_cross_entropy_prime,
    x_train,
    y_train,
    epochs=20,
    learning_rate=0.1
)

ValueError: operands could not be broadcast together with shapes (10,1) (2,1) 

In [None]:
# driver code1 testing
# test
correct = 0
for x, y in zip(x_test, y_test):
    output = predict(network, x)
    if(np.argmax(output) == np.argmax(y)):
        correct = correct + 1
    print(f"pred: {np.argmax(output)}, true: {np.argmax(y)}")
print()
print("Accuracy: ", correct/len(y_test)*100, "%")

pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true:

In [None]:
# driver code1 for mnist binary classification using convolutional
# neural netwwwork
# load MNIST from server, limit to 100 images per class since we're not training on GPU
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, y_train = preprocess_data(x_train, y_train, 100)
x_test, y_test = preprocess_data(x_test, y_test, 100)

# neural network
network = [
    Convolutional((1, 28, 28), 3, 5),
    Activation_ReLU(),
    Reshape((5, 26, 26), (5 * 26 * 26, 1)),
    Dense(5 * 26 * 26, 100),
    Sigmoid(),
    Dense(100, 2),
    Softmax()
]

# train
train(
    network,
    binary_cross_entropy,
    binary_cross_entropy_prime,
    x_train,
    y_train,
    epochs=20,
    learning_rate=0.1
)

1/20, error=0.5451985883554424
2/20, error=0.08325582181707579
3/20, error=0.019721197087970512
4/20, error=0.008380132352364933
5/20, error=0.005058065563311611
6/20, error=0.0031582073461313307
7/20, error=0.002226742588672024
8/20, error=0.0017460885072113593
9/20, error=0.0014969196448822136
10/20, error=0.001343668143507578
11/20, error=0.0012307930455463028
12/20, error=0.0011426931485519877
13/20, error=0.0010710600140899848
14/20, error=0.0010110919765355141
15/20, error=0.0009597847044941274
16/20, error=0.0009151320333976202
17/20, error=0.0008757242620264941
18/20, error=0.0008405264354137297
19/20, error=0.0008087613701586349
20/20, error=0.0007798353994929555


In [None]:
# driver code1 testing
# test
correct = 0
for x, y in zip(x_test, y_test):
    output = predict(network, x)
    if(np.argmax(output) == np.argmax(y)):
        correct = correct + 1
    print(f"pred: {np.argmax(output)}, true: {np.argmax(y)}")
print()
print("Accuracy: ", correct/len(y_test)*100, "%")

pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true:

In [None]:
# driver code1 for mnist binary classification using convolutional
# neural netwwwork
# load MNIST from server, limit to 100 images per class since we're not training on GPU
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, y_train = preprocess_data(x_train, y_train, 100)
x_test, y_test = preprocess_data(x_test, y_test, 100)

# neural network
network = [
    Convolutional((1, 28, 28), 3, 5),
    Tanh(),
    Reshape((5, 26, 26), (5 * 26 * 26, 1)),
    Dense(5 * 26 * 26, 100),
    Activation_ReLU(),
    Dense(100, 10),
    Softmax()
]

# train
train(
    network,
    binary_cross_entropy,
    binary_cross_entropy_prime,
    x_train,
    y_train,
    epochs=20,
    learning_rate=0.1
)

  return np.mean(-y_true * np.log(y_pred) - (1 - y_true) * np.log(1 - y_pred))
  return np.mean(-y_true * np.log(y_pred) - (1 - y_true) * np.log(1 - y_pred))
  return ((1 - y_true) / (1 - y_pred) - y_true / y_pred) / np.size(y_true)
  return ((1 - y_true) / (1 - y_pred) - y_true / y_pred) / np.size(y_true)


1/20, error=nan
2/20, error=nan
3/20, error=nan
4/20, error=nan
5/20, error=nan
6/20, error=nan
7/20, error=nan
8/20, error=nan
9/20, error=nan
10/20, error=nan
11/20, error=nan
12/20, error=nan
13/20, error=nan
14/20, error=nan
15/20, error=nan
16/20, error=nan
17/20, error=nan
18/20, error=nan
19/20, error=nan
20/20, error=nan


In [None]:
correct = 0
for x, y in zip(x_test, y_test):
    output = predict(network, x)
    if(np.argmax(output) == np.argmax(y)):
        correct = correct + 1
    print(f"pred: {np.argmax(output)}, true: {np.argmax(y)}")
print()
print("Accuracy: ", correct/len(y_test)*100, "%")

pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true:

## 6C


In [None]:
# driver code1 for mnist binary classification using convolutional
# neural netwwwork
# load MNIST from server, limit to 100 images per class since we're not training on GPU
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, y_train = preprocess_data(x_train, y_train, 100)
x_test, y_test = preprocess_data(x_test, y_test, 100)

# neural network
network = [
    Convolutional((1, 28, 28), 3, 5),
    Activation_ReLU(),
    Reshape((5, 26, 26), (5 * 26 * 26, 1)),
    Dense(5 * 26 * 26, 100),
    Sigmoid(),
    Dense(100, 10),
    Softmax()
]

# train
train(
    network,
    binary_cross_entropy,
    binary_cross_entropy_prime,
    x_train,
    y_train,
    epochs=20,
    learning_rate=0.1
)

1/20, error=0.5368173321838982
2/20, error=0.2874085444189357
3/20, error=0.24624424533206096
4/20, error=0.2312209445571184
5/20, error=0.2039455104562525
6/20, error=0.17902046562083318
7/20, error=0.16712667856531652
8/20, error=0.16287923889702294
9/20, error=0.14758369087188997
10/20, error=0.14080771621293386
11/20, error=0.1284430706749735
12/20, error=0.13433756469942487
13/20, error=0.12433648445745313
14/20, error=0.12545293154191092
15/20, error=0.12271713511180168
16/20, error=0.11044376800327312
17/20, error=0.10017870847336306
18/20, error=0.10500963554117475
19/20, error=0.09602441189369268
20/20, error=0.09222413318012633


In [None]:
correct = 0
for x, y in zip(x_test, y_test):
    output = predict(network, x)
    if(np.argmax(output) == np.argmax(y)):
        correct = correct + 1
    print(f"pred: {np.argmax(output)}, true: {np.argmax(y)}")
print()
print("Accuracy: ", correct/len(y_test)*100, "%")

pred: 3, true: 6
pred: 7, true: 7
pred: 2, true: 2
pred: 5, true: 5
pred: 1, true: 1
pred: 7, true: 7
pred: 1, true: 1
pred: 7, true: 7
pred: 3, true: 3
pred: 8, true: 8
pred: 5, true: 8
pred: 3, true: 3
pred: 3, true: 3
pred: 9, true: 9
pred: 2, true: 3
pred: 1, true: 1
pred: 6, true: 8
pred: 2, true: 2
pred: 3, true: 0
pred: 9, true: 9
pred: 7, true: 7
pred: 4, true: 4
pred: 3, true: 0
pred: 5, true: 5
pred: 1, true: 1
pred: 2, true: 6
pred: 5, true: 3
pred: 3, true: 0
pred: 1, true: 3
pred: 1, true: 1
pred: 4, true: 4
pred: 4, true: 4
pred: 1, true: 1
pred: 2, true: 5
pred: 6, true: 6
pred: 3, true: 5
pred: 7, true: 7
pred: 8, true: 8
pred: 5, true: 2
pred: 2, true: 2
pred: 5, true: 5
pred: 2, true: 2
pred: 0, true: 8
pred: 4, true: 4
pred: 1, true: 1
pred: 9, true: 9
pred: 6, true: 6
pred: 4, true: 9
pred: 7, true: 7
pred: 1, true: 1
pred: 6, true: 6
pred: 1, true: 1
pred: 6, true: 2
pred: 1, true: 1
pred: 3, true: 5
pred: 8, true: 8
pred: 6, true: 6
pred: 6, true: 8
pred: 8, true:

  return 1 / (1 + np.exp(-x))


In [None]:
# driver code1 for mnist binary classification using convolutional
# neural netwwwork
# load MNIST from server, limit to 100 images per class since we're not training on GPU
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, y_train = preprocess_data(x_train, y_train, 100)
x_test, y_test = preprocess_data(x_test, y_test, 100)

# neural network
network = [
    Convolutional((1, 28, 28), 3, 5),
    Activation_ReLU(),
    Reshape((5, 26, 26), (5 * 26 * 26, 1)),
    Dense(5 * 26 * 26, 100),
    Sigmoid(),
    Dense(100, 10),
    Softmax()
]

# train
train(
    network,
    binary_cross_entropy,
    binary_cross_entropy_prime,
    x_train,
    y_train,
    epochs=20,
    learning_rate=0.01
)

1/20, error=1.095253081339272
2/20, error=0.6608405866971916
3/20, error=0.5001240301046315
4/20, error=0.41893852202684106
5/20, error=0.37170060790778725
6/20, error=0.3286982167686718
7/20, error=0.29229061228279635
8/20, error=0.264336426648207
9/20, error=0.23950870423856518
10/20, error=0.21767512306878833
11/20, error=0.20526715954842262
12/20, error=0.19407705401747002
13/20, error=0.18354209299050492
14/20, error=0.17534684734717518
15/20, error=0.16771529654466583
16/20, error=0.1612406416809441
17/20, error=0.15633589478347376
18/20, error=0.15053305368507064
19/20, error=0.14569626979755443
20/20, error=0.141561623809622


In [None]:
correct = 0
for x, y in zip(x_test, y_test):
    output = predict(network, x)
    if(np.argmax(output) == np.argmax(y)):
        correct = correct + 1
    print(f"pred: {np.argmax(output)}, true: {np.argmax(y)}")
print()
print("Accuracy: ", correct/len(y_test)*100, "%")

pred: 2, true: 4
pred: 8, true: 0
pred: 8, true: 2
pred: 7, true: 9
pred: 0, true: 0
pred: 0, true: 0
pred: 4, true: 6
pred: 6, true: 6
pred: 7, true: 7
pred: 2, true: 8
pred: 7, true: 0
pred: 3, true: 3
pred: 2, true: 9
pred: 5, true: 5
pred: 8, true: 8
pred: 7, true: 7
pred: 0, true: 0
pred: 9, true: 9
pred: 5, true: 5
pred: 6, true: 6
pred: 7, true: 7
pred: 3, true: 8
pred: 5, true: 5
pred: 0, true: 0
pred: 2, true: 2
pred: 5, true: 3
pred: 2, true: 9
pred: 8, true: 9
pred: 9, true: 8
pred: 4, true: 2
pred: 2, true: 6
pred: 4, true: 4
pred: 2, true: 3
pred: 2, true: 0
pred: 4, true: 4
pred: 6, true: 4
pred: 8, true: 5
pred: 7, true: 9
pred: 7, true: 4
pred: 1, true: 1
pred: 4, true: 6
pred: 0, true: 3
pred: 2, true: 1
pred: 9, true: 4
pred: 7, true: 0
pred: 7, true: 7
pred: 5, true: 3
pred: 7, true: 8
pred: 5, true: 0
pred: 4, true: 9
pred: 1, true: 1
pred: 5, true: 3
pred: 4, true: 6
pred: 1, true: 1
pred: 4, true: 7
pred: 6, true: 6
pred: 1, true: 1
pred: 3, true: 0
pred: 8, true:

In [None]:
# driver code1 for mnist binary classification using convolutional
# neural netwwwork
# load MNIST from server, limit to 100 images per class since we're not training on GPU
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, y_train = preprocess_data(x_train, y_train, 100)
x_test, y_test = preprocess_data(x_test, y_test, 100)

# neural network
network = [
    Convolutional((1, 28, 28), 3, 3),
    Tanh(),
    Reshape((3, 26, 26), (3 * 26 * 26, 1)),
    Dense(3 * 26 * 26, 100),
    Sigmoid(),
    Dense(100, 10),
    Softmax()
]

# train
train(
    network,
    binary_cross_entropy,
    binary_cross_entropy_prime,
    x_train,
    y_train,
    epochs=50,
    learning_rate=0.1
)

1/50, error=0.45992478808352
2/50, error=0.28333450880958183
3/50, error=0.24104013460628856
4/50, error=0.19703393493306312
5/50, error=0.17361958326722746
6/50, error=0.15000087236469614
7/50, error=0.13785327180209273
8/50, error=0.13018554222319506
9/50, error=0.11221447959458714
10/50, error=0.10847134766798924
11/50, error=0.09687486118102517
12/50, error=0.09261524877563412
13/50, error=0.084275742716941
14/50, error=0.08196387823491609
15/50, error=0.07469566651767295
16/50, error=0.07021719594113901
17/50, error=0.06398629901777458
18/50, error=0.06337845620369736
19/50, error=0.05730647179570568
20/50, error=0.05224748253095374
21/50, error=0.04700802341587178
22/50, error=0.04428537296455605
23/50, error=0.040549654937279175
24/50, error=0.03782684270720552
25/50, error=0.038730102326085925
26/50, error=0.03776551180765092
27/50, error=0.03491131473033641
28/50, error=0.03217335114000006
29/50, error=0.02627911152278282
30/50, error=0.02467654358267102
31/50, error=0.0227666

In [None]:
correct = 0
for x, y in zip(x_test, y_test):
    output = predict(network, x)
    if(np.argmax(output) == np.argmax(y)):
        correct = correct + 1
    print(f"pred: {np.argmax(output)}, true: {np.argmax(y)}")
print()
print("Accuracy: ", correct/len(y_test)*100, "%")

pred: 7, true: 7
pred: 2, true: 2
pred: 8, true: 8
pred: 3, true: 5
pred: 9, true: 9
pred: 3, true: 3
pred: 8, true: 9
pred: 2, true: 2
pred: 6, true: 6
pred: 2, true: 2
pred: 3, true: 2
pred: 1, true: 1
pred: 2, true: 2
pred: 6, true: 6
pred: 0, true: 0
pred: 0, true: 0
pred: 6, true: 6
pred: 0, true: 0
pred: 7, true: 7
pred: 0, true: 0
pred: 7, true: 2
pred: 7, true: 6
pred: 9, true: 9
pred: 3, true: 3
pred: 7, true: 7
pred: 7, true: 7
pred: 9, true: 9
pred: 8, true: 5
pred: 4, true: 4
pred: 4, true: 7
pred: 3, true: 3
pred: 5, true: 5
pred: 2, true: 2
pred: 4, true: 6
pred: 4, true: 4
pred: 1, true: 1
pred: 8, true: 6
pred: 6, true: 4
pred: 0, true: 0
pred: 9, true: 4
pred: 6, true: 6
pred: 7, true: 7
pred: 0, true: 0
pred: 9, true: 4
pred: 8, true: 8
pred: 0, true: 0
pred: 2, true: 2
pred: 2, true: 6
pred: 7, true: 7
pred: 6, true: 6
pred: 4, true: 6
pred: 0, true: 0
pred: 4, true: 6
pred: 6, true: 6
pred: 5, true: 5
pred: 7, true: 7
pred: 4, true: 4
pred: 6, true: 6
pred: 2, true:

In [None]:
# driver code1 for mnist binary classification using convolutional
# neural netwwwork
# load MNIST from server, limit to 100 images per class since we're not training on GPU
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, y_train = preprocess_data(x_train, y_train, 1000)
x_test, y_test = preprocess_data(x_test, y_test, 100)

# neural network
network = [
    Convolutional((1, 28, 28), 3, 3),
    Tanh(),
    Reshape((3, 26, 26), (3 * 26 * 26, 1)),
    Dense(3 * 26 * 26, 100),
    Sigmoid(),
    Dense(100, 10),
    Softmax()
]

# train
train(
    network,
    binary_cross_entropy,
    binary_cross_entropy_prime,
    x_train,
    y_train,
    epochs=20,
    learning_rate=0.1
)

1/20, error=0.20985422523754574
2/20, error=0.11927376572583064
3/20, error=0.10051598550691361
4/20, error=0.08981421183764518
5/20, error=0.07979789117373265
6/20, error=0.07544874538356759
7/20, error=0.07563480407920943
8/20, error=0.06991650698857863
9/20, error=0.06950263807372484
10/20, error=0.06155728176241677
11/20, error=0.05964942471884969
12/20, error=0.05824824026906457
13/20, error=0.053004345179208226
14/20, error=0.054162758849304965
15/20, error=0.05200444060815007
16/20, error=0.0507345147820609
17/20, error=0.04677776913846318
18/20, error=0.04785631272765569
19/20, error=0.04497810027604134
20/20, error=0.04287294414436659


In [None]:
correct = 0
for x, y in zip(x_test, y_test):
    output = predict(network, x)
    if(np.argmax(output) == np.argmax(y)):
        correct = correct + 1
    print(f"pred: {np.argmax(output)}, true: {np.argmax(y)}")
print()
print("Accuracy: ", correct/len(y_test)*100, "%")

pred: 8, true: 1
pred: 1, true: 1
pred: 9, true: 9
pred: 7, true: 7
pred: 8, true: 8
pred: 9, true: 9
pred: 1, true: 1
pred: 1, true: 1
pred: 9, true: 9
pred: 4, true: 5
pred: 8, true: 8
pred: 3, true: 5
pred: 1, true: 1
pred: 3, true: 3
pred: 7, true: 7
pred: 6, true: 6
pred: 6, true: 6
pred: 2, true: 2
pred: 1, true: 1
pred: 6, true: 6
pred: 9, true: 7
pred: 6, true: 6
pred: 9, true: 9
pred: 5, true: 5
pred: 1, true: 1
pred: 7, true: 7
pred: 6, true: 6
pred: 5, true: 5
pred: 4, true: 4
pred: 5, true: 6
pred: 0, true: 0
pred: 2, true: 8
pred: 4, true: 6
pred: 1, true: 1
pred: 2, true: 2
pred: 7, true: 7
pred: 2, true: 3
pred: 8, true: 8
pred: 9, true: 9
pred: 2, true: 2
pred: 0, true: 0
pred: 7, true: 7
pred: 0, true: 0
pred: 3, true: 8
pred: 4, true: 0
pred: 7, true: 3
pred: 0, true: 0
pred: 4, true: 4
pred: 9, true: 9
pred: 2, true: 2
pred: 3, true: 5
pred: 7, true: 7
pred: 3, true: 3
pred: 4, true: 4
pred: 2, true: 2
pred: 6, true: 6
pred: 7, true: 7
pred: 6, true: 6
pred: 1, true: