In [2]:
%pip install numpy tensorflow

Collecting tensorflow
  Obtaining dependency information for tensorflow from https://files.pythonhosted.org/packages/20/cf/55b68d5896e58e25f41e5bc826c96678073b512be8ca2b1f4b101e0f195c/tensorflow-2.19.0-cp311-cp311-macosx_12_0_arm64.whl.metadata
  Downloading tensorflow-2.19.0-cp311-cp311-macosx_12_0_arm64.whl.metadata (4.0 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Obtaining dependency information for absl-py>=1.0.0 from https://files.pythonhosted.org/packages/87/04/9d75e1d3bb4ab8ec67ff10919476ccdee06c098bcfcf3a352da5f985171d/absl_py-2.3.0-py3-none-any.whl.metadata
  Downloading absl_py-2.3.0-py3-none-any.whl.metadata (2.4 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Obtaining dependency information for astunparse>=1.6.0 from https://files.pythonhosted.org/packages/2b/03/13dde6512ad7b4557eb792fbcf0c653af6076b81e5941d36ec61f7ce6028/astunparse-1.6.3-py2.py3-none-any.whl.metadata
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=2

In [1]:
import numpy as np
from keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from Dense import Dense
from activations import Tanh
from mse import mse, mse_prime
from network import train, predict

In [2]:
def preprocess_data(x, y, limit, num_classes=10):
    """Prepare the first ``limit`` samples as column vectors for the network.

    Args:
        x: NumPy array of images whose first axis indexes samples and whose
            remaining axes hold 28 * 28 values each (assumed to be 0..255
            pixel intensities, as in MNIST).
        y: Integer class labels, one per sample, each in ``[0, num_classes)``.
        limit: Number of leading samples to keep; ``None`` keeps everything.
        num_classes: Length of each one-hot label vector (10 for MNIST digits).

    Returns:
        A tuple ``(x, y)``: ``x`` is float32 with shape ``(m, 784, 1)`` after
        division by 255, and ``y`` is one-hot encoded with shape
        ``(m, num_classes, 1)``, where ``m`` is the number of samples kept.
    """
    # Truncate first so only the samples that are kept get reshaped and
    # converted, instead of processing the full dataset and discarding most of it.
    x, y = x[:limit], y[:limit]
    # reshape and normalize input data
    x = x.reshape(x.shape[0], 28 * 28, 1)
    x = x.astype("float32") / 255
    # encode output which is a number in range [0,9] into a vector of size 10
    # e.g. number 3 will become [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
    # The width is passed explicitly: inferring it from max(y) + 1 would yield
    # fewer columns whenever the highest digit is absent, breaking the reshape.
    y = to_categorical(y, num_classes=num_classes)
    y = y.reshape(y.shape[0], num_classes, 1)
    return x, y

In [3]:
# Fetch the MNIST train/test splits, then keep only a small prefix of each
# (1000 training samples, 50 test samples) in the network's input format.
train_split, test_split = mnist.load_data()
x_train, y_train = preprocess_data(*train_split, 1000)
x_test, y_test = preprocess_data(*test_split, 50)

In [4]:
# Layer stack applied in order: dense -> tanh -> dense -> tanh.
# Presumably Dense takes (input_size, output_size), giving 784 -> 40 -> 10
# to match the flattened images and one-hot labels -- confirm in Dense.py.
input_size, hidden_size, output_size = 28 * 28, 40, 10
network = [
    Dense(input_size, hidden_size),
    Tanh(),
    Dense(hidden_size, output_size),
    Tanh(),
]

In [5]:
# Fit the network on the training subset using mean squared error as the loss;
# the call prints the error after each epoch.
n_epochs = 100
step_size = 0.1
train(
    network,
    mse,
    mse_prime,
    x_train,
    y_train,
    epoch=n_epochs,
    learning_rate=step_size,
)

1/100, error=0.922434627787569
2/100, error=0.8514436251544266
3/100, error=0.7865584576765654
4/100, error=0.7401757945634753
5/100, error=0.6845573354300263
6/100, error=0.6008441905500164
7/100, error=0.47691296169426467
8/100, error=0.33499395964324985
9/100, error=0.23973492958466328
10/100, error=0.17258632281067252
11/100, error=0.13263718923452547
12/100, error=0.11731258538667681
13/100, error=0.11041173379175591
14/100, error=0.10573146986273238
15/100, error=0.10265505110740827
16/100, error=0.09996959633805373
17/100, error=0.09802271223208094
18/100, error=0.09655888960739163
19/100, error=0.09553128149350497
20/100, error=0.09413233903372265
21/100, error=0.09299873067879344
22/100, error=0.09199482463596458
23/100, error=0.09091030348940714
24/100, error=0.08990500379865952
25/100, error=0.08874551019251259
26/100, error=0.08768275353607757
27/100, error=0.08655325095528879
28/100, error=0.0853995963081566
29/100, error=0.08424175204212431
30/100, error=0.083020290383771

In [6]:
# Accuracy on the held-out samples: a prediction counts as a hit when the
# index of the largest network output equals the index of the one-hot label.
correct = sum(
    int(np.argmax(predict(network, sample)) == np.argmax(label))
    for sample, label in zip(x_test, y_test)
)

print(correct / len(x_test))

0.74
