### The forward propagation

In [1]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import datasets

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

print(tf.__version__)
print(sys.version_info)
for module in mpl, np, pd, sklearn, tf, keras:
    print(module.__name__, module.__version__)

2.0.0-alpha0
sys.version_info(major=3, minor=6, micro=7, releaselevel='final', serial=0)
matplotlib 3.1.0
numpy 1.16.4
pandas 0.24.2
sklearn 0.21.2
tensorflow 2.0.0-alpha0
tensorflow.python.keras.api._v2.keras 2.2.4-tf


In [2]:
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime"
# abort seen with some TensorFlow/MKL installs -- confirm it is still needed.
os.environ.update(KMP_DUPLICATE_LIB_OK="TRUE")

In [3]:
# Load the MNIST train/test splits and show the raw array shapes.
(x, y), (x_test, y_test) = datasets.mnist.load_data()
print(x.shape, y.shape)

# Images become float32 tensors scaled by 1/255; labels become int32 tensors.
x, x_test = (
    tf.convert_to_tensor(images, dtype=tf.float32) / 255.
    for images in (x, x_test)
)
y, y_test = (
    tf.convert_to_tensor(labels, dtype=tf.int32)
    for labels in (y, y_test)
)

# Sanity checks on the converted training tensors: shape, dtype, value range.
print(*(tensor.shape for tensor in (x, y)))
print(*(tensor.dtype for tensor in (x, y)))
print(*(stat(x) for stat in (tf.reduce_min, tf.reduce_max)))
print(*(stat(y) for stat in (tf.reduce_min, tf.reduce_max)))

# Batched (image, label) pipelines, 128 examples per batch, in stored order.
train_db, test_db = (
    tf.data.Dataset.from_tensor_slices(split).batch(128)
    for split in ((x, y), (x_test, y_test))
)
print(train_db)

# Pull a single batch to confirm the per-batch shapes.
train_iter = iter(train_db)
sample = next(train_iter)
print('batch:', *(part.shape for part in sample))

# Trainable parameters of a three-layer fully connected net (784 -> 256 -> 128 -> 10).
# Weights are drawn from a truncated normal with stddev 0.1; biases start at zero.
w1, b1 = (
    tf.Variable(tf.random.truncated_normal(shape=[784, 256], stddev=0.1)),
    tf.Variable(tf.zeros(shape=[256])),
)
w2, b2 = (
    tf.Variable(tf.random.truncated_normal(shape=[256, 128], stddev=0.1)),
    tf.Variable(tf.zeros(shape=[128])),
)
w3, b3 = (
    tf.Variable(tf.random.truncated_normal(shape=[128, 10], stddev=0.1)),
    tf.Variable(tf.zeros(shape=[10])),
)

# Step size used for the manual gradient-descent updates.
lr = 0.001

def forward(inputs):
    """Run the three-layer network and return the raw output scores.

    Args:
        inputs: a batch of images; it is reshaped to [-1, 28*28] before the
            first matrix multiply.

    Returns:
        The output of the last affine layer (one row of 10 scores per example,
        given the [128, 10] shape of ``w3``). No softmax is applied here.
    """
    flat = tf.reshape(inputs, [-1, 28*28])
    # The bias vectors broadcast across the batch dimension on their own,
    # so no explicit tf.broadcast_to is needed.
    h1 = tf.nn.relu(flat@w1 + b1)
    h2 = tf.nn.relu(h1@w2 + b2)
    return h2@w3 + b3


# Every variable that receives a gradient-descent update.
params = [w1, b1, w2, b2, w3, b3]

for epoch in range(100):
    # ---- training pass ----
    # Batch names are distinct from x / y so the full training tensors are not
    # overwritten in the notebook namespace.
    for step, (x_batch, y_batch) in enumerate(train_db):
        with tf.GradientTape() as tape:
            out = forward(x_batch)
            y_onehot = tf.one_hot(y_batch, depth=10)

            # Mean squared error between the one-hot labels and the raw scores.
            loss = tf.reduce_mean(tf.square(y_onehot - out))

        grads = tape.gradient(loss, params)
        # Update in place with assign_sub so each parameter stays a tf.Variable;
        # rebinding (w = w - lr * g) would replace it with a plain tensor.
        for param, grad in zip(params, grads):
            param.assign_sub(lr * grad)

        if step % 100 == 0:
            print(step, 'loss', float(loss))

    # ---- evaluation pass over the test set ----
    total_correct, total_num = 0, 0
    for x_batch, y_batch in test_db:
        out = forward(x_batch)

        prob = tf.nn.softmax(out, axis=1)
        # argmax returns int64; cast to int32 to compare against the labels.
        pred = tf.cast(tf.argmax(prob, axis=1), dtype=tf.int32)

        correct = tf.reduce_sum(tf.cast(tf.equal(pred, y_batch), dtype=tf.int32))

        total_correct += int(correct)
        total_num += x_batch.shape[0]

    # Accuracy is only needed once per epoch, after all test batches are counted.
    acc = total_correct / total_num
    print('test acc: ', acc)

(60000, 28, 28) (60000,)
(60000, 28, 28) (60000,)
<dtype: 'float32'> <dtype: 'int32'>
tf.Tensor(0.0, shape=(), dtype=float32) tf.Tensor(1.0, shape=(), dtype=float32)
tf.Tensor(0, shape=(), dtype=int32) tf.Tensor(9, shape=(), dtype=int32)
<BatchDataset shapes: ((None, 28, 28), (None,)), types: (tf.float32, tf.int32)>
batch: (128, 28, 28) (128,)
0 loss 0.3739185929298401
100 loss 0.20279736816883087
200 loss 0.18764355778694153
300 loss 0.15630705654621124
400 loss 0.17131371796131134
test acc:  0.1531
0 loss 0.1506539285182953
100 loss 0.14658518135547638
200 loss 0.15247231721878052
300 loss 0.13330957293510437
400 loss 0.15170340240001678
test acc:  0.2013
0 loss 0.13165995478630066
100 loss 0.12952342629432678
200 loss 0.13429725170135498
300 loss 0.11866581439971924
400 loss 0.13745377957820892
test acc:  0.2553
0 loss 0.11854919046163559
100 loss 0.11742846667766571
200 loss 0.12138597667217255
300 loss 0.1081489771604538
400 loss 0.12697237730026245
test acc:  0.3071
0 loss 0.1088

test acc:  0.7716
0 loss 0.04914478957653046
100 loss 0.0513569600880146
200 loss 0.04967530071735382
300 loss 0.04735064506530762
400 loss 0.058481864631175995
test acc:  0.7733
0 loss 0.04884248971939087
100 loss 0.05107574909925461
200 loss 0.04937392845749855
300 loss 0.04707380011677742
400 loss 0.05815477296710014
test acc:  0.7754
0 loss 0.04854652285575867
100 loss 0.05080237239599228
200 loss 0.04907980561256409
300 loss 0.046804215759038925
400 loss 0.057836245745420456
test acc:  0.7773
0 loss 0.048256468027830124
100 loss 0.05053632706403732
200 loss 0.04879232496023178
300 loss 0.046542853116989136
400 loss 0.05752643942832947
test acc:  0.7789
0 loss 0.04797373339533806
100 loss 0.050277501344680786
200 loss 0.04851140081882477
300 loss 0.04628737270832062
400 loss 0.05722472071647644
test acc:  0.7804
0 loss 0.04769743233919144
100 loss 0.050023049116134644
200 loss 0.048237428069114685
300 loss 0.04603815823793411
400 loss 0.05693094804883003
test acc:  0.7821
0 loss 0.

300 loss 0.03875236585736275
400 loss 0.04805907607078552
test acc:  0.8358
