In [1]:
import numpy as np
from tensorflow import keras
from keras import layers, activations
from keras.datasets import mnist
import tensorflow as tf

In [24]:
def ReLU(Z):
    return np.maximum(Z,0)

def derivative_ReLU(Z):
    return Z > 0

def softmax(Z):
    """Compute softmax values for each sets of scores in x."""
    exp = np.exp(Z - np.max(Z))
    return exp / exp.sum(axis=0)

In [25]:
def init_params(size):
    W1 = np.random.rand(10,size) - 0.5
    b1 = np.random.rand(10,1) - 0.5
    W2 = np.random.rand(10,10) - 0.5
    b2 = np.random.rand(10,1) - 0.5
    return W1, b1, W2, b2

In [26]:
def one_hot(Y):
    one_hot_Y = np.zeros((Y.max()+1,Y.size))
    one_hot_Y[Y,np.arange(Y.size)] = 1
    return one_hot_Y

def forward_propagation(X,W1,b1,W2,b2):
    Z1 = np.dot(W1, X) + b1 #10, m
    A1 = ReLU(Z1) # 10,m
    Z2 = np.dot(W2, A1) + b2 #10,m
    A2 = softmax(Z2) #10,m
    return Z1, A1, Z2, A2

def backward_propagation(X, Y, A1, A2, W2, Z1, m):
    one_hot_Y = one_hot(Y)
    dZ2 = 2*(A2 - one_hot_Y) #10,m
    dW2 = 1/m * (dZ2.dot(A1.T)) # 10 , 10
    db2 = 1/m * np.sum(dZ2,1) # 10, 1
    dZ1 = W2.T.dot(dZ2)*derivative_ReLU(Z1) # 10, m
    dW1 = 1/m * (dZ1.dot(X.T)) #10, 784
    db1 = 1/m * np.sum(dZ1,1) # 10, 1

    return dW1, db1, dW2, db2

In [27]:
def update_params(alpha, W1, b1, W2, b2, dW1, db1, dW2, db2):
    W1 -= alpha * dW1
    b1 -= alpha * np.reshape(db1, (10,1))
    W2 -= alpha * dW2
    b2 -= alpha * np.reshape(db2, (10,1))

    return W1, b1, W2, b2

def get_predictions(A2):
    return np.argmax(A2, 0)

def get_accuracy(predictions, Y):
    return np.sum(predictions == Y)/Y.size

def gradient_descent(X, Y, alpha, iterations):
    size , m = X.shape

    W1, b1, W2, b2 = init_params(size)
    for i in range(iterations):
        Z1, A1, Z2, A2 = forward_propagation(X, W1, b1, W2, b2)
        dW1, db1, dW2, db2 = backward_propagation(X, Y, A1, A2, W2, Z1, m)

        W1, b1, W2, b2 = update_params(alpha, W1, b1, W2, b2, dW1, db1, dW2, db2)
    prediction = get_predictions(A2)
    print(f'{get_accuracy(prediction, Y):.3%}')
    return W1, b1, W2, b2


1.1 Implementation

In [28]:
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
SCALE_FACTOR = 255
WIDTH = X_train.shape[1]
HEIGHT = X_train.shape[2]
X_train = X_train.reshape(X_train.shape[0],WIDTH*HEIGHT).T / SCALE_FACTOR
X_test = X_test.reshape(X_test.shape[0],WIDTH*HEIGHT).T  / SCALE_FACTOR

W1, b1, W2, b2 = gradient_descent(X_train, Y_train, 0.15, 300)

87.113%


1.2.1

In [18]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# compute the number of labels
num_labels = len(np.unique(y_train))

# One-Hot Encoding
y_train = keras.utils.to_categorical(y_train)
y_test = keras.utils.to_categorical(y_test)

image_size = X_train.shape[1]
input_size = image_size * image_size

# resize and normalize
x_train = np.reshape(X_train, [-1, input_size])
x_train = x_train.astype('float32') / 255
x_test = np.reshape(X_test, [-1, input_size])
x_test = x_test.astype('float32') / 255

batch_size = 64
hidden_units = 10
dropout = 0.15

1.2.2

In [9]:
inputs1 = layers.Input(shape=input_size)
hidden1 = layers.Dense(hidden_units, use_bias=True)(inputs1)
activation1 = keras.activations.relu(hidden1, alpha=0.3)
dropout1 = layers.Dropout(dropout)(activation1)
hidden2 = layers.Dense(hidden_units, use_bias=True)(dropout1)
activation2 = keras.activations.relu(hidden2, alpha=0.3)
dropout2 = layers.Dropout(dropout)(activation2)
outputs = layers.Dense(num_labels)(dropout2)
activation3 = keras.activations.softmax(outputs)

model10 = keras.Model(inputs1, activation3)
model10.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 784)]             0         
                                                                 
 dense_3 (Dense)             (None, 10)                7850      
                                                                 
 tf.nn.leaky_relu_2 (TFOpLam  (None, 10)               0         
 bda)                                                            
                                                                 
 dropout_2 (Dropout)         (None, 10)                0         
                                                                 
 dense_4 (Dense)             (None, 10)                110       
                                                                 
 tf.nn.leaky_relu_3 (TFOpLam  (None, 10)               0         
 bda)                                                      

In [10]:
model10.compile(optimizer="adam", loss=keras.losses.categorical_crossentropy, metrics=[keras.metrics.categorical_accuracy])

history10 = model10.fit(x_train, y_train, epochs=50, batch_size=batch_size, validation_data=(x_test, y_test))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [30]:
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
input_shape = (28, 28, 1)

x_train=X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
x_train=x_train / 255.0
x_test = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)
x_test=x_test/255.0

y_train = tf.one_hot(Y_train.astype(np.int32), depth=10)
y_test = tf.one_hot(Y_test.astype(np.int32), depth=10)

1.2.4

In [41]:
model = keras.models.Sequential([
    keras.layers.Conv2D(32, (5,5), padding='same', activation='relu', input_shape=input_shape),
    keras.layers.Conv2D(32, (5,5), padding='same', activation='relu'),
    keras.layers.MaxPool2D(),
    keras.layers.Dropout(0.25),
    keras.layers.Conv2D(64, (3,3), padding='same', activation='relu'),
    keras.layers.Conv2D(64, (3,3), padding='same', activation='relu'),
    keras.layers.MaxPool2D(strides=(2,2)),
    keras.layers.Dropout(0.25),
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(10, activation='softmax')
])

model.compile(optimizer=tf.keras.optimizers.RMSprop(epsilon=1e-08), loss='categorical_crossentropy', metrics=['acc'])

In [42]:
model.compile(optimizer="adam", loss=keras.losses.categorical_crossentropy, metrics=[keras.metrics.categorical_accuracy])

history = model.fit(x_train, y_train, batch_size=64, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Final Accuracy Result: 0.9914