# Imports

In [1]:
from numpy.random import seed
seed(888)
from tensorflow import random
random.set_seed(404)

import tensorflow.compat.v1 as tf


#Step 1: Load the MNIST dataset
#from tensorflow.examples.tutorials.mnist import input_data
#mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

#Step 2: Preprocess the data
#x_train, y_train = mnist.train.images, mnist.train.labels
#x_test, y_test = mnist.test.images, mnist.test.labels

#Step 3: Build the model
n_inputs = 28 * 28 # MNIST images are 28x28 pixels
n_hidden = 128 # number of neurons in the hidden layer
n_outputs = 10 # number of output classes

#Define the placeholders for input and output data
x = tf.placeholder(tf.float32, shape=[None, n_inputs], name="x")
y = tf.placeholder(tf.float32, shape=[None, n_outputs], name="y")

#Define the weights and biases for the hidden and output layers
#initializer = tf.contrib.layers.variance_scaling_initializer()
w1 = tf.Variable(tf.zeros([n_inputs, n_hidden]), name="w1")
b1 = tf.Variable(tf.zeros([n_hidden]), name="b1")
w2 = tf.Variable(tf.zeros([n_hidden, n_outputs]), name="w2")
b2 = tf.Variable(tf.zeros([n_outputs]), name="b2")

#Define the hidden layer
hidden = tf.nn.relu(tf.matmul(x, w1) + b1)

#Define the output layer
logits = tf.matmul(hidden, w2) + b2
y_pred = tf.nn.softmax(logits)

#Define the loss function
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits))

#Define the optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cross_entropy)

#Step 4: Train the model
n_epochs = 10
batch_size = 100

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(n_epochs):
        n_batches = mnist.train.num_examples // batch_size
        for batch in range(n_batches):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
        
        # Calculate accuracy on test set after each epoch
        acc = sess.run(tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y_pred, 1), tf.argmax(y, 1)), tf.float32)), 
                       feed_dict={x: x_test, y: y_test})
        print("Epoch", epoch, "Test Accuracy:", acc)

#Step 5: Evaluate the model
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    acc = sess.run(tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y_pred, 1), tf.argmax(y, 1)), tf.float32)), 
                   feed_dict={x: x_test, y: y_test})
    print("Test Accuracy:", acc)


In [2]:
import os
import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

Instructions for updating:
non-resource variables are not supported in the long term


# Constants

In [3]:
X_TRAIN_PATH = 'MNIST/digit_xtrain.csv'
X_TEST_PATH = 'MNIST/digit_xtest.csv'
Y_TRAIN_PATH = 'MNIST/digit_ytrain.csv'
Y_TEST_PATH = 'MNIST/digit_ytest.csv'

NR_CLASSES = 10
VALIDATION_SIZE= 10000

IMAGE_WIDTH = 28
IMAGE_HEIGHT = 28
CHANNELS = 1
TOTAL_INPUTS = (IMAGE_WIDTH*IMAGE_HEIGHT*CHANNELS)

# Get the Data

In [4]:
%%time

y_train_all = np.loadtxt(Y_TRAIN_PATH,delimiter=',',dtype=int) #read the text file and convert to numpy array

CPU times: total: 0 ns
Wall time: 38.1 ms


In [5]:
y_train_all.shape

(60000,)

In [6]:
y_test = np.loadtxt(Y_TEST_PATH,delimiter = ',',dtype=int)

In [7]:
y_test.shape

(10000,)

In [8]:
%%time

x_train_all = np.loadtxt(X_TRAIN_PATH,delimiter=',',dtype=int)

CPU times: total: 4.22 s
Wall time: 6.63 s


In [9]:
%%time

x_test = np.loadtxt(X_TEST_PATH,delimiter=',',dtype=int)

CPU times: total: 578 ms
Wall time: 1.14 s


In [10]:
x_test.shape

(10000, 784)

# Explore

In [11]:
x_train_all.shape

(60000, 784)

In [12]:
x_train_all[0]

array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   3,  18,  18,  18,
       126, 136, 175,  26, 166, 255, 247, 127,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,  30,  36,  94, 154, 17

In [13]:
y_train_all[:5] # five first entry, our targer is sparse because has int for classes

array([5, 0, 4, 1, 9])

# Data Preprocessing

In [14]:
# Rescale

x_train_all, x_test = x_train_all/255.0, x_test/255.0

#### Convert target values to one-hot encoding

In [15]:
values = y_train_all[:5]
np.eye(10)[values]  # array element indexing  #the values in the square brackets acts as the index array.
# we're using this entire array as an index and we're pulling out several of the rows from the identity matrix.

array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

In [16]:
np.eye(10) # why 10? because we have got 10 different labels in our dataset 0-9

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

In [17]:
np.eye(10)[9]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.])

In [18]:
values

array([5, 0, 4, 1, 9])

In [19]:
values[2]

4

In [20]:
y_train_all = np.eye(NR_CLASSES)[y_train_all]

In [21]:
y_train_all.shape

(60000, 10)

In [22]:
y_test = np.eye(NR_CLASSES)[y_test]
y_test.shape

(10000, 10)

### Create validation dataset from training data

**Challenge:** Split the training dataset into a smaller training dataset and a validation dataset for the features and the labels. Create four arrays: ```x_val```, ```y_val```, ```x_train```, and ```y_train``` from ```x_train_all``` and ```y_train_all```. Use the validation size of 10,000. 

In [23]:
x_val =  x_train_all[:VALIDATION_SIZE] # first 10,000 value
y_val =  y_train_all[:VALIDATION_SIZE]

In [24]:
x_train = x_train_all[VALIDATION_SIZE:]
y_train = y_train_all[VALIDATION_SIZE:]

In [25]:
y_train.shape

(50000, 10)

# Setup Tensorflow Graph

- To create a tensor we need, data types and a shape

In [26]:
# set features and labels as placeholders with variable batch size and data type tf.float32
#X = tf.keras.Input(shape=(None, TOTAL_INPUTS), dtype=tf.float32) # features
#Y = tf.keras.Input(shape=(None, NR_CLASSES), dtype=tf.float32)  # labels


# set features and labels as placeholders with variable batch size and data type tf.float32
X =  tf.placeholder(tf.float32, shape=[None, TOTAL_INPUTS], name="x") # features
Y = tf.placeholder(tf.float32, shape=[None, NR_CLASSES], name="y")  # labels

### Neural Network Architecture

#### Hyperparameters

In [27]:
nr_epochs =5
learning_rate = 1e-4    # le-4 = 0.0001

# Nr neurons and layers
# the output layers is getting 10

n_hidden1 = 512
n_hidden2 = 64

## First Hidden Layer

In [28]:
# initialized weight for the first hidden layers
initial_w1 = tf.random.truncated_normal(shape=[TOTAL_INPUTS,n_hidden1], stddev=0.1, seed=42) # nr inputs and nr layers
w1 = tf.Variable(initial_value=initial_w1) #  variables are used to hold and update the parameters  

In [29]:
# initialized bias for the first hidden layers
initial_b1 = tf.constant(value=0, shape=[n_hidden1])  #value= zero to initializing biases.
b1 = tf.Variable(initial_value=initial_b1)

In [30]:
# calculates the input to the first hidden layer
# X -> raw input
layer1_in = tf.matmul(X, w1) + tf.cast(b1, tf.float32) #matrix multiplication between the input tensor X and the weight tensor w1
#layer1_in = tf.keras.layers.Lambda(lambda x: tf.add(tf.matmul(X, w1), tf.cast(b1, tf.float32)), name='layer1_in')

In [34]:
# output of this layers
layer1_out = tf.nn.relu(layer1_in)

## Second Hidden Layer

In [35]:
# initialized weight for the second hidden layers
# in second layer instead of total input, get the second hidden layer
initial_w2 = tf.random.truncated_normal(shape=[n_hidden1,n_hidden2], stddev=0.1, seed=42)
w2 = tf.Variable(initial_value=initial_w2)

# initialized bias for the second hidden layers
initial_b2 = tf.constant(value=0, shape=[n_hidden2])
b2 = tf.Variable(initial_value=initial_b2)

# what the features were that were going into the second hidden layer. output of second layers 
layer2_in = tf.matmul(layer1_out, w2) +  tf.cast(b2, tf.float32)

# output of this layers
layer2_out = tf.nn.relu(layer2_in)

## Output Layer

In [36]:
# weight
initial_w3 = tf.random.truncated_normal(shape=[n_hidden2,NR_CLASSES], stddev=0.1, seed=42)
w3 = tf.Variable(initial_value=initial_w3)

# bias
initial_b3 = tf.constant(value=0, shape=[NR_CLASSES])
b3 = tf.Variable(initial_value=initial_b3)

# layers input and output 
layer3_in = tf.matmul(layer2_out, w3) +  tf.cast(b3, tf.float32)
output = tf.nn.softmax(layer3_in)

# Loss, Optimisation & Metrics

### Defining Loss function

- Loss = Calculate the error between the actual output and predicted output


- Logit = the output from the output layer


- label = are going to be the actual labels (Y)

In [37]:
# we are training data in the batches that's why need to take the average of the losses

loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=output))

### Defining Optimizers

In [38]:
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

- our output is a operation which update the variable, i.e., weights and biases

### Accuracy Metrics

- Accuracy =  compare "prediction" and "true labels" and we need to check if they're equal.


- Once we know if they are equal, then we have a correct prediction.


- And once we know how many correct predictions we have, we can work out the accuracy of our model.

In [39]:
# compare two quantities in this case the output and Y 

correct_pred = tf.equal(tf.argmax(output, axis=1), tf.argmax(Y, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32)) # the average across all the batches

# Train our model 

- involves of starting a session, batching our data and finally running the trainig loop

## Run Sessions

In [41]:
# session object encapsulate the environment where all operation and calculation take placeand executed
sess = tf.Session()

In [None]:
# Initialized all the variables 

init = tf.global_variables_initializer()
sess.run(init) # run session

# these three line of code get us all set up

In [60]:
w1.eval(sess)

array([[-0.02807751, -0.01377521, -0.06763297, ..., -0.02663724,
         0.02861341, -0.05550233],
       [-0.16205125, -0.18859725, -0.03102448, ..., -0.0820701 ,
        -0.03345905, -0.02453214],
       [ 0.12582639, -0.16444902,  0.13603579, ..., -0.09897225,
        -0.09923435,  0.1451435 ],
       ...,
       [-0.04471838, -0.09593774, -0.08978765, ...,  0.04240045,
        -0.18997248,  0.00134785],
       [ 0.03215451,  0.04336654, -0.18240118, ...,  0.08296242,
        -0.10039439, -0.12682591],
       [ 0.08766606, -0.15083945,  0.08048793, ...,  0.07548849,
        -0.04359084, -0.11031353]], dtype=float32)

In [48]:
b3.eval(sess) # output biases

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [None]:
# fisrt we need to spilit our training data into smaller components
# because we want to be able to train our model on batches of 1000 samples at a time.

## Batching the Data

In [49]:
size_of_batch = 1000

In [51]:
num_examples = y_train.shape[0]
nr_iterations = int(num_examples/size_of_batch)

index_in_epoch= 0 # help where one batch end and another one start  
# 1st 0-> 999 2nd -> 1000 ->1999,...until 50000 examples in our training

In [None]:
# defining function for going from one batch to another batch
#batch size -> how big the batch be
# data and labels --> x, y

In [57]:
def next_batch(batch_size, data, labels):
    
    global num_examples
    global index_in_epoch
    
    start = index_in_epoch   # inx 0 then will be update
    index_in_epoch += batch_size   # index_in_epoch + batch_size
    
    if index_in_epoch > num_examples:
        start = 0
        index_in_epoch = batch_size
    
    end = index_in_epoch
    
    return data[start:end], labels[start:end]  # first time you get value between 0 and 999

### Training Loop

In [58]:
for epoch in range(nr_epochs):  # iterating through each epoch
    
    # ============= Training Dataset =========
    for i in range(nr_iterations):      #iterate through the data itself
        
        batch_x, batch_y = next_batch(batch_size=size_of_batch, data=x_train, labels=y_train)
        
        feed_dictionary = {X:batch_x, Y:batch_y}
        
        sess.run(optimizer, feed_dict=feed_dictionary)
        
    
        batch_accuracy = sess.run(fetches=[accuracy], feed_dict=feed_dictionary)
        
    
    print(f'Epoch {epoch} \t| Training Accuracy = {batch_accuracy}')
    

print('Done training!')

Epoch 0 	| Training Accuracy = [0.363]
Epoch 1 	| Training Accuracy = [0.716]
Epoch 2 	| Training Accuracy = [0.789]
Epoch 3 	| Training Accuracy = [0.812]
Epoch 4 	| Training Accuracy = [0.825]
Done training!
