In [75]:
import numpy as np
import tensorflow as tf

In [76]:
import tensorflow_datasets as tfds

In [77]:
# Load MNIST through TensorFlow Datasets.
#   name          -> which dataset to fetch
#   with_info     -> also return a metadata object (version, features, split sizes)
#   as_supervised -> yield every example as an (input, target) 2-tuple
mnist_dataset, mnist_info = tfds.load(name='mnist', with_info=True, as_supervised=True)

# MNIST ships only 'train' (60,000 examples) and 'test' (10,000 examples);
# there is no dedicated validation split, so one is carved out of 'train' later.
mnist_train = mnist_dataset['train']
mnist_test = mnist_dataset['test']

# Reserve 10% of the training examples for validation. The product is a float,
# so it is cast to an integer tensor before being used as an element count.
num_validation_samples = tf.cast(
    0.1 * mnist_info.splits['train'].num_examples,
    tf.int64,
)

# Size of the test split, kept as an integer tensor as well.
num_test_samples = tf.cast(mnist_info.splits['test'].num_examples, tf.int64)
# Inputs are usually rescaled so that training is numerically more stable.
# MNIST pixels are integers in 0-255; here they are mapped to floats in [0, 1].
# Other scaling schemes work too, as long as the function accepts and returns
# an (image, label) pair so it can be passed to Dataset.map().
def scale(image, label):
    """Return the image as float32 divided by 255, with the label unchanged."""
    return tf.cast(image, tf.float32) / 255., label

# Scale both splits. Train and validation are still a single dataset here.
scaled_train_and_validation_data = mnist_train.map(scale)

test_data = mnist_test.map(scale)

# Shuffle before splitting so that batches (and the validation set) are not
# biased by the order in which the examples are stored.
#   buffer size >= number of samples -> one uniform shuffle of everything
#   1 < buffer size < number of samples -> approximate shuffle, less memory
BUFFER_SIZE = 10000

# reshuffle_each_iteration=False is essential here: by default the dataset is
# re-shuffled every time it is iterated, so take()/skip() below would select a
# different subset on every pass and validation examples would leak into the
# training data. Freezing the order keeps the two subsets disjoint.
shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(
    BUFFER_SIZE, reshuffle_each_iteration=False
)

# The first num_validation_samples elements become the validation set ...
validation_data = shuffled_train_and_validation_data.take(num_validation_samples)
# ... and skip() drops exactly those elements, leaving the rest for training.
train_data = shuffled_train_and_validation_data.skip(num_validation_samples)

# Mini-batch gradient descent: number of samples / batch size = batches per epoch.
#   batch_size = 1                 -> stochastic gradient descent
#   batch_size = number of samples -> (single-batch) gradient descent
#   1 < batch_size < number of samples -> mini-batch gradient descent
BATCH_SIZE = 100

# Re-shuffle only the training subset on every epoch (the split above stays
# fixed), then group it into batches; the weights are updated once per batch.
train_data = train_data.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

# Validation and test are only forward-propagated, so each is placed in one
# single batch; they still have to be batched to match the shape the model expects.
validation_data = validation_data.batch(num_validation_samples)
test_data = test_data.batch(num_test_samples)

# as_supervised=True yields (inputs, targets) tuples, so the single validation
# batch is unpacked into its two parts.
#   iter() -> creates an object that can be iterated one element at a time
#   next() -> fetches the next element of that iterator
validation_inputs, validation_targets = next(iter(validation_data))



In [78]:
# Network layout: 784 inputs (28x28x1 image, flattened), two hidden layers of
# hidden_layer_size units each, and 10 output units (one per digit).
# Hidden layers of different widths can be used if they work better.
input_size = 784
output_size = 10
hidden_layer_size = 100

# Flatten turns each (28, 28, 1) image into a vector for the dense layers.
layer_stack = [tf.keras.layers.Flatten(input_shape=(28, 28, 1))]

# Two identical sigmoid hidden layers.
layer_stack += [
    tf.keras.layers.Dense(hidden_layer_size, activation='sigmoid')
    for _ in range(2)
]

# Softmax output produces a probability distribution over the 10 classes.
layer_stack.append(tf.keras.layers.Dense(output_size, activation='softmax'))

model = tf.keras.Sequential(layer_stack)

In [79]:
# Alternative that was tried: Adam with an explicit learning rate.
#custom_optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
#model.compile(optimizer=custom_optimizer, loss ='sparse_categorical_crossentropy', metrics=['accuracy'])
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Loss functions for classifiers - three flavours of cross-entropy:
#   binary             -> targets use binary encoding
#   categorical        -> targets are expected to be one-hot encoded already
#   sparse_categorical -> targets are integer class labels (no one-hot step needed)
# Observations from experimenting:
#   - raising the learning rate from 0.0001 to 0.001 gave better accuracy at
#     about the same (or slightly longer) training time
#   - the default 'adam' optimizer gave better results than custom_optimizer

In [80]:
# Training
NUM_EPOCHS = 5
model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_inputs, validation_targets),
    verbose=2,
)
# Observations from experimenting:
#   - a smaller batch size increases accuracy but also training time
#   - a larger hidden_layer_size increases accuracy but also training time
#   - adding more layers can lower accuracy while increasing time (not effective)
#
# The accuracies reported per epoch are training/validation accuracies, so the
# model may still be overfitted; it has to be checked by forward-propagating
# the test set as well.

Epoch 1/5
540/540 - 5s - loss: 0.7637 - accuracy: 0.8164 - val_loss: 0.3088 - val_accuracy: 0.9162 - 5s/epoch - 9ms/step
Epoch 2/5
540/540 - 5s - loss: 0.2564 - accuracy: 0.9266 - val_loss: 0.2265 - val_accuracy: 0.9377 - 5s/epoch - 9ms/step
Epoch 3/5
540/540 - 5s - loss: 0.1949 - accuracy: 0.9429 - val_loss: 0.1847 - val_accuracy: 0.9495 - 5s/epoch - 9ms/step
Epoch 4/5
540/540 - 5s - loss: 0.1577 - accuracy: 0.9537 - val_loss: 0.1564 - val_accuracy: 0.9583 - 5s/epoch - 9ms/step
Epoch 5/5
540/540 - 5s - loss: 0.1314 - accuracy: 0.9614 - val_loss: 0.1307 - val_accuracy: 0.9642 - 5s/epoch - 9ms/step


<keras.callbacks.History at 0x2152cac8c70>

In [81]:
# Evaluate on the held-out test set; with metrics=['accuracy'] the result
# unpacks into the loss followed by the accuracy.
evaluation_result = model.evaluate(test_data)
test_loss, test_acc = evaluation_result



In [82]:
# Print the test metrics in readable form (accuracy as a percentage).
print('Test loss: {0:.2f}. Test accuracy: {1: .2f}%'.format(test_loss, 100. * test_acc))
# The test accuracy is close to the validation accuracy, which suggests the
# model did not overfit after all.
# If the model is changed afterwards, the test set has to be evaluated again
# from scratch.

Test loss: 0.13. Test accuracy:  96.04%


In [83]:
#Comparing hidden-layer activations on this example: relu > tanh > sigmoid
#training time -> relu 24s, tanh 26s, sigmoid 25s
#test accuracy (%) -> relu 97.38, tanh 97.23, sigmoid 96.04