In [2]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

tf.random.set_seed(42)

### Data

In [3]:
mnist_dataset, mnist_info = tfds.load(name='mnist', with_info=True, as_supervised=True)

2023-05-31 10:30:10.780223: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 599 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-40GB MIG 3g.20gb, pci bus id: 0000:92:00.0, compute capability: 8.0


In [4]:
mnist_train, mnist_test = mnist_dataset['train'], mnist_dataset['test'] 

In [5]:
num_validation_samples = 0.1 * mnist_info.splits['train'].num_examples
num_validation_samples = tf.cast(num_validation_samples, tf.int64)

# num_train_samples = 0.9 * mnist_info.splits['train'].num_examples
# num_train_samples = tf.cast(num_train_samples, tf.int64)

num_test_samples = mnist_info.splits['test'].num_examples
num_test_samples = tf.cast(num_test_samples, tf.int64)


In [6]:
print(mnist_info.splits['train'].num_examples)
print(mnist_info.splits['test'].num_examples)

60000
10000


#### custom scaler function that takes an input and label, returns both input and label

In [7]:
def scale(image, label):
    print((tf.math.reduce_max(image)))
    image = tf.cast(image, tf.float32)
    print((tf.reduce_max(image)))
    image /= 255
    print((tf.math.reduce_max(image)))
    return image, label

In [8]:
# tf.math.reduce_mean(mnist_train['image'])
# the dataset consists of a number of lists
# each list consist of a multidimentional matrix according to the dataset
# in this case each tensor is of size 1x28x28
ds = mnist_train.take(1)
print(type(ds))

for example in (ds):
    print(type(example))
    # print(x.reshape(1,28,28))
    # print(type(tf.convert_to_tensor(x)))
    # print(example[0][27])
    # print(tf.reduce_max(example))


<class 'tensorflow.python.data.ops.take_op._TakeDataset'>
<class 'tuple'>


2023-05-31 10:30:12.553828: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype string and shape [1]
	 [[{{node Placeholder/_1}}]]
2023-05-31 10:30:12.554689: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int64 and shape [1]
	 [[{{node Placeholder/_4}}]]
2023-05-31 10:30:12.637628: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline s

* tensorflow provides _map_ function to use a custom scaler that you want
* I'm not sure why we have to standardize the values for pixels where it already has a fixed range of 0-255. that means it is already scaled.

In [9]:
scaled_train_and_validation_data = mnist_train.map(scale)
test_data = mnist_test.map(scale)

Tensor("Max:0", shape=(), dtype=uint8)
Tensor("Max_1:0", shape=(), dtype=float32)
Tensor("Max_2:0", shape=(), dtype=float32)
Tensor("Max:0", shape=(), dtype=uint8)
Tensor("Max_1:0", shape=(), dtype=float32)
Tensor("Max_2:0", shape=(), dtype=float32)


#### Shuffle the training data to prepare for random split (to extract validation data)
* BUFFER_SIZE is used to ask the shuffle function to take this bunch of data at a time and shuffle.
* if BUFFER_SIZE is 1, no shuffle will happen.
* if BUFFER_SIZE is full sample, the shuffle will upload all the data into the memory then shuffle all.

In [10]:
BUFFER_SIZE = 10000
shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(BUFFER_SIZE)

#### Split training dataset into validation and training with _take(number)_ and _skip(number)_ functions
* **data.take(number)**: returns the first _number_ of the dataset.
* **data.skip(number)**: returns the dataset after first _number_.

In [11]:
validation_data = shuffled_train_and_validation_data.take(num_validation_samples).cache()
validation_data = validation_data.prefetch(tf.data.AUTOTUNE)
validation_data = validation_data.repeat()
training_data = shuffled_train_and_validation_data.skip(num_validation_samples)

#### Set the batch size

In [12]:
BATCH_SIZE = 100

#### Batch the data
* Only training data is batched becuase the validation data is used in forward propagation only, thus no weight update is happening and it is relatively small
* The tf.data.Dataset.batch() function in TensorFlow returns a batched dataset.

In [13]:
type(validation_data)

tensorflow.python.data.ops.repeat_op._RepeatDataset

In [14]:
training_data = training_data.batch(BATCH_SIZE)
validation_data = validation_data.batch(num_validation_samples)
test_data = test_data.batch(num_test_samples)

In [15]:
type(validation_data)

tensorflow.python.data.ops.batch_op._BatchDataset

#### prepare validation data to be used by the model
* validation_data is of type BatchDataset and iter converts it to a tensor, which can be passed to the fit function
* iter() is a python function to change the datatype into an iterable
* next() will take the next batch from the data

In [16]:
validation_input, validation_target = next(iter(validation_data))

2023-05-31 10:30:15.749406: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_3' with dtype int64 and shape [1]
	 [[{{node Placeholder/_3}}]]
2023-05-31 10:30:15.750511: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_2' with dtype string and shape [1]
	 [[{{node Placeholder/_2}}]]
2023-05-31 10:30:16.490563: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline s

### Model

#### outline the model
- Input:
    * each image is 28*28 pixles --> 784 pixels in total
    * each pixel can have a value from 0 - 255
    * the input data has only one feature which is the greyscale intensity so the data is in 784x1 shape
- Output:
    * the output layer should have 10 nodes that is from 0 - 9
- Hidden Layers:
    * we want to create two hidden layers in between to process the data

#### define the hyper-parameters

In [17]:
input_size = 784
output_size = 10
hidden_layer_size = 100 # this number can be changed as you like.

#### build a model sequentially
* Flatten() function will take the input and convert it from a tensor (of rank3 shape in our example) into the proper vector shape
* Dense() function will take the inputs and dot product it with the weights matrix and also the activation function
    * thus, it is used to add layers to the model

In [18]:
model = tf.keras.Sequential([
                            tf.keras.layers.Flatten(input_shape=(28,28,1)),
                            tf.keras.layers.Dense(hidden_layer_size*2, activation='relu'),
                            tf.keras.layers.Dense(hidden_layer_size, activation='tanh'),
                            tf.keras.layers.Dense(hidden_layer_size/2, activation='tanh'),
                            tf.keras.layers.Dense(hidden_layer_size/4, activation='tanh'),
                            tf.keras.layers.Dense(round(hidden_layer_size/8), activation='tanh'),
                            tf.keras.layers.Dense(output_size, activation='softmax')
                            ])
model2 = tf.keras.Sequential([
                            tf.keras.layers.Flatten(input_shape=(28,28,1)),
                            tf.keras.layers.Dense(hidden_layer_size, activation='tanh'),
                            tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
                            tf.keras.layers.Dense(hidden_layer_size/2, activation='relu'),
                            # tf.keras.layers.Dropout(0.2), # droupout used to prevent overfitting.
                            tf.keras.layers.Dense(hidden_layer_size/4, activation='relu'),
                            tf.keras.layers.Dense(round(hidden_layer_size/8), activation='relu'),
                            tf.keras.layers.Dropout(0.2), # droupout used to prevent overfitting.
                            tf.keras.layers.Dense(output_size, activation='softmax')
                            ])

#### Choose the optimizer and loss functions

In [19]:
# new ADAM doesn't work for me!
# from tensorflow.keras.optimizers import Adam
custom_optimizer = tf.keras.optimizers.legacy.Adam()
model.compile(optimizer=custom_optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model2.compile(optimizer=custom_optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

#### Training

In [20]:
NUM_EPOCHS = 100

In [21]:
#'loss','accuracy','val_loss','val_accuracy'.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=2)
model.fit(training_data, epochs=NUM_EPOCHS, validation_data=(validation_input, validation_target), callbacks=[early_stopping], verbose=2)

Epoch 1/100


2023-05-31 10:30:18.643299: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_2' with dtype string and shape [1]
	 [[{{node Placeholder/_2}}]]
2023-05-31 10:30:18.644399: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_3' with dtype int64 and shape [1]
	 [[{{node Placeholder/_3}}]]
2023-05-31 10:30:20.361125: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:219] failed to create cublas handle: cublasGetStatusString symbol not found.
2023-05-31 10:30:20.361209: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:222] Failure to initialize cublas may be due to OOM (cublas needs 

InternalError: Graph execution error:

Detected at node 'sequential/dense/MatMul' defined at (most recent call last):
    File "/usr/local/lib/python3.8/runpy.py", line 194, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/usr/local/lib/python3.8/runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "/usr/local/lib/python3.8/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/usr/local/lib/python3.8/site-packages/traitlets/config/application.py", line 1043, in launch_instance
      app.start()
    File "/usr/local/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 728, in start
      self.io_loop.start()
    File "/usr/local/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "/usr/local/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
      self._run_once()
    File "/usr/local/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
      handle._run()
    File "/usr/local/lib/python3.8/asyncio/events.py", line 81, in _run
      self._context.run(self._callback, *self._args)
    File "/usr/local/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 513, in dispatch_queue
      await self.process_one()
    File "/usr/local/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 502, in process_one
      await dispatch(*args)
    File "/usr/local/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 409, in dispatch_shell
      await result
    File "/usr/local/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 729, in execute_request
      reply_content = await reply_content
    File "/usr/local/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 423, in do_execute
      res = shell.run_cell(
    File "/usr/local/lib/python3.8/site-packages/ipykernel/zmqshell.py", line 540, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2945, in run_cell
      result = self._run_cell(
    File "/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3000, in _run_cell
      return runner(coro)
    File "/usr/local/lib/python3.8/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3203, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3382, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3442, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/tmp/ipykernel_368614/356648397.py", line 3, in <module>
      model.fit(training_data, epochs=NUM_EPOCHS, validation_data=(validation_input, validation_target), callbacks=[early_stopping], verbose=2)
    File "/usr/local/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 1685, in fit
      tmp_logs = self.train_function(iterator)
    File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 1284, in train_function
      return step_function(self, iterator)
    File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 1268, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 1249, in run_step
      outputs = model.train_step(data)
    File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 1050, in train_step
      y_pred = self(x, training=True)
    File "/usr/local/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 558, in __call__
      return super().__call__(*args, **kwargs)
    File "/usr/local/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/usr/local/lib/python3.8/site-packages/keras/engine/base_layer.py", line 1145, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/usr/local/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/usr/local/lib/python3.8/site-packages/keras/engine/sequential.py", line 412, in call
      return super().call(inputs, training=training, mask=mask)
    File "/usr/local/lib/python3.8/site-packages/keras/engine/functional.py", line 512, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "/usr/local/lib/python3.8/site-packages/keras/engine/functional.py", line 669, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "/usr/local/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/usr/local/lib/python3.8/site-packages/keras/engine/base_layer.py", line 1145, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/usr/local/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/usr/local/lib/python3.8/site-packages/keras/layers/core/dense.py", line 241, in call
      outputs = tf.matmul(a=inputs, b=self.kernel)
Node: 'sequential/dense/MatMul'
Attempting to perform BLAS operation using StreamExecutor without BLAS support
	 [[{{node sequential/dense/MatMul}}]] [Op:__inference_train_function_1318]

In [None]:
model2.fit(training_data, epochs=NUM_EPOCHS, validation_data=(validation_input, validation_target), verbose=2)

Epoch 1/25
540/540 - 3s - loss: 0.7052 - accuracy: 0.7756 - val_loss: 0.2669 - val_accuracy: 0.9257 - 3s/epoch - 6ms/step
Epoch 2/25
540/540 - 3s - loss: 0.3246 - accuracy: 0.9017 - val_loss: 0.1569 - val_accuracy: 0.9567 - 3s/epoch - 5ms/step
Epoch 3/25
540/540 - 3s - loss: 0.2482 - accuracy: 0.9267 - val_loss: 0.1211 - val_accuracy: 0.9678 - 3s/epoch - 5ms/step
Epoch 4/25
540/540 - 3s - loss: 0.2132 - accuracy: 0.9369 - val_loss: 0.1047 - val_accuracy: 0.9718 - 3s/epoch - 5ms/step
Epoch 5/25
540/540 - 3s - loss: 0.1816 - accuracy: 0.9454 - val_loss: 0.0913 - val_accuracy: 0.9760 - 3s/epoch - 5ms/step
Epoch 6/25
540/540 - 3s - loss: 0.1572 - accuracy: 0.9534 - val_loss: 0.0805 - val_accuracy: 0.9765 - 3s/epoch - 5ms/step
Epoch 7/25
540/540 - 3s - loss: 0.1452 - accuracy: 0.9577 - val_loss: 0.0710 - val_accuracy: 0.9802 - 3s/epoch - 6ms/step
Epoch 8/25
540/540 - 3s - loss: 0.1258 - accuracy: 0.9649 - val_loss: 0.0625 - val_accuracy: 0.9827 - 3s/epoch - 5ms/step
Epoch 9/25
540/540 - 3s 

<keras.callbacks.History at 0x7fb0b030cf40>

### Test the model

In [None]:
test_loss, test_accuracy = model.evaluate(test_data)
test_loss2, test_accuracy2 = model2.evaluate(test_data)

2023-05-29 15:09:21.226274: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype string and shape [1]
	 [[{{node Placeholder/_1}}]]
2023-05-29 15:09:21.227216: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_2' with dtype string and shape [1]
	 [[{{node Placeholder/_2}}]]




In [None]:
print('Test loss: {0:.2f}, Test accuracy: {1:.2f}%'. format(test_loss, test_accuracy*100.))
print('Test loss 2: {0:.2f}, Test accuracy 2: {1:.2f}%'. format(test_loss2, test_accuracy2*100.))

Test loss: 0.08, Test accuracy: 98.19%
Test loss 2: 0.16, Test accuracy 2: 97.38%
