In [14]:
!pip install tensorflow tensorflow-datasets





In [15]:
import tensorflow as tf
import tensorflow_datasets as tfds

# Import TensorFlow and TensorFlow Datasets (TFDS), which provides ready-to-use datasets such as MNIST.

In [16]:
# Load the MNIST train and test splits together with the dataset metadata.
(ds_train, ds_test), ds_info = tfds.load(
    name='mnist',
    split=['train', 'test'],
    as_supervised=True,   # elements come back as (image, label) tuples
    shuffle_files=True,   # shuffle the order in which the dataset files are read
    with_info=True,       # additionally return the metadata object (ds_info)
)

# Load the 'mnist' dataset, split into training data and test data.
# as_supervised=True loads the data as (image, label) tuples; shuffle_files=True shuffles the input files (useful for datasets stored across multiple files).

In [17]:
def normalize_img(image, label):
  """Rescale an image from the 0-255 range to 0-1 (`uint8` -> `float32`).

  Args:
    image: image tensor with pixel values in 0..255.
    label: label paired with the image; passed through untouched.

  Returns:
    A `(scaled_image, label)` tuple where `scaled_image` is `float32`.
  """
  scaled_image = tf.cast(image, tf.float32) / 255.
  return scaled_image, label

# Training input pipeline, built as a single chain.
# NOTE: this cell rebinds `ds_train`, so running it twice stacks the whole
# pipeline on top of itself (normalizing and batching a second time).
# Re-run the loading cell first if this cell has to be executed again.
ds_train = (
    ds_train
    .map(normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
    .cache()
    # Shuffle buffer as large as the training split, placed after the cache.
    .shuffle(ds_info.splits['train'].num_examples)
    .batch(128)
    .prefetch(tf.data.AUTOTUNE)
)

# Train data pipeline:
# .map() applies the normalization function to every element that goes through the "pipeline"; it works like a loop, but it doesn't block the whole data process and lets already-processed data continue.
# .cache() saves the data in a cache so it doesn't need to be recomputed/reloaded every epoch.
# .shuffle() shuffles the data to avoid order bias. IMPORTANT: shuffle AFTER caching, otherwise every epoch will read the same cache in the same order.
# .batch() divides the data into groups, which makes parallel processing easier.
# .prefetch() creates a "buffer" so that the CPU and GPU can work in parallel, preparing and computing different batches without having to wait for each other.

In [18]:
# Evaluation input pipeline, built as a single chain (no shuffling step).
# NOTE: this cell rebinds `ds_test`; re-run the loading cell before running it
# a second time, otherwise the pipeline is applied twice.
ds_test = (
    ds_test
    .map(normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(128)
    .cache()
    .prefetch(tf.data.AUTOTUNE)
)

# Test data pipeline: same as the train data pipeline, but without shuffling, since order bias does not matter when testing.

In [19]:
model = tf.keras.models.Sequential([  # applies the following layers in sequence
  # Declare the input with an explicit Input object whose shape comes from the
  # dataset metadata (expected to be (28, 28, 1) for TFDS MNIST -- confirm via
  # ds_info). This replaces Flatten(input_shape=(28, 28)): Keras 3 warns when
  # `input_shape` is passed to a layer, and (28, 28) left out the channel axis.
  tf.keras.Input(shape=ds_info.features['image'].shape),
  # Flattens each image into a single 1-D feature vector (784 values for 28x28x1).
  tf.keras.layers.Flatten(),
  # Maps the R^784 feature space into an R^128 latent space, which forces the
  # model to keep the useful features and discard useless information.
  # ReLU makes the transformation non-linear, letting the model bend the space
  # so that it can better separate the data (here: tell the digits apart).
  tf.keras.layers.Dense(128, activation='relu'),
  # Maps the R^128 space into R^10: one value (logit) per possible digit,
  # larger for digits the model considers more likely for the image.
  tf.keras.layers.Dense(10),
])
model.compile(  # describes the rules the model learns by
    # Adam with learning rate 0.001 updates the weights.
    optimizer=tf.keras.optimizers.Adam(0.001),
    # The loss takes raw logits (from_logits=True) and applies softmax internally.
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    # Reports the model's accuracy.
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)

# Train for 6 epochs and keep the returned History object: binding it to a name
# makes the per-epoch metrics available afterwards (history.history) and stops
# the bare History repr from being printed as the cell output.
# NOTE: the test split doubles as validation data here, so the reported val_*
# metrics are measured on the test set.
history = model.fit(
    ds_train,
    epochs=6,
    validation_data=ds_test,
)

Epoch 1/6
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - loss: 0.3593 - sparse_categorical_accuracy: 0.8993 - val_loss: 0.1839 - val_sparse_categorical_accuracy: 0.9470
Epoch 2/6
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 0.1626 - sparse_categorical_accuracy: 0.9533 - val_loss: 0.1325 - val_sparse_categorical_accuracy: 0.9623
Epoch 3/6
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 0.1166 - sparse_categorical_accuracy: 0.9658 - val_loss: 0.1070 - val_sparse_categorical_accuracy: 0.9685
Epoch 4/6
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 0.0902 - sparse_categorical_accuracy: 0.9743 - val_loss: 0.0971 - val_sparse_categorical_accuracy: 0.9705
Epoch 5/6
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 0.0735 - sparse_categorical_accuracy: 0.9784 - val_loss: 0.0871 - val_sparse_categorical_accuracy: 0.9731
Epoch

<keras.src.callbacks.history.History at 0x22f825cda90>