<a href="https://colab.research.google.com/github/DJALLIL05/Predicting-Customer-Behavior/blob/main/hand_written_digits_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Importing the libraries

In [None]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfd

## Loading the dataset

In [None]:
# Fetch MNIST through TensorFlow Datasets.
#   as_supervised=True -> every example is an (image, label) pair
#   with_info=True     -> the dataset metadata is returned as a second value
mnist_dataset, mnist_info = tfd.load(
    'mnist',
    as_supervised=True,
    with_info=True,
)
print('description of the mnist dataset:\n', mnist_info)

description of the mnist dataset:
 tfds.core.DatasetInfo(
    name='mnist',
    version=3.0.0,
    description='The MNIST database of handwritten digits.',
    homepage='http://yann.lecun.com/exdb/mnist/',
    features=FeaturesDict({
        'image': Image(shape=(28, 28, 1), dtype=tf.uint8),
        'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=10),
    }),
    total_num_examples=70000,
    splits={
        'test': 10000,
        'train': 60000,
    },
    supervised_keys=('image', 'label'),
    citation="""@article{lecun2010mnist,
      title={MNIST handwritten digit database},
      author={LeCun, Yann and Cortes, Corinna and Burges, CJ},
      journal={ATT Labs [Online]. Available: http://yann. lecun. com/exdb/mnist},
      volume={2},
      year={2010}
    }""",
    redistribution_info=,
)



## Splitting the data into training, validation and testing sets

In [None]:
# Pull the two official splits out of the loaded dataset dictionary.
mnist_train = mnist_dataset['train']
mnist_test = mnist_dataset['test']

# Reserve 10% of the training examples for validation. The count is cast to
# an int64 tensor because it is later handed to take()/skip()/batch().
num_validation_samples = tf.cast(
    0.1 * mnist_info.splits['train'].num_examples, tf.int64
)

# Size of the test split, also as an int64 tensor (used as its batch size).
num_test_samples = tf.cast(mnist_info.splits['test'].num_examples, tf.int64)

## Scaling the data

In [None]:
def scale(image, label):
  """scale: rescale pixel intensities to the range [0, 1]
  args:
  image: image tensor; the dataset info reports uint8 images of shape
    (28, 28, 1), i.e. pixel values between 0 and 255
  label: class label, passed through untouched
  return:
  image: float32 tensor of the same shape as the input (no flattening is
    done here), with every value divided by 255
  label: the same as the input label"""
  image = tf.cast(image, tf.float32)
  # 255 is the largest value a uint8 pixel can hold, so dividing by it maps
  # the intensities onto [0, 1].
  image /= 255.
  return image, label

In [None]:
# Run `scale` over every (image, label) pair of both splits.
scaled_train_and_validation_data, scaled_test_data = (
    split.map(scale) for split in (mnist_train, mnist_test)
)

## Shuffling the data

In [None]:
# shuffling the training, validation and testing datasets
BUFFER_SIZE = 10000
SPLIT_SEED = 42

# The first shuffle only decides which examples become validation data, so
# its order must be frozen: a fixed seed plus reshuffle_each_iteration=False
# makes take()/skip() below return the same two disjoint subsets on every
# pass. With the default (reshuffle_each_iteration=True) the order changes
# each epoch, so examples from the validation set (which is materialised
# only once) would end up in the training batches of later epochs.
shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(
    BUFFER_SIZE, seed=SPLIT_SEED, reshuffle_each_iteration=False
)
validation_data = shuffled_train_and_validation_data.take(num_validation_samples)

# Second shuffle touches the training examples only, so every epoch still
# sees them in a fresh order.
train_data = shuffled_train_and_validation_data.skip(num_validation_samples).shuffle(BUFFER_SIZE)
test_data = scaled_test_data.shuffle(100)

## Setting batch sizes for train, validation and test data

In [None]:
# Batch the three splits.
BATCH_SIZE = 100

# Training runs on mini-batches of BATCH_SIZE examples.
train_data = train_data.batch(BATCH_SIZE)

# Validation and test data are each packed into one batch whose size is the
# full sample count of that split.
test_data = test_data.batch(num_test_samples)
validation_data = validation_data.batch(num_validation_samples)

# Take the first (and only) validation batch as a pair of tensors.
validation_iterator = iter(validation_data)
validation_inputs, validation_targets = next(validation_iterator)

## Creating the neural network model

In [None]:
# Network dimensions.
input_size = 784  # 28 * 28 * 1; not referenced by the layers below
output_size = 10
hidden_layer_size = 50

# Feed-forward classifier: flatten -> two ReLU hidden layers -> softmax output.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28, 1)))
model.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
model.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
model.add(tf.keras.layers.Dense(output_size, activation='softmax'))

## Training the model

In [None]:
# Training configuration: Adam optimizer, sparse categorical cross-entropy
# as the loss, and accuracy as the reported metric.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Fit on the training batches for NUM_EPOCHS passes, scoring the validation
# tensors at the end of every epoch.
NUM_EPOCHS = 5
model.fit(
    train_data,
    validation_data=(validation_inputs, validation_targets),
    epochs=NUM_EPOCHS,
    verbose=2,
)

Epoch 1/5
540/540 - 6s - loss: 0.4045 - accuracy: 0.8848 - val_loss: 0.2087 - val_accuracy: 0.9410
Epoch 2/5
540/540 - 6s - loss: 0.1795 - accuracy: 0.9480 - val_loss: 0.1531 - val_accuracy: 0.9545
Epoch 3/5
540/540 - 7s - loss: 0.1384 - accuracy: 0.9585 - val_loss: 0.1265 - val_accuracy: 0.9620
Epoch 4/5
540/540 - 6s - loss: 0.1163 - accuracy: 0.9652 - val_loss: 0.1132 - val_accuracy: 0.9642
Epoch 5/5
540/540 - 7s - loss: 0.0976 - accuracy: 0.9702 - val_loss: 0.0923 - val_accuracy: 0.9722


<tensorflow.python.keras.callbacks.History at 0x7f78861bb668>

## Evaluating model performance

In [None]:
# testing the model
test_loss, test_accuracy = model.evaluate(test_data)
# Both fields use '.2f' (two decimals). The accuracy field used to be ' 2f',
# which is "width 2, default precision" and therefore printed six decimals.
print('Test loss: {0: .2f}. Test accuracy: {1: .2f}%'.format(test_loss, test_accuracy * 100))

Test loss:  0.10. Test accuracy:  96.890002%
