In [7]:
import tensorflow as tf
import tensorflow_datasets as tfds

In [11]:
## Extraction Phase, where the data is loaded form the internet or other sources
train = tfds.load(name = "fashion_mnist", split = "train[:80%]", as_supervised = True) # simple way to load tfds datasets and make custom splits on the data
validation = tfds.load(name = "fashion_mnist", split = "train[80%:90%]", as_supervised = True) 
test = tfds.load(name = "fashion_mnist", split = "train[90%:]", as_supervised = True) 

In [17]:
## Transorm Phase where the data is batched and augmented to prepare it for training
## Batch and Augment data before training
# Augmentation function
def augment_images(image, label):
    image = tf.cast(image, tf.float32) / 255.0
    image = tf.image.random_flip_left_right(image)     # random horizontal flip
    image = tf.image.random_flip_up_down(image)        # random vertical flip
    image = tf.image.random_brightness(image, max_delta=0.2)  # brightness jitter
    image = tf.image.random_contrast(image, 0.8, 1.2)  # contrast jitter
    return image, label

# Batch size
BATCH_SIZE = 32

# Apply map → shuffle → batch → prefetch
train = (train.map(augment_images)
              .shuffle(10000)
              .batch(BATCH_SIZE)
              )

validation = (validation.map(augment_images)
                       .batch(BATCH_SIZE)
                       )

test = (test.map(augment_images)
             .batch(BATCH_SIZE)
             )

In [19]:
## Because of the way the data was loaded, the shape is now 28,28,1 
## And this has to be specified in the model that we train on this data
## Load Phase, where the data is Loaded into the model for training

model = tf.keras.models.Sequential([tf.keras.layers.Flatten(input_shape=(28,28,1)), # Specify correct input shape
                                    tf.keras.layers.Dense(128, activation=tf.nn.relu),
                                    tf.keras.layers.Dropout(0.2),
                                    tf.keras.layers.Dense(10, activation=tf.nn.softmax)])

model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])

model.fit(train, validation_data = validation, epochs=5)

  super().__init__(**kwargs)


Epoch 1/5
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 6ms/step - accuracy: 0.7324 - loss: 0.7519 - val_accuracy: 0.8072 - val_loss: 0.5474
Epoch 2/5
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - accuracy: 0.8015 - loss: 0.5584 - val_accuracy: 0.8180 - val_loss: 0.4922
Epoch 3/5
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.8171 - loss: 0.5136 - val_accuracy: 0.8440 - val_loss: 0.4296
Epoch 4/5
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.8257 - loss: 0.4876 - val_accuracy: 0.8422 - val_loss: 0.4347
Epoch 5/5
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.8320 - loss: 0.4697 - val_accuracy: 0.8455 - val_loss: 0.4296


<keras.src.callbacks.history.History at 0x247dd54c890>

In [17]:
## Optimizing ETL process in tensorflow
## we can do this by performing the Extraction and the loading in parallel, saving time and resources
## We do not keep our other resources idle, while we are training. We extrac and train in parallel
data = tfds.load('horses_or_humans', split='train', as_supervised=True)
train_batches = data.shuffle(100).batch(10)
model = tf.keras.models.Sequential([tf.keras.layers.Conv2D(16, (3,3), activation='relu', input_shape=(300, 300, 3)),
                                    tf.keras.layers.MaxPooling2D(2, 2),
                                    tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
                                    tf.keras.layers.MaxPooling2D(2,2),
                                    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
                                    tf.keras.layers.MaxPooling2D(2,2),
                                    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
                                    tf.keras.layers.MaxPooling2D(2,2),
                                    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
                                    tf.keras.layers.MaxPooling2D(2,2),
                                    tf.keras.layers.Flatten(),
                                    tf.keras.layers.Dense(512, activation='relu'),
                                    tf.keras.layers.Dense(1, activation='sigmoid')
                                   ])

model.compile(optimizer='Adam', loss='binary_crossentropy',metrics=['accuracy'])
history = model.fit(train_batches, epochs=5)



[1mDownloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to C:\Users\Yusuf Solomon\tensorflow_datasets\horses_or_humans\3.0.0...[0m


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Generating splits...:   0%|          | 0/2 [00:00<?, ? splits/s]

Generating train examples...: 0 examples [00:00, ? examples/s]

Shuffling C:\Users\Yusuf Solomon\tensorflow_datasets\horses_or_humans\incomplete.DFF3WP_3.0.0\horses_or_humans…

Generating test examples...: 0 examples [00:00, ? examples/s]

Shuffling C:\Users\Yusuf Solomon\tensorflow_datasets\horses_or_humans\incomplete.DFF3WP_3.0.0\horses_or_humans…

[1mDataset horses_or_humans downloaded and prepared to C:\Users\Yusuf Solomon\tensorflow_datasets\horses_or_humans\3.0.0. Subsequent calls will reuse this data.[0m
Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 515ms/step - accuracy: 0.8315 - loss: 1.6417
Epoch 2/10
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 489ms/step - accuracy: 0.9387 - loss: 0.1974
Epoch 3/10
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 497ms/step - accuracy: 0.9776 - loss: 0.0822
Epoch 4/10
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 546ms/step - accuracy: 0.9834 - loss: 0.0518
Epoch 5/10
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 516ms/step - accuracy: 0.9464 - loss: 0.1723
Epoch 6/10
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 471ms/step - accuracy: 0.9893 - loss: 0.0433
Epoch 7/10
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 486ms/step - accuracy: 1.0000 - loss: 0.0011
Epoch 8/10
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 475ms/step - accuracy: 1.0000 - loss: 8.7488e-05
Epoch 9/10
[1m103/103