<a href="https://colab.research.google.com/github/Sairaj-97/Deep-Learning/blob/main/DataAugmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np

In [None]:
# tf_flowers only provides a 'train' split, so slice it into an 80% training
# portion and a 20% held-out portion. as_supervised=True yields (image, label) pairs.
(ds_train, ds_test), ds_info = tfds.load(
    name='tf_flowers',
    split=['train[:80%]', 'train[80%:]'],
    as_supervised=True,
    with_info=True,
)



Downloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to /root/tensorflow_datasets/tf_flowers/3.0.1...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Generating splits...:   0%|          | 0/1 [00:00<?, ? splits/s]

Generating train examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/tf_flowers/incomplete.VU3AJ6_3.0.1/tf_flowers-train.tfrecord*...:   0%|   …

Dataset tf_flowers downloaded and prepared to /root/tensorflow_datasets/tf_flowers/3.0.1. Subsequent calls will reuse this data.


In [None]:
# Print the dataset metadata (features, label classes, split sizes) returned by tfds.load.
print(ds_info)

tfds.core.DatasetInfo(
    name='tf_flowers',
    full_name='tf_flowers/3.0.1',
    description="""
    A large set of images of flowers
    """,
    homepage='https://www.tensorflow.org/tutorials/load_data/images',
    data_dir='/root/tensorflow_datasets/tf_flowers/3.0.1',
    file_format=tfrecord,
    download_size=218.21 MiB,
    dataset_size=221.83 MiB,
    features=FeaturesDict({
        'image': Image(shape=(None, None, 3), dtype=uint8),
        'label': ClassLabel(shape=(), dtype=int64, num_classes=5),
    }),
    supervised_keys=('image', 'label'),
    disable_shuffling=False,
    nondeterministic_order=False,
    splits={
        'train': <SplitInfo num_examples=3670, num_shards=2>,
    },
    citation="""@ONLINE {tfflowers,
    author = "The TensorFlow Team",
    title = "Flowers",
    month = "jan",
    year = "2019",
    url = "http://download.tensorflow.org/example_images/flower_photos.tgz" }""",
)


In [None]:
# Look at a single raw example: its original (un-resized) image shape and its label.
first_example = ds_train.take(1)
for image, label in first_example:
    print("Image shape:", image.shape)
    print("Label:", label)

Image shape: (333, 500, 3)
Label: tf.Tensor(2, shape=(), dtype=int64)


In [None]:
def preprocess(image, label):
    """Resize an image to 128x128 and rescale its pixel values by 1/255.

    Args:
        image: Image tensor from the dataset (uint8 pixels in 0-255).
        label: Class label; passed through untouched.

    Returns:
        A ``(image, label)`` tuple where the image is a float32 tensor of
        spatial size 128x128 with values divided by 255.
    """
    # A fixed spatial size gives every example the same shape for the CNN input.
    resized = tf.image.resize(image, (128, 128))

    # Cast to float32, then bring the 0-255 range down to 0-1.
    scaled = tf.cast(resized, tf.float32) / 255.0

    return scaled, label


In [None]:
# Run preprocess over every (image, label) pair of both splits. AUTOTUNE lets
# tf.data choose the number of parallel map calls and the prefetch buffer size,
# so preprocessing can overlap with model execution.
autotune = tf.data.AUTOTUNE
ds_train = ds_train.map(preprocess, num_parallel_calls=autotune).prefetch(autotune)
ds_test = ds_test.map(preprocess, num_parallel_calls=autotune).prefetch(autotune)

# Sanity check: a preprocessed example should now be 128x128x3.
for image, label in ds_train.take(1):
    print("Image shape:", image.shape)
    print("Label:", label)


Image shape: (128, 128, 3)
Label: tf.Tensor(2, shape=(), dtype=int64)


**Note:** a `tf.data.Dataset` must be batched before it is passed to `model.fit()` in TensorFlow/Keras; Keras does not batch dataset inputs for you.



In [None]:
# Shuffle before batching so each epoch sees differently composed batches, and
# prefetch *after* batching so whole batches are prepared while the model trains.
ds_train = ds_train.shuffle(1000).batch(32).prefetch(tf.data.AUTOTUNE)

# The test split keeps its order (predictions are later compared with labels
# position by position), so it is only batched and prefetched.
ds_test = ds_test.batch(32).prefetch(tf.data.AUTOTUNE)

In [None]:
# Random image perturbations (flip, rotation, zoom, contrast) bundled into one
# reusable layer stack that can be dropped into a model.
augmentation_layers = [
    tf.keras.layers.RandomFlip(mode='horizontal'),
    tf.keras.layers.RandomRotation(factor=0.2, fill_mode='nearest'),
    tf.keras.layers.RandomZoom(height_factor=0.2),
    tf.keras.layers.RandomContrast(factor=0.2),
]
dataAugmentation = tf.keras.Sequential(augmentation_layers)

In [None]:
# Two conv/pool stages extract spatial features from the augmented image.
conv_stack = [
    tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
]

# Flatten the feature maps and classify into the 5 flower classes.
dense_head = [
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=128, activation='relu'),
    tf.keras.layers.Dense(units=5, activation='softmax'),
]

# Augmentation sits right after the input so it is part of the model itself.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(128, 128, 3)),
    dataAugmentation,
    *conv_stack,
    *dense_head,
])

In [None]:
# Labels are integer class ids rather than one-hot vectors, hence the sparse
# variant of categorical cross-entropy.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

We don't need to convert `ds_train` to a NumPy array, because `tf.data.Dataset` is TensorFlow's recommended input format for scalable and efficient training.

Under the hood, `model.fit()` can consume:

- `tf.data.Dataset`
- NumPy arrays
- Pandas DataFrames
- Python generators

In [None]:
# Keep the returned History so the per-epoch loss/accuracy remain available for
# inspection, and so the cell does not echo the object's repr as its output.
history = model.fit(ds_train, epochs=20)

Epoch 1/20
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 27ms/step - accuracy: 0.7623 - loss: 0.6643
Epoch 2/20
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 22ms/step - accuracy: 0.7574 - loss: 0.6290
Epoch 3/20
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - accuracy: 0.7545 - loss: 0.6225
Epoch 4/20
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - accuracy: 0.7457 - loss: 0.6285
Epoch 5/20
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 23ms/step - accuracy: 0.7725 - loss: 0.5757
Epoch 6/20
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 23ms/step - accuracy: 0.7707 - loss: 0.6024
Epoch 7/20
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 22ms/step - accuracy: 0.7770 - loss: 0.5860
Epoch 8/20
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - accuracy: 0.8006 - loss: 0.5500
Epoch 9/20
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7a6013be2e50>

In [None]:
# evaluate() returns the loss followed by the compiled metric (accuracy).
test_loss_and_metrics = model.evaluate(ds_test)
loss, accuracy = test_loss_and_metrics
print("Loss:", loss)
print("Accuracy:", accuracy)

[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.7059 - loss: 0.7698
Loss: 0.7283776998519897
Accuracy: 0.7138964533805847


In [None]:
# Run the model over every batch of the test split; the softmax output layer
# yields one row of 5 class scores per image.
y_pred = model.predict(x=ds_test)

[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step


In [None]:
# Confirm what container type model.predict() handed back.
type(y_pred)

numpy.ndarray

In [None]:
# Collapse each row of class scores to the index of its highest score in a
# single vectorised call. Because axis=1 is explicit, accidentally re-running
# this cell on the already-reduced 1-D result raises an AxisError instead of
# silently turning every prediction into 0.
y_pred = np.argmax(y_pred, axis=1)

In [None]:
# Peek at the first five entries of y_pred.
y_pred[:5]

[np.int64(2), np.int64(2), np.int64(4), np.int64(2), np.int64(1)]

## Prediction for one batch

In [None]:
# Pull exactly one (images, labels) batch from the test split; take(1) already
# limits the iteration, so no loop or break is needed.
images, labels = next(iter(ds_test.take(1)))

# Reduce each row of class scores to its most likely class index in one
# vectorised call; tolist() yields plain Python ints, which print cleanly.
batch_pred = np.argmax(model.predict(images), axis=1).tolist()

# Ground-truth labels for the same batch, in the same order as batch_pred.
actual_labels = labels.numpy().tolist()


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step


In [None]:
# Show predicted class indices first, then the true labels, one list per line.
for class_ids in (batch_pred, actual_labels):
    print(class_ids)

[np.int64(2), np.int64(2), np.int64(4), np.int64(2), np.int64(1), np.int64(1), np.int64(1), np.int64(2), np.int64(3), np.int64(3), np.int64(4), np.int64(3), np.int64(2), np.int64(1), np.int64(2), np.int64(4), np.int64(1), np.int64(0), np.int64(1), np.int64(0), np.int64(4), np.int64(4), np.int64(4), np.int64(2), np.int64(3), np.int64(0), np.int64(2), np.int64(1), np.int64(0), np.int64(2), np.int64(0), np.int64(4)]
[np.int64(4), np.int64(2), np.int64(2), np.int64(2), np.int64(1), np.int64(1), np.int64(1), np.int64(2), np.int64(3), np.int64(3), np.int64(4), np.int64(3), np.int64(3), np.int64(2), np.int64(2), np.int64(0), np.int64(2), np.int64(0), np.int64(3), np.int64(0), np.int64(2), np.int64(2), np.int64(4), np.int64(2), np.int64(3), np.int64(0), np.int64(2), np.int64(1), np.int64(0), np.int64(4), np.int64(0), np.int64(3)]
