In [4]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Set seed
SEED = 338424

# Global variables
IMG_SIZE = (64, 64)
BATCH_SIZE = 32
num_classes = 18 # Number of folders in dataset
AUTOTUNE = tf.data.AUTOTUNE

# Define data augmentation layers
data_augmentation_layers = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.05),
    #layers.RandomBrightness(0.1),
    #layers.RandomContrast(0.1)
])

# Function to apply data augmentation only to training data
def augment(image, label):
    image = data_augmentation_layers(image)
    return image, label

# Load dataset
dataset_dir = 'dataset/hagridset'
full_ds = tf.keras.utils.image_dataset_from_directory(
    dataset_dir,
    shuffle=True,
    seed=SEED,
    image_size=(IMG_SIZE),
    batch_size=BATCH_SIZE,
    label_mode='categorical'
)

# Split into training, validation, and test sets
train_ratio = 0.7
val_ratio = 0.2
test_ratio = 0.1

# Calculate sizes for each split
total_size = len(full_ds)
train_size = int(total_size * train_ratio)
val_size = int(total_size * val_ratio)
test_size = total_size - (train_size + val_size)

# Prepare datasets
AUTOTUNE = tf.data.AUTOTUNE

# Training set
train_ds = (
    full_ds.take(train_size)  # Take first N batches for training
    .shuffle(train_size, seed=SEED)  # Shuffle the training set
    .map(augment, num_parallel_calls=AUTOTUNE)  # Apply data augmentation
    .cache()  # Cache the dataset to speed up subsequent epochs
    .prefetch(buffer_size=AUTOTUNE)  # Prefetch data to reduce latency
)

# Validation set
val_ds = (
    full_ds.skip(train_size)  # Skip training batches
    .take(val_size)  # Take next N batches for validation
    .cache()  # Cache validation data
    .prefetch(buffer_size=AUTOTUNE)  # Prefetch data to reduce latency
)

# Test set
test_ds = (
    full_ds.skip(train_size + val_size)  # Skip training and validation batches
    .cache()  # Cache test data
    .prefetch(buffer_size=AUTOTUNE)  # Prefetch data to reduce latency
)

# Count samples in each subset
def count_samples(dataset):
    return sum(1 for _ in dataset.unbatch())

print(f'Using {count_samples(train_ds)} samples in the Training set')
print(f'Using {count_samples(val_ds)} samples in the Validation set')
print(f'Using {count_samples(test_ds)} samples in the Test set')


# Get class names
class_names = full_ds.class_names
class_names

# Define your model with the data augmentation layer embedded
def build_scratch_model_da2_cnn():
    model = models.Sequential()
    model.add(tf.keras.Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3)))
    model.add(data_augmentation_layers)
    model.add(layers.Rescaling(1.0 / 255))  # Normalize pixel values

    model.add(layers.Conv2D(32, 3, padding='same', activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D())

    model.add(layers.Conv2D(32, 3, padding='same', activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D())

    model.add(layers.Conv2D(64, 3, padding='same', activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D())

    model.add(layers.Conv2D(64, 3, padding='same', activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D())

    model.add(layers.Conv2D(128, 3, padding='same', activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D())

    model.add(layers.Conv2D(128, 3, padding='same', activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D())

    model.add(layers.Flatten())
    model.add(layers.Dense(num_classes, activation='softmax'))

    model.compile(
        optimizer='adam',
        loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
        metrics=['accuracy']
    )
    return model

# Instantiate the model
scratch_model_da2 = build_scratch_model_da2_cnn()
scratch_model_da2.summary()

# Train the model
history_custom = scratch_model_da2.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)]
)






Found 125912 files belonging to 18 classes.
Using 88128 samples in the Training set
Using 25184 samples in the Validation set
Using 12600 samples in the Test set
Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential_4 (Sequential)   (None, 64, 64, 3)         0         
                                                                 
 rescaling_2 (Rescaling)     (None, 64, 64, 3)         0         
                                                                 
 conv2d_12 (Conv2D)          (None, 64, 64, 32)        896       
                                                                 
 batch_normalization_12 (Bat  (None, 64, 64, 32)       128       
 chNormalization)                                                
                                                                 
 max_pooling2d_12 (MaxPoolin  (None, 32, 32, 32)       0         
 g2D)                   

In [2]:
# Evaluate the CNN Data Augmentation Deep Model
scratch_model_da2.evaluate(test_ds)



[0.5706376433372498, 0.8149206638336182]