## **Libraries and Dependencies**

In [8]:
# Imports
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers, optimizers, applications
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.layers import RandomZoom, RandomRotation, RandomFlip, Rescaling, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, GlobalAveragePooling2D
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
from PIL import Image
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, LearningRateScheduler
from tensorflow.keras.applications import VGG16
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Sequential
import os
import random
import pandas as pd
import cv2
import logging
import warnings
import json


# Suppress warnings from the logging module
logging.getLogger('tensorflow').setLevel(logging.ERROR)
warnings.filterwarnings("ignore", category=UserWarning)


## **Tensorflow Version**

In [2]:
# Show which TensorFlow release this notebook is running against.
print(tf.version.VERSION)

2.10.0


## **GPU Checker**

In [3]:
# Ask TensorFlow for the physical GPU devices it can see and report the result.
gpus = tf.config.list_physical_devices('GPU')
gpu_report = f"GPUs detected: {len(gpus)}" if gpus else "No GPU detected."
print(gpu_report)

GPUs detected: 1


In [4]:
# Request fatal-errors-only logging from TensorFlow's native layer.
# NOTE(review): this cell runs after `import tensorflow`; the variable
# presumably has to be set before TensorFlow is first imported to have any
# effect — confirm, and if so move this assignment above the imports.
os.environ.update(TF_CPP_MIN_LOG_LEVEL='3')

In [22]:
# Set seed
# SEED is reused by the dataset loader and the shuffle calls further down.
SEED = 338424
# Defining the constant alone seeds nothing: seed Python's `random`, NumPy and
# TensorFlow in one call so weight initialisation, dropout and any other
# stochastic op are repeatable after Restart Kernel -> Run All.
tf.keras.utils.set_random_seed(SEED)

# Global variables
IMG_SIZE = (128, 128)  # size every image is resized to when the dataset is loaded
BATCH_SIZE = 32
num_classes = 18 # Number of folders in dataset
AUTOTUNE = tf.data.AUTOTUNE  # let tf.data choose prefetch buffer sizes

## **VGG-16 Dataset: Loading, Splitting, Shuffling, Caching**

In [23]:
# Load Dataset
# Images under `dataset_dir` are resized to IMG_SIZE and grouped into batches
# of BATCH_SIZE; labels are one-hot encoded ('categorical').
dataset_dir = 'dataset/hagridset'
full_ds_vgg16 = tf.keras.utils.image_dataset_from_directory(
    dataset_dir,
    shuffle=True,
    seed=SEED,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode='categorical'
)

# Split into training, validation, and test sets
train_ratio_vgg16 = 0.7
val_ratio_vgg16 = 0.2
test_ratio_vgg16 = 0.1  # informational only: the test set takes whatever remains

# Total length of the dataset, measured in batches (the dataset is batched)
total_size_vgg16 = len(full_ds_vgg16)

# Compute the size of each split, in batches
train_size_vgg16 = int(total_size_vgg16 * train_ratio_vgg16)
val_size_vgg16 = int(total_size_vgg16 * val_ratio_vgg16)
test_size_vgg16 = total_size_vgg16 - (train_size_vgg16 + val_size_vgg16)

# Build each subset exactly once.
# * cache() comes right after take()/skip() so the membership of a subset is
#   pinned the first time it is fully iterated.
# * Only the training set is shuffled, and it is shuffled AFTER the cache so
#   the batch order changes every epoch instead of being frozen in the cache.
# * Validation and test are evaluation-only, so their order does not matter.
# NOTE(review): cache() with no filename keeps every decoded batch in RAM; if
# that does not fit, pass a file path to cache() to spill to disk.
# NOTE(review): with shuffle=True the loader presumably re-shuffles locally on
# every pass, so samples near a split boundary could differ between passes
# until each cache is filled — confirm, or split at file level instead.
train_ds_vgg16 = (
    full_ds_vgg16
    .take(train_size_vgg16)
    .cache()
    .shuffle(train_size_vgg16, seed=SEED)
    .prefetch(buffer_size=AUTOTUNE)
)
val_ds_vgg16 = (
    full_ds_vgg16
    .skip(train_size_vgg16)
    .take(val_size_vgg16)
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)
test_ds_vgg16 = (
    full_ds_vgg16
    .skip(train_size_vgg16 + val_size_vgg16)
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)

# Backward-compatible aliases: earlier revisions bound the cached validation
# and test pipelines to these shorter names.
val_ds = val_ds_vgg16
test_ds = test_ds_vgg16

# Count samples in each subset
def count_samples(dataset):
    """Return how many individual elements `dataset` yields once unbatched.

    Args:
        dataset: Any object whose `unbatch()` method returns an iterable of
            single samples (e.g. a batched `tf.data.Dataset`).

    Returns:
        int: Number of elements produced by `dataset.unbatch()`. The whole
        dataset is iterated once to obtain the count.
    """
    total = 0
    for _ in dataset.unbatch():
        total += 1
    return total

# Report how many samples ended up in each subset. Every call to
# count_samples walks the whole subset once, so this cell can be slow.
subsets_to_report = (
    ('Training', train_ds_vgg16),
    ('Validation', val_ds_vgg16),
    ('Test', test_ds_vgg16),
)
for subset_label, subset_ds in subsets_to_report:
    print(f'Using {count_samples(subset_ds)} samples in the {subset_label} set')

Found 125912 files belonging to 18 classes.
Using 88128 samples in the Training set


: 

## **Regularization Factors**

This code snippet defines the values for L1 and L2 regularization, which are both set to 0.01. It then creates an "Elastic Net Regularizer" that combines these L1 and L2 values to help prevent the model from overfitting by penalizing overly complex or large weight values in the model's learning process.

In [10]:
# Penalty strengths for the L1 and L2 terms (example values).
l1_factor = 0.01
l2_factor = 0.01

# Elastic-net regularizer: one object that applies both penalties together.
elastic_net_regularizer = regularizers.L1L2(l1=l1_factor, l2=l2_factor)

## **Callbacks: Learning Rate Scheduler and Early Stopping**

In [11]:
# Define a learning rate schedule
def lr_time_based_decay(epoch, lr, decay_rate=0.01):
    """Return the learning rate to use for `epoch`.

    The incoming rate is divided by ``1 + decay_rate * epoch``. When used
    with Keras' LearningRateScheduler, `lr` is the rate currently in effect
    (not the initial one), so the reductions compound from epoch to epoch.

    Args:
        epoch: Epoch index; 0 leaves the rate unchanged.
        lr: Learning rate currently in effect.
        decay_rate: Strength of the decay. Defaults to 0.01, the value that
            was previously hard-coded.

    Returns:
        float: ``lr / (1 + decay_rate * epoch)``.
    """
    return lr * 1 / (1 + decay_rate * epoch)

# Define callbacks
# Early stopping: watch 'val_loss', give up after 3 epochs without improvement
# (patience=3) and roll the model back to the weights of its best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Learning-rate scheduling: at the start of every epoch the rate is recomputed
# by lr_time_based_decay; verbose=1 prints the value that was set.
lr_scheduler = LearningRateScheduler(lr_time_based_decay, verbose=1)

callbacks = [early_stopping, lr_scheduler]

## **Data Augmentation Layers**

In [12]:
# Augmentation pipeline: random horizontal flips followed by small random
# rotations (factor 0.05).
augmentation_steps = [
    RandomFlip("horizontal"),
    RandomRotation(0.05),
]
data_augmentation_layers = Sequential(augmentation_steps)

---

# **Convolutional Neural Networks (CNN): Transfer Learning VGG-16**

---

## **CNN Model: Transfer Learning VGG-16**

In [21]:
# VGG16 model setup
# Derive the input shape from IMG_SIZE (plus 3 colour channels) instead of
# hard-coding it, so the model always matches the images the dataset cell
# produces and a change to IMG_SIZE cannot leave the two out of sync.
vgg_input_shape = tuple(IMG_SIZE) + (3,)
vgg_base = VGG16(include_top=False, input_shape=vgg_input_shape, weights='imagenet')
vgg_base.trainable = False  # Freeze the convolutional base

# Create the model
vgg16_model = Sequential([
    layers.Input(shape=vgg_input_shape),
    # The ImageNet weights were trained on inputs passed through VGG16's
    # preprocess_input; the dataset cell applies no rescaling of its own, so
    # do the conversion inside the model.
    layers.Lambda(preprocess_input, name='vgg16_preprocess'),
    vgg_base,
    Flatten(),
    Dense(256, activation='relu'),  # small trainable classification head
    Dropout(0.5),
    Dense(num_classes, activation='softmax')  # one probability per class
])

# Compile the model
# Optimizer: Adam starting at a learning rate of 0.001. Larger rates can
# converge faster but risk diverging; smaller ones are steadier but slower,
# so 0.001 is used as a balanced starting point.
# Loss: categorical cross-entropy, reported together with accuracy.
vgg16_optimizer = Adam(learning_rate=0.001)
vgg16_model.compile(
    optimizer=vgg16_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

### **CNN Model: Transfer Learning VGG-16 - Training**

In [17]:
# Train the VGG-16 transfer-learning model for up to 10 epochs, validating
# after each epoch and applying the callbacks defined earlier.
# The image size of the datasets must match the model's input shape; re-run
# the dataset and model cells after changing IMG_SIZE.
fit_options = dict(
    validation_data=val_ds_vgg16,
    epochs=10,
    callbacks=callbacks,
)
history_vgg16 = vgg16_model.fit(train_ds_vgg16, **fit_options)


Epoch 1: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 1/10


ValueError: in user code:

    File "c:\Users\New user\miniconda3\envs\py310new\lib\site-packages\keras\engine\training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "c:\Users\New user\miniconda3\envs\py310new\lib\site-packages\keras\engine\training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\New user\miniconda3\envs\py310new\lib\site-packages\keras\engine\training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "c:\Users\New user\miniconda3\envs\py310new\lib\site-packages\keras\engine\training.py", line 993, in train_step
        y_pred = self(x, training=True)
    File "c:\Users\New user\miniconda3\envs\py310new\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "c:\Users\New user\miniconda3\envs\py310new\lib\site-packages\keras\engine\input_spec.py", line 295, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential_2" is incompatible with the layer: expected shape=(None, 128, 128, 3), found shape=(None, 64, 64, 3)
