In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ReduceLROnPlateau

In [2]:
data_dir = "../dataset/raw-img"  # Path to the dataset folder
img_size = (224, 224)  # Resize images to this size
batch_size = 32  # Number of images to process at once


In [4]:
def create_data_generators(data_dir, img_size, batch_size=32):
    # Define ImageDataGenerators for data augmentation and rescaling
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        validation_split=0.2,  # 20% of the data will be used for validation
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )

    val_test_datagen = ImageDataGenerator(rescale=1./255)

    # Set up the training generator with the specified subset (training set)
    train_generator = train_datagen.flow_from_directory(
        data_dir,
        target_size=img_size,
        batch_size=batch_size,
        class_mode='categorical',
        subset='training',  # Specify that this is the training subset
        seed=42
    )

    # Set up the validation generator with the specified subset (validation set)
    val_generator = train_datagen.flow_from_directory(
        data_dir,
        target_size=img_size,
        batch_size=batch_size,
        class_mode='categorical',
        subset='validation',  # Specify that this is the validation subset
        seed=42
    )

    # Set up the test generator without using subsets
    test_generator = val_test_datagen.flow_from_directory(
        data_dir,
        target_size=img_size,
        batch_size=batch_size,
        class_mode='categorical',
        seed=42
    )

    return train_generator, val_generator, test_generator

In [5]:
train_generator, val_generator, test_generator = create_data_generators(data_dir, img_size)

# Check the number of images in each set
print(f"Training set: {train_generator.samples} images in {train_generator.samples // train_generator.batch_size} batches")
print(f"Validation set: {val_generator.samples} images in {val_generator.samples // val_generator.batch_size} batches")
print(f"Test set: {test_generator.samples} images in {test_generator.samples // test_generator.batch_size} batches")


Found 20947 images belonging to 10 classes.
Found 5232 images belonging to 10 classes.
Found 26179 images belonging to 10 classes.
Training set: 20947 images in 654 batches
Validation set: 5232 images in 163 batches
Test set: 26179 images in 818 batches


In [6]:
# Create the CNN model for 10 animal classes
cnn = tf.keras.models.Sequential()

# First Convolutional Layer
cnn.add(tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation="relu", padding="same", input_shape=[224, 224, 3]))  # Adjust input shape for your image size
cnn.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))  # First Pooling Layer

# Second Convolutional Layer
cnn.add(tf.keras.layers.Conv2D(filters=128, kernel_size=3, activation="relu", padding="same"))
cnn.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))  # Second Pooling Layer

# Third Convolutional Layer
cnn.add(tf.keras.layers.Conv2D(filters=256, kernel_size=3, activation="relu", padding="same"))
cnn.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))  # Third Pooling Layer

# Flatten the output for Dense Layer input
cnn.add(tf.keras.layers.Flatten())

# Fully Connected Layer (Dense Layer)
cnn.add(tf.keras.layers.Dense(units=512, activation="relu"))
cnn.add(tf.keras.layers.Dropout(0.5))  # Dropout to prevent overfitting

# Second Fully Connected Layer
cnn.add(tf.keras.layers.Dense(units=256, activation="relu"))
cnn.add(tf.keras.layers.Dropout(0.5))

# Output Layer (10 classes, so use softmax activation)
cnn.add(tf.keras.layers.Dense(units=10, activation="softmax"))

# Model Summary
cnn.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [7]:
# Compile the Model
cnn.compile(loss="categorical_crossentropy",  # Use categorical_crossentropy for multi-class classification
              optimizer="adam",
              metrics=["accuracy"])


In [8]:
# Early stopping to monitor validation accuracy
early_stopping = EarlyStopping(
    monitor='val_accuracy',  # Monitors validation accuracy
    patience=5,             # Stops after 5 epochs without improvement
    restore_best_weights=True  # Restores the weights with the best validation accuracy
)

In [9]:
# Learning rate scheduler
lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss',  # Monitor the validation loss
    factor=0.5,          # Reduce the learning rate by half
    patience=3,          # Wait for 3 epochs without improvement
    min_lr=1e-6          # Minimum learning rate
)

In [10]:
history = cnn.fit(
    train_generator,
    validation_data=val_generator,
    epochs=50,  # Train for many epochs; early stopping will stop if needed
    callbacks=[early_stopping, lr_scheduler]  # Add callbacks for early stopping and learning rate adjustment
)

  self._warn_if_super_not_called()


Epoch 1/50
[1m  2/655[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m15:41[0m 1s/step - accuracy: 0.1094 - loss: 7.0353

KeyboardInterrupt: 

In [None]:
# Evaluate the model on the test data (use the test set if available)
score = cnn.evaluate(test_generator)
print('Total loss on Test Set:', score[0])
print('Accuracy of Test Set:', score[1])

In [None]:
# Italian class names (should match the training data)
italian_class_names = [
    "cane", "cavallo", "elefante", "farfalla", "gallina", "gatto",
    "mucca", "pecora", "scoiattolo", "ragno"
]

# English translation dictionary
translate = {
    "cane": "dog",
    "cavallo": "horse",
    "elefante": "elephant",
    "farfalla": "butterfly",
    "gallina": "chicken",
    "gatto": "cat",
    "mucca": "cow",
    "pecora": "sheep",
    "scoiattolo": "squirrel",
    "ragno": "spider"
}

# Get a batch of data from the test set
X_new, y_new = next(test_generator)

# Make predictions
y_proba = cnn.predict(X_new)
y_pred = np.argmax(y_proba, axis=-1)  # Convert probabilities to predicted class indices

# Display predictions and images
plt.figure(figsize=(7.2, 2.4))
for index, image in enumerate(X_new[:3]):  # Show the first 3 images in the batch
    plt.subplot(1, 3, index + 1)
    plt.imshow(image)
    plt.axis('off')

    # Translate the predicted class from Italian to English
    predicted_label_italian = italian_class_names[y_pred[index]]  # Italian prediction
    predicted_label = translate[predicted_label_italian]  # Translate to English

    # Translate the true label from Italian to English
    true_label_italian = italian_class_names[np.argmax(y_new[index])]  # Get Italian name from the true label
    true_label = translate[true_label_italian]  # Translate to English

    plt.title(f"Pred: {predicted_label}\nTrue: {true_label}", fontsize=12)

plt.subplots_adjust(wspace=0.2, hspace=0.5)
plt.show()


In [None]:
# Save the model in the native Keras format
cnn.save('animal_classification_model.keras')  # Save the model in Keras format
