# Modules importation

In [1]:
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

import os
import shutil
import random

from tensorflow.keras.applications.vgg16 import VGG16, decode_predictions
from tensorflow.keras.layers import Dense, Flatten, Input, Conv2D, MaxPooling2D, Dropout
from tensorflow.keras import Model, Sequential
from tensorflow.keras.optimizers import SGD

2025-10-22 14:48:12.551958: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Functions

In [2]:
def print_results_dic(dic_result):
    print("Accuracy on test set is:")
    for key, value in dic_result.items():
        print(f"{value*100:.1f}% : {key}")

# Data importation
Origin: Kaggle animal_data : [link](https://www.kaggle.com/datasets/likhon148/animal-data/data)

## Split data in train and test

In [3]:
def copy_files(src_dir, dst_dir, files):
    # Make sure destination folder exists
    os.makedirs(dst_dir, exist_ok=True)
    # Loop over selected indices and copy files
    for file_name in files:
        #file_name = files[idx]
        src_path = os.path.join(src_dir, file_name)
        dst_path = os.path.join(dst_dir, file_name)
        shutil.copy(src_path, dst_path)

if not os.path.isdir("../Data/animal_data/_train/"): #If already done doesn't do it again
    folder_path = r"../Data/animal_data/" # Put the path to your dataset
    ratio_train = 0.8 # Ratio of images in the train dataset
    ratio_val=0.1 #ratio of images in the validation set in the train set

    directories = [name for name in os.listdir(folder_path) if os.path.isdir(os.path.join(folder_path, name))]
    print("Directories:")
    print(directories)


    for dir in directories: 
        print(dir)
        src_dir = folder_path+dir
        files = np.array([f for f in os.listdir(src_dir) if os.path.isfile(os.path.join(src_dir, f))])
        
        files_train,files_test=train_test_split(files, train_size=ratio_train)
        files_train,files_val=train_test_split(files_train, test_size=ratio_val)
        
        # n_files = len(files)
        # train_size = round(ratio_train * n_files)
        # idx_train = random.sample(range(n_files), train_size)
        # idx_test = list(set(range(n_files)) - set(idx_train))
        # idx_val= list(set)
        
        #TRAIN
        dst_dir = folder_path+'_train/'+dir+'/'
        copy_files(src_dir, dst_dir, files_train)
        
        #Validation
        dst_dir = folder_path+'_validation/'+dir+'/'
        copy_files(src_dir, dst_dir, files_val)

        #TEST
        dst_dir = folder_path+'_test/'+dir+'/'
        copy_files(src_dir, dst_dir, files_test)

## Data loading 

In [4]:
train_datagen=ImageDataGenerator(
    #rescale=1/255.0,
    rotation_range=0.15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest"
)
train_generator=train_datagen.flow_from_directory(
    directory=r"../Data/animal_data/_train/",
    target_size=(224, 224),
    color_mode="rgb",
    batch_size=128,
    class_mode="categorical",
    shuffle=True,
    seed=42
)

val_datagen=ImageDataGenerator()
val_generator=val_datagen.flow_from_directory(
    directory=r"../Data/animal_data/_validation/",
    target_size=(224, 224),
    color_mode="rgb",
    batch_size=128,
    class_mode="categorical",
    shuffle=True,
    seed=42
)

test_datagen=ImageDataGenerator()
test_generator=test_datagen.flow_from_directory(
    directory=r"../Data/animal_data/_test/",
    target_size=(224, 224),
    color_mode="rgb",
    batch_size=1,
    class_mode="categorical",
    shuffle=False,
    seed=42
)

Found 1388 images belonging to 15 classes.
Found 161 images belonging to 15 classes.
Found 395 images belonging to 15 classes.


# my CNN

In [7]:
model_cnn=Sequential([
    Input(shape=(224,224,3)),
    Conv2D(filters=64, kernel_size=2, padding='same', activation='relu'),
    Conv2D(filters=64, kernel_size=2, padding='same', activation='relu'),
    MaxPooling2D(pool_size=2),
    Conv2D(filters=32, kernel_size=2, padding='same', activation='relu'),
    Conv2D(filters=32, kernel_size=2, padding='same', activation='relu'),
    MaxPooling2D(pool_size=2),
    Flatten(),
    Dense(16, activation='relu'),
    Dropout(0.2),
    Dense(15, activation='softmax')
])
model_cnn.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

In [9]:
model_cnn.fit(
    train_generator,
    epochs=2, validation_data=val_generator)

Epoch 1/2
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 8s/step - accuracy: 0.0749 - loss: 2.6830 - val_accuracy: 0.0683 - val_loss: 2.7081
Epoch 2/2
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 8s/step - accuracy: 0.0821 - loss: 2.6886 - val_accuracy: 0.0932 - val_loss: 2.7168


<keras.src.callbacks.history.History at 0x7a68581a5bd0>

In [10]:
dic_scores=dict()
dic_scores["CNN"]=model_cnn.evaluate(test_generator)[1]
print_results_dic(dic_scores)

[1m395/395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 15ms/step - accuracy: 0.1038 - loss: 2.6356
Accuracy on test set is:
10.4% : CNN


# VGG16

In [29]:
model=VGG16() #Import the VGG16 model

In [30]:
# STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
# STEP_SIZE_VAL=val_generator.n//val_generator.batch_size

In [31]:
# model.fit_generator(
#     generator=train_generator,
#     steps_per_epoch=STEP_SIZE_TRAIN,
#     validation_data=val_generator,
#     validation_steps=STEP_SIZE_VAL,
#     epochs=10
# )

In [32]:
y_pred_vgg16=model.predict(test_generator)

  self._warn_if_super_not_called()


[1m395/395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 143ms/step


In [None]:
predictions=decode_predictions(y_pred_vgg16)

first_predictions=list()
for pred in predictions:
    first_predictions.append(pred[0][1])

first_predictions=np.array(first_predictions)

In [75]:
import collections
print("The three most predicted categories by VGG16 are:")
unique_pred=collections.Counter(first_predictions)
for animal in list(unique_pred.keys())[:3]:
    print(f"{animal}: {unique_pred[animal]}")

The three most predicted categories by VGG16 are:
ice_bear: 12
brown_bear: 8
American_black_bear: 1


# Transfer learning VGG16

In [11]:
#Load pre-trained VGG-16 on ImageNet without the last fully-connected layers
model_2=VGG16(weights="imagenet", include_top=False, input_shape=(224,224,3))

# We do not train the layers in VGG16
for layer in model_2.layers:
    layer.trainable=False

In [12]:
model_3=Sequential([
    model_2,
    Flatten(),
    Dense(15, activation='softmax')
])

In [13]:
model_3.compile(
    loss="categorical_crossentropy",
    optimizer=SGD(learning_rate=0.0001, momentum=0.9),
    metrics=["accuracy"]
)

In [14]:
model_3.fit(
    train_generator,
    epochs=2,
    #verbose=2
)

Epoch 1/2
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m147s[0m 13s/step - accuracy: 0.2788 - loss: 13.8526
Epoch 2/2
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m149s[0m 13s/step - accuracy: 0.7133 - loss: 3.7251


<keras.src.callbacks.history.History at 0x7a68a85b3b90>

In [16]:
dic_scores["Augmented VGG16"]=model_3.evaluate(test_generator)[1]
print_results_dic(dic_scores)

[1m395/395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 104ms/step - accuracy: 0.8127 - loss: 2.7587
Accuracy on test set is:
10.4% : CNN
81.3% : Augmented VGG16
