In [1]:
import keras
import tensorflow as tf
import random
import os
import numpy as np
import tensorflow_hub as hub
from PIL import Image
import keras_cv

physical_devices = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

# set log level
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

2024-12-15 17:15:30.423793: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-12-15 17:15:30.432646: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1734279330.442089  151280 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1734279330.444920  151280 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-15 17:15:30.455360: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [2]:
class CustomDataGen(keras.utils.Sequence):
    def __init__(self, data_folder: str, aug_len: dict[str, float], batch_size=32, split="train", train_ratio=0.8, seed=42):
        super().__init__()

        self.data_folder = data_folder
        self.aug_len = aug_len
        self.batch_size = batch_size
        self.data = []

        self.pipeline = tf.keras.Sequential([
            keras.layers.RandomFlip("horizontal_and_vertical"),
            keras.layers.RandomRotation(0.2),
            keras.layers.RandomZoom(0.1),
            keras.layers.RandomContrast(0.01),
            keras.layers.Rescaling(1./255),
            keras.layers.Resizing(224, 224),
            keras.layers.Normalization(
                mean=(0.485, 0.456, 0.406), 
                variance=(0.229, 0.224, 0.225)
            )
        ])

        self.split = split
        self.train_ratio = train_ratio
        self.seed = seed
        self.load_data()
        self.classes = {label: idx for idx, label in enumerate(set([item["label"] for item in self.data]))}

    
    def load_data(self):

        all_files = []
        for root, _, files in os.walk(self.data_folder):
            ext = os.path.splitext(files[0])[1]
            if ext not in [".jpg", ".jpeg", ".png"]:
                continue
            subfolder = os.path.basename(root)
            for file in files:
                all_files.append({
                    "path": os.path.join(root, file),
                    "label": subfolder
                })

        random.seed(self.seed)
        random.shuffle(all_files)
        

        split_idx = int(len(all_files) * self.train_ratio)
        

        if self.split == "train":
            base_files = all_files[:split_idx]
        else:
            base_files = all_files[split_idx:]
        

        self.data = []
        for file in base_files:
            if self.split == "train" and file["label"] in self.aug_len:
                prob = self.aug_len[file["label"]]
                for _ in range(int(prob)):
                    self.data.append(file)
                if np.random.rand() < (prob - int(prob)):
                    self.data.append(file)
            else:
                self.data.append(file)
        
        random.shuffle(self.data)

    def __len__(self):
        return int(np.ceil(len(self.data) / self.batch_size))
    
    def __getitem__(self, idx):
        batch_data = self.data[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_images = []
        batch_labels = []
        for item in batch_data:
            img = Image.open(item["path"])
            img = np.array(img)
            if img.ndim == 2:
                img = np.stack((img,) * 3, axis=-1)
            elif img.shape[2] == 1:
                img = np.concatenate([img] * 3, axis=-1)
            img = self.pipeline(img)
            batch_images.append(img)
            batch_labels.append(self.classes[item["label"]])

        return np.array(batch_images), np.array(batch_labels)
    
    def on_epoch_end(self):
        random.shuffle(self.data)
        


In [3]:
train = CustomDataGen("/home/shared/Mammiferes_test",{
    "Castor": 1.5,
    "Chat": 1.5,
    "Chien": 1.5,
    "Coyote": 1.5,
    "Ecureuil": 1.5,
    "Lapin": 1.5,
    "Loup": 1.5,
    "Lynx" : 1.5,
    "Ours" : 1.5,
    "Puma" : 1.5,
    "Rat": 1.5,
    "Raton_laveur": 1.5,
    "Renard": 1.5,
}, batch_size=32, split="train")
val = CustomDataGen("/home/shared/Mammiferes_test",{
    "Castor": 1.5,
    "Chat": 1.5,
    "Chien": 1.5,
    "Coyote": 1.5,
    "Ecureuil": 1.5,
    "Lapin": 1.5,
    "Loup": 1.5,
    "Lynx" : 1.5,
    "Ours" : 1.5,
    "Puma" : 1.5,
    "Rat": 1.5,
    "Raton_laveur": 1.5,
    "Renard": 1.5,
}, batch_size=32, split="val")

I0000 00:00:1734279331.809198  151280 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22282 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4090, pci bus id: 0000:01:00.0, compute capability: 8.9


In [7]:
model = tf.keras.applications.EfficientNetB0(
    weights='imagenet',
    input_tensor=None,
    input_shape=None,
    pooling=None,
    include_top=False,
)

model.trainable = False

model_with_classification = tf.keras.Sequential([
    model,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(512, activation="relu"),
    keras.layers.Dense(13, activation="softmax")
])

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [8]:
model_with_classification.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy']
)


In [9]:
model_with_classification.fit(train, validation_data=val, epochs=10)

Epoch 1/10
[1m 5/10[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m3s[0m 636ms/step - accuracy: 0.0705 - loss: 2.6444

E0000 00:00:1734279695.980005  151412 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1734279696.528975  151412 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 942ms/step - accuracy: 0.0798 - loss: 2.6710









[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 2s/step - accuracy: 0.0801 - loss: 2.6717 - val_accuracy: 0.0980 - val_loss: 2.6321
Epoch 2/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 867ms/step - accuracy: 0.0520 - loss: 2.6685 - val_accuracy: 0.0784 - val_loss: 2.6124
Epoch 3/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 835ms/step - accuracy: 0.0634 - loss: 2.5856 - val_accuracy: 0.0392 - val_loss: 2.6481
Epoch 4/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 913ms/step - accuracy: 0.1397 - loss: 2.5416 - val_accuracy: 0.0784 - val_loss: 2.5710
Epoch 5/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 877ms/step - accuracy: 0.1070 - loss: 2.5360 - val_accuracy: 0.0392 - val_loss: 2.5801
Epoch 6/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 918ms/step - accuracy: 0.1377 - loss: 2.5452 - val_accuracy: 0.0392 - val_loss: 2.6223
Epoch 7/10
[1m10/10[0m [32m━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x74cfd0011f10>