In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

# 🌊 Core Libraries
import os
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# 🧠 TensorFlow & Keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
from tensorflow.keras.preprocessing.image import (
    ImageDataGenerator,
    load_img,
    img_to_array,
)

# 🧱 Keras Layers
from tensorflow.keras.layers import (
    Input,
    Conv2D,
    BatchNormalization,
    MaxPool2D,
    Dropout,
    Dense,
    GlobalAveragePooling2D,
    Flatten,
    Add,
)

seed_value = 42

np.random.seed(seed_value)
tf.random.set_seed(seed_value)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# 📂 Explore dataset paths
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
!ls ./kaggle/working

[34mships32[m[m


## Exploration, Analyse, Data Preprocessing

In [None]:
# Définir les chemins des dossiers source et destination
source_dir = "./ships32"
dest_dir = "./ships32augmented"

# Créer le dossier de destination s'il n'existe pas déjà
if os.path.exists(dest_dir):
    shutil.rmtree(dest_dir)  # Supprimer s'il existe déjà
os.makedirs(dest_dir)

# Obtenir la liste des sous-dossiers (classes) dans le dossier source
classes = os.listdir(source_dir)

# Créer les mêmes sous-dossiers dans le dossier de destination
for class_name in classes:
    if os.path.isdir(os.path.join(source_dir, class_name)):
        os.makedirs(os.path.join(dest_dir, class_name), exist_ok=True)

In [None]:
class_counts = {}
for image, label in train_ds.unbatch():
    label_val = label.numpy()  # Get the actual label value
    if label_val in class_counts:
        class_counts[label_val] += 1
    else:
        class_counts[label_val] = 1

# Plot class distribution
plt.figure(figsize=(12, 6))
classes = list(class_counts.keys())
counts = list(class_counts.values())
plt.bar(classes, counts)
plt.xlabel("Boat Class")
plt.ylabel("Number of Images")
plt.title("Class Distribution in Training Dataset")
plt.show()

In [None]:
# Configurer le générateur d'augmentation de données
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    brightness_range=[0.9, 1.1],
    fill_mode="nearest",
)

# Nombre d'images augmentées à générer pour chaque image d'origine
num_augmented_per_img = 10

# Parcourir chaque classe et chaque image
for class_name in classes:
    if not os.path.isdir(os.path.join(source_dir, class_name)):
        continue

    class_path = os.path.join(source_dir, class_name)
    dest_class_path = os.path.join(dest_dir, class_name)

    # Copier d'abord toutes les images originales
    for img_name in os.listdir(class_path):
        if img_name.endswith((".jpg", ".jpeg", ".png")):
            img_path = os.path.join(class_path, img_name)
            shutil.copy2(img_path, dest_class_path)

            # Charger l'image et la préparer pour l'augmentation
            img = load_img(img_path)
            x = img_to_array(img)
            x = x.reshape((1,) + x.shape)

            # Générer des images augmentées
            i = 0
            for batch in datagen.flow(
                x,
                batch_size=1,
                save_to_dir=dest_class_path,
                save_prefix=os.path.splitext(img_name)[0] + "_aug",
                save_format="png",
            ):
                i += 1
                if i >= num_augmented_per_img:
                    break

# Afficher quelques statistiques
original_count = sum(
    [
        len(os.listdir(os.path.join(source_dir, c)))
        for c in classes
        if os.path.isdir(os.path.join(source_dir, c))
    ]
)
augmented_count = sum(
    [
        len(os.listdir(os.path.join(dest_dir, c)))
        for c in classes
        if os.path.isdir(os.path.join(dest_dir, c))
    ]
)

print(f"Nombre d'images originales: {original_count}")
print(f"Nombre d'images après augmentation: {augmented_count}")

In [None]:
print("ships32augmented/ferry/0")
# Trouver toutes les versions augmentées de la première image
base_name = "0_aug"
augmented_images = [
    img for img in os.listdir(dest_class_path) if img.startswith(base_name)
]

# Créer une grille pour afficher les images
fig, axes = plt.subplots(3, 4, figsize=(15, 10))
axes = axes.ravel()

# Afficher l'image originale
original_img = load_img(os.path.join("ships32augmented/", class_name, "2.jpg"))
axes[0].imshow(original_img)
axes[0].set_title("Image originale")
axes[0].axis("off")

# Afficher les images augmentées
for idx, img_name in enumerate(augmented_images[:10], 1):
    img_path = os.path.join(dest_class_path, img_name)
    img = load_img(img_path)
    axes[idx].imshow(img)
    axes[idx].set_title(f"Augmentation {idx}")
    axes[idx].axis("off")

plt.tight_layout()
plt.show()

In [None]:
directory = "ships32augmented"
train_ds = keras.utils.image_dataset_from_directory(
    directory,
    color_mode="rgb",
    batch_size=128,
    image_size=(32, 32),
    seed=123,
    validation_split=0.2,
    shuffle=True,
    subset="training",
)
val_ds = keras.utils.image_dataset_from_directory(
    directory,
    color_mode="rgb",
    batch_size=128,
    image_size=(32, 32),
    seed=123,
    validation_split=0.2,
    shuffle=True,
    subset="validation",
)
class_names = train_ds.class_names

In [None]:
normalization_layer = tf.keras.layers.Rescaling(1.0 / 255)

# Application de la normalisation aux datasets
train_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
val_ds = val_ds.map(lambda x, y: (normalization_layer(x), y))

In [None]:
# Define AUTOTUNE to utilize TensorFlow's automatic optimization
AUTOTUNE = tf.data.AUTOTUNE

# Cache, shuffle, and prefetch the training dataset for improved performance
train_ds = train_ds.cache().prefetch(buffer_size=AUTOTUNE)

# Cache and prefetch the validation dataset for improved performance
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

## Model

In [None]:
def BoatNet_30(input_shape=(32, 32, 3), num_classes=13):
    inputs = Input(shape=input_shape)

    # Block 1
    x = Conv2D(
        64, (3, 3), padding="same", activation="relu", kernel_regularizer=l2(2e-4)
    )(inputs)
    x = BatchNormalization()(x)
    x = Conv2D(
        64, (3, 3), padding="same", activation="relu", kernel_regularizer=l2(2e-4)
    )(x)
    x = BatchNormalization()(x)
    x = MaxPool2D(pool_size=(2, 2))(x)
    x = Dropout(0.25)(x)

    # Block 2
    x = Conv2D(
        128, (3, 3), padding="same", activation="relu", kernel_regularizer=l2(2e-4)
    )(x)
    x = BatchNormalization()(x)
    x = Conv2D(
        128, (3, 3), padding="same", activation="relu", kernel_regularizer=l2(2e-4)
    )(x)
    x = BatchNormalization()(x)
    x = MaxPool2D(pool_size=(2, 2))(x)
    x = Dropout(0.3)(x)

    # Residual Block
    # Shortcut before main path
    shortcut = Conv2D(128, (1, 1), padding="same")(x)  # 1
    # Main path
    res = Conv2D(128, (3, 3), padding="same", activation="relu")(x)  # 2
    res = BatchNormalization()(res)  # 3
    # Add skip connection
    x = Add()([shortcut, res])

    # Block 3
    x = Conv2D(
        256, (3, 3), padding="same", activation="relu", kernel_regularizer=l2(2e-4)
    )(x)
    x = BatchNormalization()(x)
    x = Conv2D(
        256, (3, 3), padding="same", activation="relu", kernel_regularizer=l2(2e-4)
    )(x)
    x = BatchNormalization()(x)
    x = MaxPool2D(pool_size=(2, 2))(x)
    x = Dropout(0.4)(x)

    # Block 4
    x = Conv2D(
        512, (3, 3), padding="same", activation="relu", kernel_regularizer=l2(2e-4)
    )(x)
    x = BatchNormalization()(x)
    x = Dropout(0.4)(x)

    # Classification
    x = GlobalAveragePooling2D()(x)
    x = Dense(768, activation="relu", kernel_regularizer=l2(2e-4))(x)
    x = Dropout(0.5)(x)
    outputs = Dense(num_classes)(x)  # from_logits=True for sparse loss

    return Model(inputs=inputs, outputs=outputs)


In [None]:
# Créer le modèle
model = BoatNet_30()

# Schedule d'apprentissage plus efficace
initial_learning_rate = 1e-3
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps=5000, decay_rate=0.9, staircase=True
)

# Optimizer avec momentum
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

# Compilation améliorée
model.compile(
    optimizer=optimizer,
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

# Print model summary and layer count
model.summary()
print(f"Number of layers: {len(model.layers)}")


In [None]:
# Optimized callbacks for better training
callbacks = [
    # Early stopping plus patient
    tf.keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=10,
        restore_best_weights=True,
        verbose=1,
    ),
    # Réduction du LR plus progressive
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss",
        factor=0.7,  # Moins agressif (0.7 au lieu de 0.5)
        patience=5,
        min_lr=5e-6,  # try with 9e-6
        verbose=1,
    ),
    # Model checkpointing
    tf.keras.callbacks.ModelCheckpoint(
        "best_ship_model.keras", save_best_only=True, monitor="val_loss", verbose=1
    ),
]

# Train model with sufficient epochs
history = model.fit(
    train_ds, validation_data=val_ds, epochs=150, callbacks=callbacks, verbose=1
)

## Résultat à soumettre

In [3]:
X_test = np.load("./kaggle/input/navires-2025/ships_competition.npz", allow_pickle=True)[
    "X"
]
X_test = X_test.astype("float32") / 255


In [None]:
res = model.predict(X_test).argmax(axis=1)
df = pd.DataFrame({"Category": res})
df.to_csv("reco_nav.csv", index_label="Id")


I0000 00:00:1745326710.092582 25887979 service.cc:152] XLA service 0x142fb4200 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1745326710.092599 25887979 service.cc:160]   StreamExecutor device (0): Host, Default Version
2025-04-22 14:58:30.101001: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1745326710.251778 25887979 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 62ms/step


In [5]:
!head reco_nav.csv


Id,Category
0,5
1,3
2,8
3,6
4,5
5,0
6,1
7,1
8,3


In [6]:
import os

os.chdir(r"./kaggle/working")
from IPython.display import FileLink

FileLink(r"reco_nav.csv")


In [7]:
!ls

[34mships32[m[m
