In [56]:
import pandas as pd
import numpy as np

from pathlib import Path

import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [2]:
# Root folder of the Kaggle "dog-breed-identification" data on the local machine.
DATA_ROOT = "C:\\Users\\Bastien\\Desktop\\dog-breed-identification"

# Training images, test images and the id -> breed CSV all live under DATA_ROOT.
train_dir = DATA_ROOT + "\\train"
test_dir = DATA_ROOT + "\\test"
labels_path = DATA_ROOT + "\\labels.csv"

In [3]:
# Read the labels CSV and preview its first five rows.
labels_df = pd.read_csv(labels_path)
labels_df.head(n=5)

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever


In [4]:
# Number of labelled images per breed, most frequent first.
labels_df["breed"].value_counts()

breed
scottish_deerhound      126
maltese_dog             117
afghan_hound            116
entlebucher             115
bernese_mountain_dog    114
                       ... 
golden_retriever         67
komondor                 67
brabancon_griffon        67
eskimo_dog               66
briard                   66
Name: count, Length: 120, dtype: int64

In [6]:
IMG_DIR = Path(train_dir)

# Derive the on-disk location of every training image from its id.
labels_df["filepath"] = [
    str(IMG_DIR / f"{image_id}.jpg") for image_id in labels_df["id"]
]

# Count the rows whose image file cannot be found on disk.
file_exists = labels_df["filepath"].apply(lambda p: Path(p).exists())
missing = (~file_exists).sum()
print("Images manquantes:", missing)

Images manquantes: 0


In [7]:
# Fit the encoder on the breed names, then store the integer code of each row.
le = LabelEncoder().fit(labels_df["breed"])
labels_df["breed_idx"] = le.transform(labels_df["breed"])

labels_df

Unnamed: 0,id,breed,filepath,breed_idx
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull,C:\Users\Bastien\Desktop\dog-breed-identificat...,19
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo,C:\Users\Bastien\Desktop\dog-breed-identificat...,37
2,001cdf01b096e06d78e9e5112d419397,pekinese,C:\Users\Bastien\Desktop\dog-breed-identificat...,85
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick,C:\Users\Bastien\Desktop\dog-breed-identificat...,15
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever,C:\Users\Bastien\Desktop\dog-breed-identificat...,49
...,...,...,...,...
10217,ffd25009d635cfd16e793503ac5edef0,borzoi,C:\Users\Bastien\Desktop\dog-breed-identificat...,18
10218,ffd3f636f7f379c51ba3648a9ff8254f,dandie_dinmont,C:\Users\Bastien\Desktop\dog-breed-identificat...,35
10219,ffe2ca6c940cddfee68fa3cc6c63213f,airedale,C:\Users\Bastien\Desktop\dog-breed-identificat...,3
10220,ffe5f6d8e2bff356e9482a80a6e29aac,miniature_pinscher,C:\Users\Bastien\Desktop\dog-breed-identificat...,75


In [8]:
# Model inputs are the image paths; targets are the encoded breed indices.
X, y = labels_df["filepath"], labels_df["breed_idx"]

In [10]:
# Hold out 20% of the samples for validation, stratified on the breed index,
# with a fixed seed so the split is reproducible.
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_val, y_train, y_val = train_test_split(X, y, **split_options)

In [11]:
# Build the training and validation pipelines with tf.data: images are read
# from disk, decoded, resized to 224x224 and converted to float32. The data
# is then shuffled, grouped into batches and prefetched to speed up training.

IMG_SIZE, BATCH_SIZE = 224, 32   # image side length in pixels, samples per batch
AUTOTUNE = tf.data.AUTOTUNE      # passed to map() and prefetch() below

def load_image(path, label):
    """Read a JPEG file and return it as a resized float32 image with its label.

    Args:
        path: location of the JPEG file to read.
        label: value returned unchanged alongside the image.

    Returns:
        A tuple ``(image, label)`` where ``image`` has been decoded with three
        channels, resized to ``(IMG_SIZE, IMG_SIZE)`` and cast to float32.
    """
    raw_bytes = tf.io.read_file(path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, (IMG_SIZE, IMG_SIZE))
    return tf.cast(resized, tf.float32), label

def make_dataset(paths, labels, shuffle=False):
    """Build a batched, prefetched tf.data pipeline of (image, label) pairs.

    Args:
        paths: sequence of image file paths.
        labels: sequence of labels aligned with ``paths``.
        shuffle: when True, shuffle the samples before images are loaded.

    Returns:
        A ``tf.data.Dataset`` yielding batches of ``BATCH_SIZE`` elements
        produced by ``load_image``.
    """
    ds = tf.data.Dataset.from_tensor_slices((paths, labels))
    if shuffle:
        # Shuffling happens on the lightweight (path, label) pairs, before any
        # image is decoded, so the buffer can cover the whole dataset. A buffer
        # smaller than the dataset only shuffles within a sliding window.
        ds = ds.shuffle(len(paths))
    ds = ds.map(load_image, num_parallel_calls=AUTOTUNE)
    return ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)


train_ds = make_dataset(X_train, y_train, shuffle=True)
val_ds = make_dataset(X_val, y_val)

In [12]:
# Instantiate the complete EfficientNetB0 (classification head included, ImageNet
# weights) purely to inspect its architecture.
base = tf.keras.applications.EfficientNetB0(include_top=True, weights="imagenet")
base.summary()

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0.h5
[1m21834768/21834768[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [13]:
# Count the layers of the `base` model.
len(base.layers)

241

In [14]:
# We use an EfficientNetB0 model pre-trained on ImageNet.
# The model is then trained on the Dog Breed Identification dataset
# in order to adapt the classification to the dog breeds it contains.

# One output unit per class known to the label encoder, so the head always
# matches the encoded targets instead of relying on a hard-coded count.
NUM_CLASSES = len(le.classes_)

# Backbone without its ImageNet classification head; its weights are frozen.
base = tf.keras.applications.EfficientNetB0(
    include_top=False, weights="imagenet",
    input_shape=(IMG_SIZE, IMG_SIZE, 3)
)
base.trainable = False

inputs = tf.keras.Input((IMG_SIZE, IMG_SIZE, 3))
# Data augmentation layers: random horizontal flip, rotation and zoom.
x = tf.keras.layers.RandomFlip("horizontal")(inputs)
x = tf.keras.layers.RandomRotation(0.05)(x)
x = tf.keras.layers.RandomZoom(0.1)(x)

# EfficientNet-specific preprocessing, then the frozen backbone called with
# training=False.
x = tf.keras.applications.efficientnet.preprocess_input(x)
x = base(x, training=False)

# New classification head: pooling, dropout, softmax over the breeds.
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dropout(0.3)(x)
outputs = tf.keras.layers.Dense(NUM_CLASSES, activation="softmax")(x)

model = tf.keras.Model(inputs, outputs)
# Sparse categorical cross-entropy because the targets are integer class indices.
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-3),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
)


In [15]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

In [16]:
# Train for a fixed number of epochs, evaluating on the validation set after each one.
EPOCHS = 10
history = model.fit(train_ds, epochs=EPOCHS, validation_data=val_ds)


Epoch 1/10
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m218s[0m 826ms/step - accuracy: 0.5657 - loss: 2.1510 - val_accuracy: 0.7980 - val_loss: 0.8649
Epoch 2/10
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m239s[0m 935ms/step - accuracy: 0.7974 - loss: 0.8276 - val_accuracy: 0.8352 - val_loss: 0.6151
Epoch 3/10
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 770ms/step - accuracy: 0.8348 - loss: 0.6263 - val_accuracy: 0.8416 - val_loss: 0.5525
Epoch 4/10
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m203s[0m 793ms/step - accuracy: 0.8591 - loss: 0.5238 - val_accuracy: 0.8425 - val_loss: 0.5210
Epoch 5/10
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 782ms/step - accuracy: 0.8854 - loss: 0.4346 - val_accuracy: 0.8445 - val_loss: 0.5142
Epoch 6/10
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m199s[0m 777ms/step - accuracy: 0.8942 - loss: 0.3881 - val_accuracy: 0.8435 - val_loss: 0.4989
Epoc

In [17]:
# evaluate() returns the loss followed by the compiled metric (accuracy).
eval_results = model.evaluate(val_ds, verbose=0)
val_loss, val_acc = eval_results
print(f"Validation loss: {val_loss:.4f} | Validation acc: {val_acc:.4f}")

Validation loss: 0.4927 | Validation acc: 0.8411


In [45]:
# Quick test

def load_single_image(img_path):
    """Load one image file with the same preprocessing as the training pipeline.

    Delegates to ``load_image`` so that inference and training preprocessing
    cannot drift apart.

    Args:
        img_path: location of the JPEG file to read.

    Returns:
        The image as produced by ``load_image`` (without the label).
    """
    # No label exists for an arbitrary image; the placeholder is discarded.
    img, _ = load_image(img_path, None)
    return img

# Sample image, located relative to the configured test folder rather than
# through a second hard-coded absolute path.
img_path = str(Path(test_dir) / "0ce3f883d80da4cbfce335fcfc38b1b2.jpg")
img = load_single_image(img_path)

# Batch of size 1
img_batch = tf.expand_dims(img, axis=0)

pred = model.predict(img_batch)
# Work on the single row of the batch explicitly instead of on the flattened array.
probs = pred[0]
pred_id = np.argmax(probs)
confidence = probs[pred_id]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step


In [55]:
# Map the predicted class index back to its breed name.
pred_label = str(le.classes_[pred_id])
prediction_message = f"Prédiction : {pred_label} ({confidence:.2%})"
print(prediction_message)


Prédiction : pug (99.58%)


**Export du modèle et des labels**

In [50]:
from pathlib import Path
import json

# Folder (relative to the working directory) that receives the exported files.
EXPORT_DIR = Path("artifacts")
# Create it if needed; an already existing folder is not an error.
EXPORT_DIR.mkdir(parents=False, exist_ok=True)

In [51]:
# Save the trained model under the export folder.
model_path = EXPORT_DIR / "dog_breed_model.keras"
model.save(model_path)

In [53]:
# Class names in encoder order: position i is the breed encoded as index i.
labels = le.classes_.tolist()

# Explicit UTF-8 so the file does not depend on the platform's default encoding.
with open(EXPORT_DIR / "labels.json", "w", encoding="utf-8") as f:
    json.dump(labels, f)