# Pokemon Classifier - Transfer Learning (EfficientNetB0)

Classification de 151 Pokémon avec Transfer Learning.

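**Stratégie** : Feature Extraction (30 epochs maximum), puis Fine-Tuning des dernières couches d'EfficientNetB0 (80 epochs maximum), avec early stopping sur la `val_loss` dans les deux phases.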

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from pathlib import Path

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input

# Report the TensorFlow version and the GPUs TensorFlow can see, so a
# CPU-only session is noticed before any training is launched.
tf_version = tf.__version__
print(f"TensorFlow: {tf_version}")
gpu_devices = tf.config.list_physical_devices('GPU')
print(f"GPU: {gpu_devices}")

## Chargement des données

In [None]:
# Root folder of the dataset: one sub-directory per class, named after the class.
data_path = Path('./PokemonData')
# File suffixes (lower-case) accepted as images.
image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp'}

if not data_path.is_dir():
    raise FileNotFoundError(f"Dataset directory not found: {data_path.resolve()}")

pokemon_dirs = sorted(d for d in data_path.iterdir() if d.is_dir())
image_paths, labels = [], []

for pokemon_dir in pokemon_dirs:
    # iterdir() yields entries in a filesystem-dependent order. Sorting keeps
    # the sample order stable, so the seeded train/val/test split further down
    # selects the same files on every machine.
    for img in sorted(pokemon_dir.iterdir()):
        if img.is_file() and img.suffix.lower() in image_extensions:
            image_paths.append(str(img))
            labels.append(pokemon_dir.name)

if not image_paths:
    # Fail here with an explicit message rather than later inside the split.
    raise ValueError(f"No image files found under {data_path.resolve()}")

# Parallel arrays: image_paths[i] is a file path, labels[i] its class name.
image_paths = np.array(image_paths)
labels = np.array(labels)

print(f"Images: {len(image_paths)} | Classes: {len(np.unique(labels))}")

## Split Train/Val/Test

In [None]:
# First cut: 80 % of the samples go to training, 20 % are held out.
# Both cuts are stratified on the class label and use the same fixed seed.
train_paths, temp_paths, train_labels, temp_labels = train_test_split(
    image_paths,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

# Second cut: the held-out part is halved into validation and test.
val_paths, test_paths, val_labels, test_labels = train_test_split(
    temp_paths,
    temp_labels,
    test_size=0.5,
    random_state=42,
    stratify=temp_labels,
)

n_train, n_val, n_test = len(train_paths), len(val_paths), len(test_paths)
print(f"Train: {n_train} | Val: {n_val} | Test: {n_test}")

## Préparation des données

In [None]:
# Class names found in the training split, in sorted order; the position of a
# name in this list is the integer id of the class.
unique_labels = sorted(np.unique(train_labels))
num_classes = len(unique_labels)
label_to_idx = dict(zip(unique_labels, range(num_classes)))


def _to_onehot(names):
    """Map class names to ids via label_to_idx and one-hot encode them."""
    class_ids = [label_to_idx[name] for name in names]
    return tf.keras.utils.to_categorical(class_ids, num_classes)


train_labels_onehot = _to_onehot(train_labels)
val_labels_onehot = _to_onehot(val_labels)
test_labels_onehot = _to_onehot(test_labels)

def load_image(path, label):
    """Read the image file at `path`, decode it and resize it to 256x256.

    The image is decoded with 3 channels and `expand_animations=False`.
    `label` is returned untouched so the function can be used directly in
    `Dataset.map` on (path, label) pairs.
    """
    raw_bytes = tf.io.read_file(path)
    decoded = tf.image.decode_image(raw_bytes, channels=3, expand_animations=False)
    return tf.image.resize(decoded, (256, 256)), label

# Random transformations applied, in this order, to training images only.
augmentation_layers = [
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.15),
    layers.RandomZoom(0.1),
    layers.RandomContrast(0.15),
    layers.RandomBrightness(0.15),
]
data_augmentation = tf.keras.Sequential(augmentation_layers)

def augment_and_preprocess(img, label):
    """Apply the random augmentations, then EfficientNet's `preprocess_input`.

    `training=True` is passed explicitly so the random layers are active when
    the function runs inside a tf.data pipeline. `label` is passed through.
    """
    augmented = data_augmentation(img, training=True)
    prepared = preprocess_input(augmented)
    return prepared, label

def preprocess_only(img, label):
    """Apply EfficientNet's `preprocess_input` without any augmentation.

    Used for the validation and test pipelines. `label` is passed through.
    """
    prepared = preprocess_input(img)
    return prepared, label

AUTOTUNE = tf.data.AUTOTUNE
batch_size = 32


def _make_dataset(paths, onehot_labels, transform, shuffle=False):
    """Build a batched, prefetched pipeline of (image, one-hot label) pairs.

    Args:
        paths: array of image file paths.
        onehot_labels: one-hot labels aligned with `paths`.
        transform: function mapped over (image, label) after `load_image`
            (augmentation + preprocessing, or preprocessing only).
        shuffle: when True, reshuffle the samples on every pass.

    Returns:
        A `tf.data.Dataset` yielding batches of `batch_size` samples.
    """
    dataset = tf.data.Dataset.from_tensor_slices((paths, onehot_labels))
    if shuffle:
        # Shuffle the (path, label) pairs *before* decoding. The buffer then
        # holds path strings instead of decoded images, so it can cover the
        # whole split (a uniform shuffle) at a negligible memory cost.
        dataset = dataset.shuffle(len(paths))
    return (dataset
        .map(load_image, num_parallel_calls=AUTOTUNE)
        .map(transform, num_parallel_calls=AUTOTUNE)
        .batch(batch_size)
        .prefetch(AUTOTUNE))


# Only the training pipeline is shuffled and augmented. Validation and test
# keep the order of their path arrays, which the evaluation cells rely on when
# comparing predictions against test_labels_onehot.
train_dataset = _make_dataset(train_paths, train_labels_onehot, augment_and_preprocess, shuffle=True)
val_dataset = _make_dataset(val_paths, val_labels_onehot, preprocess_only)
test_dataset = _make_dataset(test_paths, test_labels_onehot, preprocess_only)

print("Datasets prêts avec preprocess_input EfficientNet")

## Architecture Transfer Learning

In [None]:
# EfficientNetB0 with ImageNet weights and without its classification top,
# frozen so that only the new head below is trained at first.
base_model = EfficientNetB0(
    weights='imagenet',
    include_top=False,
    input_shape=(256, 256, 3),
)
base_model.trainable = False

# Classification head stacked on the backbone: global pooling, then two
# BatchNorm -> Dropout -> Dense(relu) stages, then a softmax over the classes.
head_layers = [
    layers.GlobalAveragePooling2D(),
    layers.BatchNormalization(),
    layers.Dropout(0.3),
    layers.Dense(512, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(256, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation='softmax'),
]
model = models.Sequential([base_model, *head_layers])

model.compile(
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    metrics=['accuracy'],
)

model.summary()

backbone_params = base_model.count_params()
print(f"\nBase model: {backbone_params:,} params (frozen)")
trainable_params = sum(tf.keras.backend.count_params(w) for w in model.trainable_weights)
print(f"Trainable: {trainable_params:,}")

## Phase 1: Feature Extraction (30 epochs)

In [None]:
print("PHASE 1: Feature Extraction")
print("Base model FROZEN | LR: 1e-3 | Epochs: 30")

# Both callbacks watch val_loss:
# - early stopping after 10 epochs without improvement, restoring the best weights;
# - learning rate halved after 5 epochs without improvement, never below 1e-7.
early_stopping_p1 = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
reduce_lr_p1 = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=1e-7,
)
callbacks_p1 = [early_stopping_p1, reduce_lr_p1]

history_p1 = model.fit(
    train_dataset,
    epochs=30,
    validation_data=val_dataset,
    callbacks=callbacks_p1,
)

best_val_acc_p1 = max(history_p1.history['val_accuracy'])
print(f"\nPhase 1 terminée - Best val acc: {best_val_acc_p1:.4f}")

## Phase 2: Fine-Tuning (80 epochs)

In [None]:
print("PHASE 2: Fine-Tuning")

# Unfreeze the backbone, then re-freeze everything except its last 30 layers.
base_model.trainable = True
for layer in base_model.layers[:-30]:
    layer.trainable = False

# Keep the BatchNormalization layers of the unfrozen tail frozen as well.
# The Keras fine-tuning guidance recommends this: a frozen BatchNormalization
# layer runs in inference mode, whereas a trainable one would update its
# moving statistics during fit() and can undo what the backbone has learned.
for layer in base_model.layers[-30:]:
    if isinstance(layer, layers.BatchNormalization):
        layer.trainable = False

# Changes to `trainable` only take effect after compile(). The learning rate
# is much lower than in phase 1 (1e-5 vs 1e-3) to adjust the pretrained
# weights gently.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

trainable = sum(tf.keras.backend.count_params(w) for w in model.trainable_weights)
print("30 dernières couches dégelées (BatchNorm gelées) | LR: 1e-5 | Epochs: 80")
print(f"Paramètres trainables: {trainable:,}")

# Same monitoring as phase 1, with a longer early-stopping patience (20 epochs).
callbacks_p2 = [
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-7)
]

history_p2 = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=80,
    callbacks=callbacks_p2
)

print(f"\nPhase 2 terminée - Best val acc: {max(history_p2.history['val_accuracy']):.4f}")

## Courbes d'entraînement

In [None]:
# 2x2 grid: one row per training phase, loss on the left, accuracy on the right.
fig, axes = plt.subplots(2, 2, figsize=(12, 8))

phase_histories = [("Phase 1", history_p1), ("Phase 2", history_p2)]
curve_specs = [("loss", "Loss"), ("accuracy", "Accuracy")]

for row, (phase_name, phase_history) in enumerate(phase_histories):
    for col, (metric_key, metric_title) in enumerate(curve_specs):
        ax = axes[row, col]
        ax.plot(phase_history.history[metric_key], label='Train')
        ax.plot(phase_history.history[f'val_{metric_key}'], label='Val')
        ax.set_title(f'{phase_name} - {metric_title}')
        ax.legend()
        ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

p1_best = max(history_p1.history['val_accuracy'])
p2_best = max(history_p2.history['val_accuracy'])
# Signed format: a regression in phase 2 prints "-x.xx%" instead of "+-x.xx%".
print(f"Phase 1: {p1_best:.4f} | Phase 2: {p2_best:.4f} | Gain: {(p2_best-p1_best)*100:+.2f}%")

## Sauvegarde

In [None]:
# Save the trained model to disk, then report the size of the resulting file.
model_filename = 'pokemon_classifier_efficientnet.keras'
model.save(model_filename)
print(f"Modèle sauvegardé: {model_filename}")

size_mb = Path(model_filename).stat().st_size / (1024*1024)
print(f"Taille: {size_mb:.1f} MB")

## Évaluation sur le Test Set

In [None]:
# Loss and accuracy on the held-out test split (the two values the model was
# compiled to report: the loss, then the 'accuracy' metric).
test_loss, test_accuracy = model.evaluate(test_dataset)

accuracy_pct = test_accuracy * 100
print(f"\nTest Accuracy: {test_accuracy:.4f} ({accuracy_pct:.2f}%)")
print(f"Test Loss: {test_loss:.4f}")

## Matrice de confusion - Top 20 classes (les plus représentées dans le test set)

In [None]:
# Predicted class id = arg-max of the model output; true class id = position
# of the 1 in each one-hot row. test_dataset is not shuffled, so both arrays
# follow the order of test_paths.
y_pred_proba = model.predict(test_dataset)
y_pred = y_pred_proba.argmax(axis=1)
y_true = test_labels_onehot.argmax(axis=1)

# The 20 class ids with the most samples in the test split, and their names.
class_names = list(label_to_idx)
test_class_counts = pd.Series(y_true).value_counts()
top_20_classes = test_class_counts.head(20).index.tolist()
top_20_names = [class_names[class_id] for class_id in top_20_classes]

# Keep only the samples whose true class is one of those 20.
mask = np.isin(y_true, top_20_classes)
cm = confusion_matrix(y_true[mask], y_pred[mask], labels=top_20_classes)

plt.figure(figsize=(12, 10))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=top_20_names,
    yticklabels=top_20_names,
)
plt.title('Confusion Matrix - Top 20 classes')
plt.xlabel('Prédiction')
plt.ylabel('Vérité')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

## Matrice de confusion - Worst 20 classes

Classes avec les plus faibles F1-scores, c'est-à-dire celles que le modèle classe le moins bien.

In [None]:
# Per-class precision / recall / F1 as a dict keyed by class name.
# `labels` is passed explicitly, aligned with `target_names`. Without it,
# scikit-learn infers the labels from y_true/y_pred and raises a ValueError
# as soon as one class is absent from both arrays.
report = classification_report(
    y_true,
    y_pred,
    labels=list(label_to_idx.values()),
    target_names=list(label_to_idx.keys()),
    output_dict=True,
    zero_division=0,
)

# Rank only classes that actually have test samples: a class with zero
# support gets F1 = 0 through zero_division and would otherwise crowd the
# "worst" list without reflecting any real misclassification.
class_f1 = {
    name: report[name]['f1-score']
    for name in label_to_idx
    if report[name]['support'] > 0
}
worst_20_names = sorted(class_f1, key=class_f1.get)[:20]
worst_20_classes = [label_to_idx[name] for name in worst_20_names]

# Confusion matrix restricted to samples whose true class is among the worst 20.
mask_worst = np.isin(y_true, worst_20_classes)
cm_worst = confusion_matrix(y_true[mask_worst], y_pred[mask_worst], labels=worst_20_classes)

plt.figure(figsize=(12, 10))
sns.heatmap(cm_worst, annot=True, fmt='d', cmap='Reds',
            xticklabels=worst_20_names, yticklabels=worst_20_names)
plt.title('Confusion Matrix - Worst 20 classes (plus faibles F1-scores)')
plt.xlabel('Prédiction')
plt.ylabel('Vérité')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

print("Classes avec les plus faibles performances:")
for name in worst_20_names[:10]:
    print(f"  {name}: F1={class_f1[name]:.3f}")

## Métriques globales

In [None]:
# Overall accuracy plus the macro averages from the classification report.
overall_accuracy = report['accuracy']
macro_avg = report['macro avg']

print(f"Accuracy:  {overall_accuracy:.4f}")
print(f"Precision: {macro_avg['precision']:.4f}")
print(f"Recall:    {macro_avg['recall']:.4f}")
print(f"F1-Score:  {macro_avg['f1-score']:.4f}")

## Visualisation des prédictions

In [None]:
# Draw up to 12 distinct test images with a fixed seed, so the gallery is the
# same on every run. Capping at the test-set size avoids the ValueError that
# np.random.choice raises when asked for more samples than are available.
np.random.seed(42)
num_samples = min(12, len(test_paths))
random_indices = np.random.choice(len(test_paths), num_samples, replace=False)

# Class id -> class name lookup, built once instead of once per image.
class_names = list(label_to_idx)

# Decode and resize the selected images the same way as the training pipeline.
sample_images = []
for test_idx in random_indices:
    raw_bytes = tf.io.read_file(test_paths[test_idx])
    decoded = tf.image.decode_image(raw_bytes, channels=3, expand_animations=False)
    sample_images.append(tf.image.resize(decoded, (256, 256)))

# One batched forward pass instead of one model.predict call per image.
predictions = model.predict(preprocess_input(tf.stack(sample_images)), verbose=0)

fig, axes = plt.subplots(3, 4, figsize=(14, 10))

for ax, test_idx, img_resized, probs in zip(axes.flat, random_indices, sample_images, predictions):
    predicted_class = class_names[np.argmax(probs)]
    confidence = np.max(probs)
    true_class = test_labels[test_idx]

    ax.imshow(img_resized.numpy().astype('uint8'))
    # Green title when the prediction is right, red otherwise.
    color = 'green' if predicted_class == true_class else 'red'
    ax.set_title(f"{true_class}\n-> {predicted_class} ({confidence*100:.0f}%)",
                 fontsize=8, color=color)
    ax.axis('off')

# Hide the cells left empty when fewer than 12 images were available.
for ax in axes.ravel()[num_samples:]:
    ax.axis('off')

plt.tight_layout()
plt.show()

## Test sur images externes

In [None]:
# Optional folder of images from outside the dataset; up to 8 are shown with
# the model's three most probable classes.
external_dir = Path('./Test_Img_Externes')

if not external_dir.is_dir():
    print(f"Répertoire {external_dir} non trouvé")
else:
    # Sorted so the same 8 files are picked on every run, whatever order the
    # filesystem lists them in.
    external_images = sorted(
        f for f in external_dir.iterdir()
        if f.is_file() and f.suffix.lower() in image_extensions
    )

    if not external_images:
        # Say so explicitly instead of silently displaying nothing.
        print(f"Aucune image trouvée dans {external_dir}")
    else:
        shown_images = external_images[:8]
        num_images = len(shown_images)

        # Class id -> class name lookup, built once instead of once per title line.
        class_names = list(label_to_idx)

        # Decode and resize the images the same way as the training pipeline.
        resized_images = []
        for img_path in shown_images:
            raw_bytes = tf.io.read_file(str(img_path))
            decoded = tf.image.decode_image(raw_bytes, channels=3, expand_animations=False)
            resized_images.append(tf.image.resize(decoded, (256, 256)))

        # One batched forward pass instead of one model.predict call per image.
        predictions = model.predict(preprocess_input(tf.stack(resized_images)), verbose=0)

        fig, axes = plt.subplots(2, 4, figsize=(14, 7))

        for ax, img_resized, probs in zip(axes.flat, resized_images, predictions):
            # Indices of the three highest probabilities, most probable first.
            top3_idx = np.argsort(probs)[-3:][::-1]

            ax.imshow(img_resized.numpy().astype('uint8'))
            title = "\n".join(f"{class_names[i]} ({probs[i]*100:.0f}%)" for i in top3_idx)
            ax.set_title(title, fontsize=7)
            ax.axis('off')

        # Hide the cells left empty when fewer than 8 images were found.
        for ax in axes.ravel()[num_images:]:
            ax.axis('off')

        plt.tight_layout()
        plt.show()