In [1]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator # type: ignore
from tensorflow.keras.applications import MobileNetV2 # type: ignore
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D # type: ignore
from tensorflow.keras.models import Model # type: ignore
from tensorflow.keras.optimizers import Adam # type: ignore
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint # type: ignore

2024-05-30 17:38:17.911369: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Paths
train_data_dir = "dataset/training"
test_data_dir = "dataset/test"
wip_models_dir = "models_wip"
models_dir = "models"
model_name = "chess_classifier_10k"
current_best = os.path.join(wip_models_dir, f"{model_name}_best.keras")
final = os.path.join(models_dir, f"{model_name}.keras")

# Parameters
img_width, img_height = 224, 224
batch_size = 32
epochs = 10
num_classes = 13  # 6 pieces x 2 colors + 1 empty

In [3]:
os.makedirs(wip_models_dir, exist_ok=True)
os.makedirs(models_dir, exist_ok=True)

In [4]:
# Data normalization
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
)

test_datagen = ImageDataGenerator(rescale=1.0 / 255)

In [5]:
# Train and validation generators
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode="categorical",
)

validation_generator = test_datagen.flow_from_directory(
    test_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode="categorical",
)

Found 200537 images belonging to 13 classes.
Found 50135 images belonging to 13 classes.


In [6]:
# Calculate steps_per_epoch and validation_steps
# steps_per_epoch = max(1, train_generator.samples // batch_size)
# validation_steps = max(1, validation_generator.samples // batch_size)
steps_per_epoch = max(1, len(train_generator) // batch_size)
validation_steps = max(1, len(validation_generator) // batch_size)

## Creating the model

Only using a single dense layer on top of MobileNet's, could probably improve the model by adding a few more.

In [7]:
# Load pre-trained MobileNetV2 model + higher level layers
base_model = MobileNetV2(
    weights="imagenet", include_top=False, input_shape=(img_width, img_height, 3)
)

# Add custom layers on top of the base model
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation="relu")(x)
predictions = Dense(num_classes, activation="softmax")(x)

# Final model
model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the base layers
for layer in base_model.layers:
    layer.trainable = False

# Compile the model
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [8]:
# Callbacks
early_stopping = EarlyStopping(
    monitor="val_loss", patience=10, restore_best_weights=True
)
model_checkpoint = ModelCheckpoint(
    current_best, monitor="val_loss", save_best_only=True
)

## Initial training

In [9]:
# Train the model
model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_generator,
    validation_steps=validation_steps,
    epochs=epochs,
    callbacks=[early_stopping, model_checkpoint],
)

Epoch 1/10


  self._warn_if_super_not_called()


[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m156s[0m 774ms/step - accuracy: 0.8466 - loss: 0.4590 - val_accuracy: 0.9694 - val_loss: 0.0913
Epoch 2/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m171s[0m 875ms/step - accuracy: 0.9802 - loss: 0.0576 - val_accuracy: 0.9915 - val_loss: 0.0204
Epoch 3/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m166s[0m 854ms/step - accuracy: 0.9917 - loss: 0.0239 - val_accuracy: 0.9935 - val_loss: 0.0219
Epoch 4/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m160s[0m 820ms/step - accuracy: 0.9877 - loss: 0.0429 - val_accuracy: 0.9902 - val_loss: 0.0265
Epoch 5/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m153s[0m 784ms/step - accuracy: 0.9925 - loss: 0.0239 - val_accuracy: 0.9902 - val_loss: 0.0278
Epoch 6/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m163s[0m 837ms/step - accuracy: 0.9904 - loss: 0.0282 - val_accuracy: 0.9928 - val_loss: 0.0235
Epoch 7/10
[1m

<keras.src.callbacks.history.History at 0x14978df90>

## Fine tuning

Only the top layers were trainable in the initial training. In order to improve predictions, we unfreeze a few ImageNet layers and retrain the model with a much lower learning rate.

This might be counter-productive, not sure, needs more testing.

Either way, the best weights are used.

In [10]:
# Unfreeze some layers for fine-tuning
for layer in base_model.layers[-30:]:  # Unfreeze last 30 layers
    layer.trainable = True

# Recompile the model with a lower learning rate
model.compile(
    optimizer=Adam(learning_rate=1e-5),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Fine-tune the model
model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_generator,
    validation_steps=validation_steps,
    epochs=epochs,
    callbacks=[early_stopping, model_checkpoint],
)

Epoch 1/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m217s[0m 1s/step - accuracy: 0.9006 - loss: 0.4279 - val_accuracy: 0.9974 - val_loss: 0.0054
Epoch 2/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m206s[0m 1s/step - accuracy: 0.9801 - loss: 0.0710 - val_accuracy: 0.9980 - val_loss: 0.0073
Epoch 3/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m205s[0m 1s/step - accuracy: 0.9921 - loss: 0.0283 - val_accuracy: 0.9993 - val_loss: 0.0039
Epoch 4/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m199s[0m 1s/step - accuracy: 0.9927 - loss: 0.0215 - val_accuracy: 0.9987 - val_loss: 0.0064
Epoch 5/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 1s/step - accuracy: 0.9931 - loss: 0.0191 - val_accuracy: 0.9987 - val_loss: 0.0033
Epoch 6/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m193s[0m 991ms/step - accuracy: 0.9953 - loss: 0.0123 - val_accuracy: 0.9987 - val_loss: 0.0070
Epoch 7/10
[1m195/

<keras.src.callbacks.history.History at 0x149a02d50>

In [11]:
# Save the final model
model.save(final)

In [12]:
# Evaluate the final model
loss, accuracy = model.evaluate(validation_generator, steps=validation_steps)
print(f"Test accuracy: {accuracy:.4f}")

[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 690ms/step - accuracy: 0.9971 - loss: 0.0055
Test accuracy: 0.9980
