# Imports

In [1]:
import os
import numpy as np
from datetime import datetime
import pytz
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Lion
from tensorflow.keras.models import load_model

import folding as fold

now = datetime.now(pytz.timezone("Europe/Rome")).strftime("Day: %Y-%m-%d - Time: %H:%M:%S\n")
print(f"\033[1;94m{now}\033[0m")

[1;94mDay: 2024-11-20 - Time: 15:55:29
[0m


# Dataset

In [2]:
now = datetime.now(pytz.timezone("Europe/Rome")).strftime("Day: %Y-%m-%d - Time: %H:%M:%S\n")
print(f"\033[1;94m{now}\033[0m")

data = np.load("/kaggle/input/bloodcells-augmented/dataset_augmented.npz")
# data = np.load("/kaggle/input/bloodcells-maximisation/dataset_maximisation.npz")
test = np.load("/kaggle/input/bloodcells-evaluation/dataset_evaluation.npz")

images, labels = data['images'], data['labels']
test_images, test_labels = test['images'], test['labels']

[1;94mDay: 2024-11-20 - Time: 15:55:52
[0m


# Set model and parameters

In [15]:
now = datetime.now(pytz.timezone("Europe/Rome")).strftime("Day: %Y-%m-%d - Time: %H:%M:%S\n")
print(f"\033[1;94m{now}\033[0m")

# Trained model
model_name = 'BrainRot'
model_dir = ""

# Base model
base_model_type = tfk.applications.ConvNeXtSmall
base_model_name = "convnext_small"
neurons_first_dense_layer = 128
neurons_second_dense_layer = 32

# Pre-training parameters
learning_rate_pretrain = 1e-4
batch_size_pretrain = 64

# Fine tuning parameters
learning_rate_tuning = 5e-5
batch_size_tuning = 64
tuning_steps = 3
learning_rate_multiplier = 1/6
val_size = 0.2

[1;94mDay: 2024-11-20 - Time: 17:25:56
[0m


# Model build

In [8]:
now = datetime.now(pytz.timezone("Europe/Rome")).strftime("Day: %Y-%m-%d - Time: %H:%M:%S\n")
print(f"\033[1;94m{now}\033[0m")

# Current problem parameters
input_shape = [96, 96, 3]
num_classes = 8

# Load model with pre-trained weights (excluding top layers)
base_model = base_model_type(
    include_top=False,
    weights="imagenet",
    input_shape=input_shape,
    classes=num_classes,
    classifier_activation="softmax",
)

# Model blueprint
inputs = tfkl.Input(shape=input_shape)
x = base_model(inputs, training=False)
x = tfkl.GlobalAveragePooling2D()(x)
x = tfkl.Dense(neurons_first_dense_layer, activation='gelu')(x)
x = tfkl.Dropout(0.2)(x)
x = tfkl.Dense(neurons_second_dense_layer, activation='gelu')(x)
x = tfkl.Dropout(0.2)(x)
outputs = tfkl.Dense(num_classes, activation='softmax')(x)

# Build the final model
model = tf.keras.Model(inputs=inputs, outputs=outputs)

# Compile the model with sparse categorical crossentropy loss
model.compile(
    optimizer=Lion(learning_rate=1e-5),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Display its architecture
model.summary()
# model.get_layer(base_model_name).summary()

[1;94mDay: 2024-11-20 - Time: 15:00:42
[0m
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/convnext/convnext_small_notop.h5
[1m198551472/198551472[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 0us/step


# Pre-training

In [9]:
now = datetime.now(pytz.timezone("Europe/Rome")).strftime("Day: %Y-%m-%d - Time: %H:%M:%S\n")
print(f"\033[1;94m{now}\033[0m")

# Dataset split
validation, training = fold.split_set(data, starting=0, size=val_size)
train_images, train_labels = training['images'], training['labels']
val_images, val_labels = validation['images'], validation['labels']

# Save checkpoint
checkpoint = ModelCheckpoint(
    model_dir + "Topping.keras",
    save_best_only=True,
    monitor='val_loss',
    mode='min',
    verbose=1
)

# Train only the top layers (freeze convolutional base layers)
for layer in model.layers:
    layer.trainable = True
for layer in model.get_layer(base_model_name).layers:
    layer.trainable = False

# Recompile the model after freezing the convolutional layers
model.compile(
    optimizer = Lion(learning_rate=learning_rate_pretrain),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Train the model (only the top layers are trained at this stage)
model.fit(
    x=train_images,
    y=train_labels,
    validation_data=(val_images, val_labels),
    epochs=20,
    batch_size=batch_size_pretrain,
    callbacks=[EarlyStopping(patience=3, restore_best_weights=True), checkpoint]
)

# Clear keras session to avoid memory build up
tf.keras.backend.clear_session()
print("Finish!")

[1;94mDay: 2024-11-20 - Time: 15:00:57
[0m
Epoch 1/20


I0000 00:00:1732111273.757989     161 service.cc:145] XLA service 0x78e670016ef0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1732111273.758040     161 service.cc:153]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0


[1m  3/500[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m29s[0m 59ms/step - accuracy: 0.0799 - loss: 2.6731 

I0000 00:00:1732111282.048756     161 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - accuracy: 0.2296 - loss: 2.0210
Epoch 1: val_loss improved from inf to 1.44120, saving model to Topping.keras
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 77ms/step - accuracy: 0.2297 - loss: 2.0206 - val_accuracy: 0.4701 - val_loss: 1.4412
Epoch 2/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - accuracy: 0.4693 - loss: 1.4360
Epoch 2: val_loss improved from 1.44120 to 1.23418, saving model to Topping.keras
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 67ms/step - accuracy: 0.4694 - loss: 1.4359 - val_accuracy: 0.5436 - val_loss: 1.2342
Epoch 3/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - accuracy: 0.5481 - loss: 1.2257
Epoch 3: val_loss improved from 1.23418 to 1.15156, saving model to Topping.keras
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 67ms/step - accuracy: 0.5481 - lo

# Fine tuning

In [16]:
now = datetime.now(pytz.timezone("Europe/Rome")).strftime("Day: %Y-%m-%d - Time: %H:%M:%S\n")
print(f"\033[1;94m{now}\033[0m")

# Clear keras session to avoid memory build up
tf.keras.backend.clear_session()

# Load model
model = load_model(model_dir + "Topping.keras")

for ITERATION in range(1, tuning_steps+1):
    print(f"Iteration {ITERATION}/{tuning_steps}")
    
    # Start by unfreezing all layers but the base model's
    for layer in model.layers:
        layer.trainable = True
    model.get_layer(base_model_name).trainable = False
    base_model_layers = model.get_layer(base_model_name).layers
    total_layers = len(base_model_layers)
    
    # Dataset split
    starting_image = (val_size * ITERATION) % 1
    validation, training = fold.split_set(data, starting=starting_image, size=val_size)
    
    # Decide which layers to unfreeze
    train_layer = int(total_layers * ITERATION/tuning_steps)
    print(f"Unfreezing layers {total_layers - train_layer}-{total_layers}")
    
    # Unfreeze only last layers
    for layer in base_model_layers[:-train_layer]:
        layer.trainable = True
    
    # Recompile the model after unfreezing the convolutional layers
    model.compile(
        optimizer=Lion(
            learning_rate=learning_rate_tuning * (learning_rate_multiplier ** (ITERATION-1))
        ),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    
    # Fine-tune the entire model (including the convolutional layers)
    model.fit(
        x=training['images'],
        y=training['labels'],
        validation_data=(validation['images'], validation['labels']),
        epochs=1,
        batch_size=batch_size_tuning,
        callbacks=[EarlyStopping(patience=3, restore_best_weights=True)]
    )

model.save(model_dir + model_name + ".keras",)
print("Finish!")

[1;94mDay: 2024-11-20 - Time: 17:26:02
[0m
Iteration 1/3
Unfreezing layers 173-259
Epoch 1/2





[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 188ms/step - accuracy: 0.7833 - loss: 0.6304 - val_accuracy: 0.9040 - val_loss: 0.2860
Epoch 2/2
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 178ms/step - accuracy: 0.9363 - loss: 0.1968 - val_accuracy: 0.9086 - val_loss: 0.2746
Iteration 2/3
Unfreezing layers 87-259
Epoch 1/2





[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 163ms/step - accuracy: 0.9630 - loss: 0.1277 - val_accuracy: 0.9883 - val_loss: 0.0378
Epoch 2/2
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 151ms/step - accuracy: 0.9850 - loss: 0.0542 - val_accuracy: 0.9872 - val_loss: 0.0378
Iteration 3/3
Unfreezing layers 0-259
Epoch 1/2
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 78ms/step - accuracy: 0.9907 - loss: 0.0330 - val_accuracy: 0.9980 - val_loss: 0.0095
Epoch 2/2
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 67ms/step - accuracy: 0.9923 - loss: 0.0294 - val_accuracy: 0.9980 - val_loss: 0.0092
Finish!


# Model Test

In [17]:
now = datetime.now(pytz.timezone("Europe/Rome")).strftime("Day: %Y-%m-%d - Time: %H:%M:%S\n")
print(f"\033[1;94m{now}\033[0m")

model = load_model(model_dir + model_name + ".keras")
test_loss, test_accuracy = model.evaluate(test_images, test_labels, verbose=1)
print(f"Main model: {model_name}")
print(f"  Test Loss: {test_loss:.4f}")
print(f"  Test Accuracy: {test_accuracy:.4f}")

[1;94mDay: 2024-11-20 - Time: 17:35:20
[0m
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 31ms/step - accuracy: 0.9284 - loss: 0.2734
Main model: BrainRot
  Test Loss: 0.2548
  Test Accuracy: 0.9315
