In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sea
import optuna
import random
import gc
import json


import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.callbacks import Callback

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, ReLU, BatchNormalization
from tensorflow.keras.initializers import HeNormal, Ones, Constant
from tensorflow.keras.optimizers import AdamW
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

from sklearn.metrics import classification_report, confusion_matrix
from IPython.display import display
from PIL import Image

import warnings
warnings.filterwarnings("ignore")

In [2]:
def set_seed(seed_value=3126):
    random.seed(seed_value)
    np.random.seed(seed_value)
    tf.random.set_seed(seed_value)

SEED = 3126
set_seed()

In [3]:
WIDTH, HEIGHT = 224, 224
BATCH_SIZE = 32
EPOCHS = 12
DATA_PATH = '/kaggle/input/finalized-astrovision-data'

#data augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=0.2
)

train_generator = train_datagen.flow_from_directory(
    DATA_PATH,
    target_size=(WIDTH, HEIGHT),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training',
    shuffle=True,
    seed=SEED  
)

validation_generator = train_datagen.flow_from_directory(
    DATA_PATH,
    target_size=(WIDTH, HEIGHT),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation',
    shuffle=False,
    seed=SEED 
)

Found 4913 images belonging to 4 classes.
Found 1228 images belonging to 4 classes.


In [4]:
def make_dense_layer(input_size, dropout_rate=0.0, input_shape=None):
    layers = []
    if input_shape:
        layers.append(Dense(input_size,
                            use_bias=False, 
                            input_shape=input_shape,
                            kernel_initializer=HeNormal()
                            ))
    else:
        layers.append(Dense(input_size, use_bias=False, kernel_initializer=HeNormal()))
    layers.extend([BatchNormalization(gamma_initializer=Ones(), beta_initializer=Constant(0.25)),
                  ReLU()])
    if dropout_rate > 0:
        layers.append(Dropout(dropout_rate))
    return Sequential(layers)

In [5]:
best_weights_path = "/kaggle/working/best_weights.weights.h5"

early_stopping_callback = EarlyStopping(
    monitor='val_loss',  
    patience=10,              
    mode='min',              
    min_delta=.001,
    restore_best_weights=True,  
    verbose=1
)

In [6]:
class ClearMemory(Callback):
    def on_epoch_end(self, epoch, logs=None):
        K.clear_session()  
        gc.collect() 

In [7]:
best_val_acc = 0

def base_model_objective(trial):
    global best_val_acc
    ## Define distributions of hyperparams
    learning_rate_ = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
    weight_decay_  = trial.suggest_float("weight_decay", 1e-12, 1e-1, log=True)
    beta1_         = trial.suggest_float("beta1", .895, .999) 
    beta2_         = trial.suggest_float("beta2", .895, .999) 

    # Build the model
    base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(WIDTH, HEIGHT, 3))
    base_model.trainable = False  # Freeze base model
    x = base_model.output
    x = GlobalAveragePooling2D()(x)

    fc_layers = Sequential([
        make_dense_layer(1024, input_shape=(x.shape[-1],)),
        make_dense_layer(512),
        make_dense_layer(8)
    ])
    x = fc_layers(x)
    predictions = Dense(train_generator.num_classes, activation='softmax', kernel_initializer=HeNormal())(x)

    checkpoint_callback = ModelCheckpoint(
        filepath=best_weights_path,  
        monitor='val_accuracy',    
        save_best_only=True,       
        save_weights_only=True,    
        mode='max',                
        verbose=1,
        initial_value_threshold=best_val_acc
    )
    
    model = Model(inputs=base_model.input, outputs=predictions)
    model.compile(optimizer=AdamW(learning_rate_, weight_decay_, beta1_, beta2_),
                  loss='categorical_crossentropy', metrics=['accuracy'])

    # Reload pretrained weights (if necessary)
    model.load_weights("/kaggle/input/baseline-tf-model-3-hidden/best_weights.weights.h5")

    history = model.fit(
        train_generator,
        epochs=EPOCHS,
        validation_data=validation_generator,
        callbacks=[checkpoint_callback, early_stopping_callback, ClearMemory()]
    )

    loss, val_acc = model.evaluate(validation_generator, 
                                   steps=validation_generator.samples // BATCH_SIZE)
    
    if(val_acc>best_val_acc):
        best_val_acc = val_acc
        print(f"\nNew best model found with val_acc = {best_val_acc:.4f}\n")
    return val_acc

In [8]:
## Objective is to maximize accuracy!
study = optuna.create_study(study_name="baseline_params_search", direction="maximize")
study.optimize(base_model_objective, n_trials=22)

[I 2025-03-10 19:22:05,594] A new study created in memory with name: baseline_params_search


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 593ms/step - accuracy: 0.8181 - loss: 0.4011
Epoch 1: val_accuracy improved from 0.00000 to 0.78502, saving model to /kaggle/working/best_weights.weights.h5
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 784ms/step - accuracy: 0.8181 - loss: 0.4012 - val_accuracy: 0.7850 - val_loss: 0.4454
Epoch 2/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 363ms/step - accuracy: 0.8271 - loss: 0.3899
Epoch 2: val_accuracy improved from 0.78502 to 0.80456, saving model to /kaggle/working/best_weights.weights.h5
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 464ms/step - accuracy: 0.8270 - loss: 0.3900 - val_accuracy: 0.80

[I 2025-03-10 19:38:23,643] Trial 0 finished with value: 0.8223684430122375 and parameters: {'learning_rate': 0.006714819991380839, 'weight_decay': 1.567225612893073e-10, 'beta1': 0.9198385739502527, 'beta2': 0.95248078116008}. Best is trial 0 with value: 0.8223684430122375.



New best model found with val_acc = 0.8224

Epoch 1/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 372ms/step - accuracy: 0.8262 - loss: 0.4006
Epoch 1: val_accuracy improved from 0.82237 to 0.82818, saving model to /kaggle/working/best_weights.weights.h5
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 502ms/step - accuracy: 0.8263 - loss: 0.4004 - val_accuracy: 0.8282 - val_loss: 0.3683
Epoch 2/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 361ms/step - accuracy: 0.8422 - loss: 0.3593
Epoch 2: val_accuracy did not improve from 0.82818
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 458ms/step - accuracy: 0.8423 - loss: 0.3592 - val_accuracy: 0.8111 - val_loss: 0.3910
Epoch 3/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 364ms/step - accuracy: 0.8475 - loss: 0.3595
Epoch 3: val_accuracy improved from 0.82818 to 0.82980, saving model to /kaggle/working/best_weights.weights.h5
[1m15

[I 2025-03-10 19:51:20,407] Trial 1 finished with value: 0.8322368264198303 and parameters: {'learning_rate': 0.0005735247287532914, 'weight_decay': 0.06057478524282291, 'beta1': 0.9411660322497772, 'beta2': 0.9414513257861091}. Best is trial 1 with value: 0.8322368264198303.



New best model found with val_acc = 0.8322

Epoch 1/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 372ms/step - accuracy: 0.8093 - loss: 0.4089
Epoch 1: val_accuracy did not improve from 0.83224
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 501ms/step - accuracy: 0.8094 - loss: 0.4088 - val_accuracy: 0.8306 - val_loss: 0.3739
Epoch 2/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 360ms/step - accuracy: 0.8265 - loss: 0.4051
Epoch 2: val_accuracy improved from 0.83224 to 0.85261, saving model to /kaggle/working/best_weights.weights.h5
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 461ms/step - accuracy: 0.8266 - loss: 0.4050 - val_accuracy: 0.8526 - val_loss: 0.3675
Epoch 3/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 363ms/step - accuracy: 0.8204 - loss: 0.3827
Epoch 3: val_accuracy did not improve from 0.85261
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 

[I 2025-03-10 20:04:12,429] Trial 2 finished with value: 0.8536184430122375 and parameters: {'learning_rate': 1.3242072255667857e-05, 'weight_decay': 0.00012404925215976794, 'beta1': 0.9121562477192046, 'beta2': 0.9447441676731517}. Best is trial 2 with value: 0.8536184430122375.



New best model found with val_acc = 0.8536

Epoch 1/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 372ms/step - accuracy: 0.7384 - loss: 0.5671
Epoch 1: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 501ms/step - accuracy: 0.7388 - loss: 0.5667 - val_accuracy: 0.7663 - val_loss: 0.5886
Epoch 2/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 365ms/step - accuracy: 0.8037 - loss: 0.4587
Epoch 2: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 464ms/step - accuracy: 0.8037 - loss: 0.4588 - val_accuracy: 0.7581 - val_loss: 0.5690
Epoch 3/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 364ms/step - accuracy: 0.7964 - loss: 0.4484
Epoch 3: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 462ms/step - accuracy: 0.7964 - loss: 0.4485 - val_accuracy: 

[I 2025-03-10 20:17:08,437] Trial 3 finished with value: 0.7508223652839661 and parameters: {'learning_rate': 0.03321260016517144, 'weight_decay': 0.024074345002170056, 'beta1': 0.91186363143446, 'beta2': 0.9493167697270096}. Best is trial 2 with value: 0.8536184430122375.


Epoch 1/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 374ms/step - accuracy: 0.7509 - loss: 0.5447
Epoch 1: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 501ms/step - accuracy: 0.7512 - loss: 0.5443 - val_accuracy: 0.7695 - val_loss: 0.6309
Epoch 2/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 361ms/step - accuracy: 0.7879 - loss: 0.4676
Epoch 2: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 460ms/step - accuracy: 0.7880 - loss: 0.4675 - val_accuracy: 0.7614 - val_loss: 0.5027
Epoch 3/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 363ms/step - accuracy: 0.8054 - loss: 0.4435
Epoch 3: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 461ms/step - accuracy: 0.8053 - loss: 0.4438 - val_accuracy: 0.7964 - val_loss: 0.4352
Epoch 4/12
[1m153/

[I 2025-03-10 20:30:04,500] Trial 4 finished with value: 0.7722039222717285 and parameters: {'learning_rate': 0.02699715978095016, 'weight_decay': 0.03577932250286161, 'beta1': 0.9299315017866149, 'beta2': 0.9470532564117805}. Best is trial 2 with value: 0.8536184430122375.


Epoch 1/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 372ms/step - accuracy: 0.8406 - loss: 0.3758
Epoch 1: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 500ms/step - accuracy: 0.8405 - loss: 0.3759 - val_accuracy: 0.8322 - val_loss: 0.3692
Epoch 2/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 361ms/step - accuracy: 0.8300 - loss: 0.3714
Epoch 2: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 463ms/step - accuracy: 0.8300 - loss: 0.3714 - val_accuracy: 0.8363 - val_loss: 0.3637
Epoch 3/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 363ms/step - accuracy: 0.8268 - loss: 0.3644
Epoch 3: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 461ms/step - accuracy: 0.8270 - loss: 0.3642 - val_accuracy: 0.8192 - val_loss: 0.3803
Epoch 4/12
[1m153/

[I 2025-03-10 20:45:32,042] Trial 5 finished with value: 0.8347039222717285 and parameters: {'learning_rate': 0.00032203876854350924, 'weight_decay': 1.3783570212039038e-06, 'beta1': 0.9140492714584976, 'beta2': 0.9873641292757978}. Best is trial 2 with value: 0.8536184430122375.


Epoch 1/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 376ms/step - accuracy: 0.8201 - loss: 0.4203
Epoch 1: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 508ms/step - accuracy: 0.8201 - loss: 0.4201 - val_accuracy: 0.7883 - val_loss: 0.4243
Epoch 2/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 367ms/step - accuracy: 0.8299 - loss: 0.3894
Epoch 2: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 464ms/step - accuracy: 0.8299 - loss: 0.3892 - val_accuracy: 0.8135 - val_loss: 0.4346
Epoch 3/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 361ms/step - accuracy: 0.8420 - loss: 0.3656
Epoch 3: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 463ms/step - accuracy: 0.8420 - loss: 0.3656 - val_accuracy: 0.7834 - val_loss: 0.4520
Epoch 4/12
[1m153/

[I 2025-03-10 20:58:35,273] Trial 6 finished with value: 0.7894737124443054 and parameters: {'learning_rate': 0.005994179475827017, 'weight_decay': 1.2523462970490865e-08, 'beta1': 0.9529545108027255, 'beta2': 0.9754024920013774}. Best is trial 2 with value: 0.8536184430122375.


Epoch 1/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 377ms/step - accuracy: 0.8200 - loss: 0.3961
Epoch 1: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 508ms/step - accuracy: 0.8201 - loss: 0.3959 - val_accuracy: 0.8363 - val_loss: 0.3648
Epoch 2/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 363ms/step - accuracy: 0.8337 - loss: 0.3808
Epoch 2: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 463ms/step - accuracy: 0.8337 - loss: 0.3808 - val_accuracy: 0.8518 - val_loss: 0.3621
Epoch 3/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 364ms/step - accuracy: 0.8186 - loss: 0.3952
Epoch 3: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 465ms/step - accuracy: 0.8187 - loss: 0.3952 - val_accuracy: 0.8461 - val_loss: 0.3595
Epoch 4/12
[1m153/

[I 2025-03-10 21:14:31,998] Trial 7 finished with value: 0.8305920958518982 and parameters: {'learning_rate': 1.352304899194472e-05, 'weight_decay': 0.0002382928688728162, 'beta1': 0.9812509464424974, 'beta2': 0.9642653590600209}. Best is trial 2 with value: 0.8536184430122375.


Epoch 1/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 386ms/step - accuracy: 0.8182 - loss: 0.4079
Epoch 1: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 520ms/step - accuracy: 0.8182 - loss: 0.4080 - val_accuracy: 0.8477 - val_loss: 0.3629
Epoch 2/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 375ms/step - accuracy: 0.8160 - loss: 0.4026
Epoch 2: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 477ms/step - accuracy: 0.8161 - loss: 0.4024 - val_accuracy: 0.8314 - val_loss: 0.3931
Epoch 3/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 378ms/step - accuracy: 0.8323 - loss: 0.3625
Epoch 3: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 482ms/step - accuracy: 0.8324 - loss: 0.3625 - val_accuracy: 0.8347 - val_loss: 0.3772
Epoch 4/12
[1m153/

[I 2025-03-10 21:27:49,814] Trial 8 finished with value: 0.8379934430122375 and parameters: {'learning_rate': 4.452614807102911e-05, 'weight_decay': 1.5606385665158862e-09, 'beta1': 0.9451121121759939, 'beta2': 0.9904680268437699}. Best is trial 2 with value: 0.8536184430122375.


Epoch 1/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 381ms/step - accuracy: 0.7193 - loss: 0.6417
Epoch 1: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 512ms/step - accuracy: 0.7195 - loss: 0.6412 - val_accuracy: 0.7524 - val_loss: 0.7221
Epoch 2/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 374ms/step - accuracy: 0.7527 - loss: 0.5390
Epoch 2: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 477ms/step - accuracy: 0.7529 - loss: 0.5387 - val_accuracy: 0.7948 - val_loss: 0.5708
Epoch 3/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 372ms/step - accuracy: 0.8146 - loss: 0.4278
Epoch 3: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 478ms/step - accuracy: 0.8144 - loss: 0.4281 - val_accuracy: 0.7891 - val_loss: 0.6394
Epoch 4/12
[1m153/

[I 2025-03-10 21:41:06,708] Trial 9 finished with value: 0.7648026347160339 and parameters: {'learning_rate': 0.0475632245208489, 'weight_decay': 4.619261668855777e-10, 'beta1': 0.971366758967713, 'beta2': 0.9178835711739907}. Best is trial 2 with value: 0.8536184430122375.


Epoch 1/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 385ms/step - accuracy: 0.8201 - loss: 0.3884
Epoch 1: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 521ms/step - accuracy: 0.8201 - loss: 0.3884 - val_accuracy: 0.8477 - val_loss: 0.3749
Epoch 2/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 375ms/step - accuracy: 0.8357 - loss: 0.3768
Epoch 2: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 479ms/step - accuracy: 0.8357 - loss: 0.3767 - val_accuracy: 0.8485 - val_loss: 0.3723
Epoch 3/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 374ms/step - accuracy: 0.8274 - loss: 0.3796
Epoch 3: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 478ms/step - accuracy: 0.8275 - loss: 0.3795 - val_accuracy: 0.8257 - val_loss: 0.3747
Epoch 4/12
[1m153/

[I 2025-03-10 21:54:22,841] Trial 10 finished with value: 0.8388158082962036 and parameters: {'learning_rate': 8.120203480225477e-05, 'weight_decay': 7.938170706936042e-06, 'beta1': 0.896906572073798, 'beta2': 0.9025290894992427}. Best is trial 2 with value: 0.8536184430122375.


Epoch 1/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 377ms/step - accuracy: 0.8204 - loss: 0.4006
Epoch 1: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 509ms/step - accuracy: 0.8205 - loss: 0.4004 - val_accuracy: 0.8420 - val_loss: 0.3694
Epoch 2/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 367ms/step - accuracy: 0.8297 - loss: 0.3934
Epoch 2: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 471ms/step - accuracy: 0.8297 - loss: 0.3933 - val_accuracy: 0.8363 - val_loss: 0.3829
Epoch 3/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 367ms/step - accuracy: 0.8357 - loss: 0.3838
Epoch 3: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 469ms/step - accuracy: 0.8358 - loss: 0.3835 - val_accuracy: 0.8404 - val_loss: 0.3673
Epoch 4/12
[1m153/

[I 2025-03-10 22:07:37,148] Trial 11 finished with value: 0.8388158082962036 and parameters: {'learning_rate': 5.9603575982899707e-05, 'weight_decay': 1.2062523473835431e-05, 'beta1': 0.8982757177546478, 'beta2': 0.8955280233151324}. Best is trial 2 with value: 0.8536184430122375.


Epoch 1/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 377ms/step - accuracy: 0.8242 - loss: 0.4040
Epoch 1: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 509ms/step - accuracy: 0.8241 - loss: 0.4040 - val_accuracy: 0.8420 - val_loss: 0.3712
Epoch 2/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 369ms/step - accuracy: 0.8118 - loss: 0.4184
Epoch 2: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 471ms/step - accuracy: 0.8119 - loss: 0.4183 - val_accuracy: 0.8233 - val_loss: 0.3815
Epoch 3/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 364ms/step - accuracy: 0.8177 - loss: 0.4130
Epoch 3: val_accuracy did not improve from 0.85362
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 469ms/step - accuracy: 0.8178 - loss: 0.4128 - val_accuracy: 0.8347 - val_loss: 0.3953
Epoch 4/12
[1m153/

[I 2025-03-10 22:23:14,328] Trial 12 finished with value: 0.8544408082962036 and parameters: {'learning_rate': 1.0954700275088106e-05, 'weight_decay': 0.00014313277277334192, 'beta1': 0.8956561164123156, 'beta2': 0.9224535952932861}. Best is trial 12 with value: 0.8544408082962036.



New best model found with val_acc = 0.8544

Epoch 1/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 373ms/step - accuracy: 0.8227 - loss: 0.3880
Epoch 1: val_accuracy did not improve from 0.85444
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 507ms/step - accuracy: 0.8227 - loss: 0.3881 - val_accuracy: 0.8339 - val_loss: 0.3813
Epoch 2/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 367ms/step - accuracy: 0.8209 - loss: 0.4032
Epoch 2: val_accuracy improved from 0.85444 to 0.85993, saving model to /kaggle/working/best_weights.weights.h5
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 472ms/step - accuracy: 0.8210 - loss: 0.4030 - val_accuracy: 0.8599 - val_loss: 0.3623
Epoch 3/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 366ms/step - accuracy: 0.8215 - loss: 0.3949
Epoch 3: val_accuracy did not improve from 0.85993
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 

[I 2025-03-10 22:36:23,537] Trial 13 finished with value: 0.8536184430122375 and parameters: {'learning_rate': 1.0099822712495519e-05, 'weight_decay': 0.0004843433493498541, 'beta1': 0.897889974248678, 'beta2': 0.9254514086169194}. Best is trial 12 with value: 0.8544408082962036.


Epoch 1/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 378ms/step - accuracy: 0.8217 - loss: 0.3907
Epoch 1: val_accuracy did not improve from 0.85444
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 510ms/step - accuracy: 0.8217 - loss: 0.3906 - val_accuracy: 0.8184 - val_loss: 0.3900
Epoch 2/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 363ms/step - accuracy: 0.8312 - loss: 0.3768
Epoch 2: val_accuracy did not improve from 0.85444
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 467ms/step - accuracy: 0.8312 - loss: 0.3767 - val_accuracy: 0.8192 - val_loss: 0.3850
Epoch 3/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 364ms/step - accuracy: 0.8411 - loss: 0.3650
Epoch 3: val_accuracy did not improve from 0.85444
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 469ms/step - accuracy: 0.8411 - loss: 0.3650 - val_accuracy: 0.8282 - val_loss: 0.3762
Epoch 4/12
[1m153/

[I 2025-03-10 22:49:28,692] Trial 14 finished with value: 0.8305920958518982 and parameters: {'learning_rate': 0.00016069540088796075, 'weight_decay': 7.32559994315877e-08, 'beta1': 0.997744519324883, 'beta2': 0.926777123606436}. Best is trial 12 with value: 0.8544408082962036.


Epoch 1/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 381ms/step - accuracy: 0.8151 - loss: 0.4127
Epoch 1: val_accuracy did not improve from 0.85444
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 517ms/step - accuracy: 0.8151 - loss: 0.4126 - val_accuracy: 0.8461 - val_loss: 0.3607
Epoch 2/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 365ms/step - accuracy: 0.8196 - loss: 0.3857
Epoch 2: val_accuracy did not improve from 0.85444
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 468ms/step - accuracy: 0.8197 - loss: 0.3857 - val_accuracy: 0.8412 - val_loss: 0.3684
Epoch 3/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 364ms/step - accuracy: 0.8342 - loss: 0.3745
Epoch 3: val_accuracy did not improve from 0.85444
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 466ms/step - accuracy: 0.8342 - loss: 0.3746 - val_accuracy: 0.8436 - val_loss: 0.3780
Epoch 4/12
[1m153/

[I 2025-03-10 23:02:37,212] Trial 15 finished with value: 0.8412829041481018 and parameters: {'learning_rate': 2.501157587518339e-05, 'weight_decay': 1.5919388621039698e-12, 'beta1': 0.931178105542173, 'beta2': 0.9119217411625601}. Best is trial 12 with value: 0.8544408082962036.


Epoch 1/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 370ms/step - accuracy: 0.8264 - loss: 0.3921
Epoch 1: val_accuracy did not improve from 0.85444
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 505ms/step - accuracy: 0.8265 - loss: 0.3919 - val_accuracy: 0.8176 - val_loss: 0.3790
Epoch 2/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 363ms/step - accuracy: 0.8537 - loss: 0.3305
Epoch 2: val_accuracy did not improve from 0.85444
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 468ms/step - accuracy: 0.8536 - loss: 0.3308 - val_accuracy: 0.8396 - val_loss: 0.3875
Epoch 3/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 365ms/step - accuracy: 0.8395 - loss: 0.3495
Epoch 3: val_accuracy did not improve from 0.85444
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 469ms/step - accuracy: 0.8396 - loss: 0.3495 - val_accuracy: 0.8298 - val_loss: 0.3847
Epoch 4/12
[1m153/

[I 2025-03-10 23:15:41,856] Trial 16 finished with value: 0.8240131735801697 and parameters: {'learning_rate': 0.001506954822049188, 'weight_decay': 0.0005453157021098234, 'beta1': 0.9135578789134614, 'beta2': 0.9353637023457798}. Best is trial 12 with value: 0.8544408082962036.


Epoch 1/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 379ms/step - accuracy: 0.8180 - loss: 0.4270
Epoch 1: val_accuracy did not improve from 0.85444
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 513ms/step - accuracy: 0.8181 - loss: 0.4265 - val_accuracy: 0.8331 - val_loss: 0.3735
Epoch 2/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 363ms/step - accuracy: 0.8426 - loss: 0.3664
Epoch 2: val_accuracy did not improve from 0.85444
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 466ms/step - accuracy: 0.8426 - loss: 0.3664 - val_accuracy: 0.8257 - val_loss: 0.3694
Epoch 3/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 365ms/step - accuracy: 0.8440 - loss: 0.3597
Epoch 3: val_accuracy did not improve from 0.85444
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 472ms/step - accuracy: 0.8440 - loss: 0.3598 - val_accuracy: 0.8257 - val_loss: 0.3939
Epoch 4/12
[1m153/

[I 2025-03-10 23:28:48,974] Trial 17 finished with value: 0.8355262875556946 and parameters: {'learning_rate': 0.00016810380083630307, 'weight_decay': 5.907160478045116e-05, 'beta1': 0.9078665134092785, 'beta2': 0.9616506475997377}. Best is trial 12 with value: 0.8544408082962036.


Epoch 1/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 372ms/step - accuracy: 0.8199 - loss: 0.3930
Epoch 1: val_accuracy did not improve from 0.85444
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 507ms/step - accuracy: 0.8201 - loss: 0.3927 - val_accuracy: 0.8217 - val_loss: 0.3900
Epoch 2/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 364ms/step - accuracy: 0.8520 - loss: 0.3466
Epoch 2: val_accuracy did not improve from 0.85444
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 471ms/step - accuracy: 0.8519 - loss: 0.3468 - val_accuracy: 0.8200 - val_loss: 0.3943
Epoch 3/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 368ms/step - accuracy: 0.8446 - loss: 0.3587
Epoch 3: val_accuracy did not improve from 0.85444
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 474ms/step - accuracy: 0.8446 - loss: 0.3586 - val_accuracy: 0.8070 - val_loss: 0.4131
Epoch 4/12
[1m153/

[I 2025-03-10 23:41:57,510] Trial 18 finished with value: 0.8264802694320679 and parameters: {'learning_rate': 0.0015635253194909774, 'weight_decay': 0.003447397367051737, 'beta1': 0.9279726950455794, 'beta2': 0.9324400086405018}. Best is trial 12 with value: 0.8544408082962036.


Epoch 1/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 377ms/step - accuracy: 0.8106 - loss: 0.4106
Epoch 1: val_accuracy did not improve from 0.85444
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 510ms/step - accuracy: 0.8107 - loss: 0.4105 - val_accuracy: 0.8379 - val_loss: 0.3679
Epoch 2/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 365ms/step - accuracy: 0.8223 - loss: 0.3990
Epoch 2: val_accuracy did not improve from 0.85444
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 474ms/step - accuracy: 0.8223 - loss: 0.3990 - val_accuracy: 0.8388 - val_loss: 0.3659
Epoch 3/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 368ms/step - accuracy: 0.8324 - loss: 0.3764
Epoch 3: val_accuracy did not improve from 0.85444
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 473ms/step - accuracy: 0.8324 - loss: 0.3766 - val_accuracy: 0.8347 - val_loss: 0.3663
Epoch 4/12
[1m153/

[I 2025-03-10 23:55:10,529] Trial 19 finished with value: 0.8396381735801697 and parameters: {'learning_rate': 2.4661242734821514e-05, 'weight_decay': 9.801096390989685e-07, 'beta1': 0.9612633407099205, 'beta2': 0.9130825137368326}. Best is trial 12 with value: 0.8544408082962036.


Epoch 1/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 379ms/step - accuracy: 0.8269 - loss: 0.3866
Epoch 1: val_accuracy did not improve from 0.85444
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 516ms/step - accuracy: 0.8269 - loss: 0.3867 - val_accuracy: 0.8412 - val_loss: 0.3651
Epoch 2/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 365ms/step - accuracy: 0.8186 - loss: 0.3966
Epoch 2: val_accuracy did not improve from 0.85444
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 473ms/step - accuracy: 0.8187 - loss: 0.3966 - val_accuracy: 0.8249 - val_loss: 0.3746
Epoch 3/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 366ms/step - accuracy: 0.8157 - loss: 0.4008
Epoch 3: val_accuracy did not improve from 0.85444
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 470ms/step - accuracy: 0.8159 - loss: 0.4006 - val_accuracy: 0.8331 - val_loss: 0.3803
Epoch 4/12
[1m153/

[I 2025-03-11 00:10:53,434] Trial 20 finished with value: 0.8445723652839661 and parameters: {'learning_rate': 1.0051322397160368e-05, 'weight_decay': 1.6291027268457878e-05, 'beta1': 0.9220212306586055, 'beta2': 0.9622251429242576}. Best is trial 12 with value: 0.8544408082962036.


Epoch 1/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 380ms/step - accuracy: 0.8218 - loss: 0.4002
Epoch 1: val_accuracy did not improve from 0.85444
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 513ms/step - accuracy: 0.8218 - loss: 0.4002 - val_accuracy: 0.8347 - val_loss: 0.3853
Epoch 2/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 371ms/step - accuracy: 0.8333 - loss: 0.3788
Epoch 2: val_accuracy did not improve from 0.85444
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 480ms/step - accuracy: 0.8332 - loss: 0.3789 - val_accuracy: 0.8428 - val_loss: 0.3820
Epoch 3/12
[1m153/154[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 368ms/step - accuracy: 0.8187 - loss: 0.3998
Epoch 3: val_accuracy did not improve from 0.85444
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 474ms/step - accuracy: 0.8188 - loss: 0.3996 - val_accuracy: 0.8453 - val_loss: 0.3669
Epoch 4/12
[1m153/

[I 2025-03-11 00:24:09,021] Trial 21 finished with value: 0.8371710777282715 and parameters: {'learning_rate': 1.0758335626265775e-05, 'weight_decay': 0.0018317744691792582, 'beta1': 0.9035405849032465, 'beta2': 0.9231423738710876}. Best is trial 12 with value: 0.8544408082962036.


In [9]:
print("Best parameters:", study.best_params)
print("Best validation accuracy:", study.best_value)

Best parameters: {'learning_rate': 1.0954700275088106e-05, 'weight_decay': 0.00014313277277334192, 'beta1': 0.8956561164123156, 'beta2': 0.9224535952932861}
Best validation accuracy: 0.8544408082962036
