In [3]:
import optuna
import tensorflow as tf
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
#os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import warnings
warnings.filterwarnings("ignore")
from config import *
from modules.util import *
from models.model_v3 import *

In [8]:
def main(**kwargs):
    model_dir, log_dir, result_dir = init_train(**kwargs)
    mirrored_strategy = tf.distribute.MirroredStrategy()
    with mirrored_strategy.scope():
        def objective(trial):
            n_cnnlayer = trial.suggest_int("n_layer", 1, 10)
            n_denselayer = trial.suggest_int("n_layer", 1, 2)
            n_filter = trial.suggest_int("n_filter", 16, 128)
            n_kernel = trial.suggest_int("n_kernel", 3, 9)
            n_dense = trial.suggest_int("n_dense", 1024, 2048)
            
            model = tf.keras.Sequential()
            model.add(tf.keras.layers.Rescaling(1./255, input_shape=target_size))
            for i in range(n_cnnlayer):
                model.add(tf.keras.layers.Conv2D(n_filter, n_kernel, activation='relu'))
                model.add(tf.keras.layers.MaxPooling2D())
                
            model.add(tf.keras.layers.Flatten())
            for i in range(n_denselayer):
                model.add(tf.keras.layers.Dense(n_dense, activation='relu'))
                
            model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
            lr_schedule = tf.keras.optimizers.schedules.CosineDecayRestarts(
                    initial_learning_rate=kwargs["learning_rate"], 
                    first_decay_steps=150, 
                    t_mul=2, 
                    m_mul=0.9, 
                    alpha=0.0, 
                    name=None
                )
            model.compile(
                optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
                loss=kwargs["loss_func"],
                metrics=[tf.keras.metrics.BinaryAccuracy(),tf.keras.metrics.FalseNegatives(),tf.keras.metrics.FalsePositives(),tf.keras.metrics.TrueNegatives(),tf.keras.metrics.TruePositives()]
            )
            history = model.fit(
                    train_ds, 
                    validation_data=val_ds, 
                    epochs=EPOCHS, 
                    class_weight=kwargs["class_weight"], 
                    callbacks=get_callbacks(kwargs["model_type"], model_dir, log_dir),  # TestCallback 추가
                    verbose=1, 
                    workers=40, 
                    use_multiprocessing=True
                )
            draw_learning_curve(history, result_dir, DATA_PATH, kwargs["date"])
            test_pred = model.predict(test_image_list)
            return generate_report(test_pred, test_image_label, test_image, result_dir, upload=True)
    
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=100)

        
    


In [9]:
for target_size in TARGET_SIZE:
        # 시작 전에 공통적으로 사용되는 테스트 데이터 로드.
        print(f"test data loading...", end="")
        test_image_list, test_image_label, test_image = load_test(TEST_PATH, target_size)
        print(f"done")
        
        for data in DATA:
            for batch_size in BATCH_SIZE:
                # 학습 데이터 로드.
                if MODE == "train" or MODE == "alpha train":
                    train_ds, val_ds = load_train(DATA_PATH, data, target_size, batch_size)
                    
                for model_type in MODEL:
                    for learning_rate in LEARNING_RATE:
                        for loss_func in LOSS_FUNC:
                            for weight in WEIGHT:
                                # 학습 시작.
                                kwargs = {
                                    "model_type": model_type,
                                    "data": data,
                                    "target_size": target_size,
                                    "batch_size": batch_size,
                                    "learning_rate": learning_rate,
                                    "loss_func": loss_func,
                                    "class_weight": weight,
                                    "date": DATE,
                                }
                                main(**kwargs)

test data loading...done
Found 20000 files belonging to 2 classes.
Using 14000 files for training.
Found 20000 files belonging to 2 classes.
Using 6000 files for validation.
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')


[I 2023-09-02 22:58:08,595] A new study created in memory with name: no-name-c6024be0-b0a9-4ac2-9fa0-b2cc73545f5a


Epoch 1/300


2023-09-02 22:58:08.733235: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [14000]
	 [[{{node Placeholder/_4}}]]
2023-09-02 22:58:08.733602: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [14000]
	 [[{{node Placeholder/_0}}]]




2023-09-02 22:58:34.685740: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [6000]
	 [[{{node Placeholder/_4}}]]
2023-09-02 22:58:34.686083: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [6000]
	 [[{{node Placeholder/_0}}]]



Epoch 1: saving model to /home/RT_Paper/log/202309022241/PO/CNN/(512, 512, 1)_64_0.0005_binary_crossentropy_(0: 1, 1: 3)/models/CNN_01.h5
Epoch 2/300

[W 2023-09-02 22:58:49,175] Trial 0 failed with parameters: {'n_layer': 4, 'n_filter': 16, 'n_kernel': 4, 'n_dense': 1877} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_729182/2276211175.py", line 36, in objective
    history = model.fit(
  File "/usr/local/lib/python3.8/dist-packages/keras/utils/traceback_utils.py", line 65, in error_handler
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1685, in fit
    tmp_logs = self.train_function(iterator)
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/util/traceback_utils.py", line 150, in error_handler
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/polymorphic_function/polymorphic_function.py", line 894, in __call__


KeyboardInterrupt: 