In [1]:
import pathlib
import os

from tensorflow.keras.optimizers import Adam
import tensorflow_datasets as tfds
import tensorflow as tf

from misc.CycleGANTraining import CycleGANTraining
from misc.train_monitor import get_train_monitor
from misc.data import read_train_sample
from misc.data import read_test_sample
from misc.model_tf import CycleGAN
from misc import configs

# Let tf.data choose the parallelism level for the input pipeline.
AUTO = tf.data.AUTOTUNE

# Fix TensorFlow's global random seed so runs are repeatable.
SEED = 42
tf.random.set_seed(SEED)

2023-06-07 07:06:22.450972: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
## Download the apple2orange dataset and pick out each of its four splits
datasets = tfds.load('cycle_gan/apple2orange')

train_input  = datasets['trainA']
train_output = datasets['trainB']
test_input   = datasets['testA']
test_output  = datasets['testB']

def dataset_map(dataset, func, batch_size, dtype = 'train'):
    """Map, shuffle and batch a dataset; repeat it when used for training.

    Args:
        dataset: Object exposing the tf.data-style ``map`` / ``shuffle`` /
            ``batch`` / ``repeat`` methods.
        func: Per-element function handed to ``dataset.map``.
        batch_size: Used both as the batch size and as the shuffle buffer size.
        dtype: ``'train'`` makes the result repeat indefinitely; any other
            value yields a single pass over the data.

    Returns:
        The transformed dataset.
    """
    mapped  = dataset.map(func, num_parallel_calls = AUTO)
    batched = mapped.shuffle(batch_size).batch(batch_size)

    # Only the training pipeline loops forever; everything else is one pass.
    if dtype == 'train':
        return batched.repeat()
    return batched

## Dataset preprocessing
# Both training splits go through read_train_sample with the training batch size.
train_input, train_output = [
    dataset_map(split, read_train_sample, configs.TRAIN_BATCH_SIZE)
    for split in (train_input, train_output)
]

# Both test splits go through read_test_sample with the inference batch size.
test_input, test_output = [
    dataset_map(split, read_test_sample, configs.INFER_BATCH_SIZE, dtype = 'test')
    for split in (test_input, test_output)
]

# Pair the two training streams so each element is an (input, output) tuple.
train_dataset = tf.data.Dataset.zip((train_input, train_output))

# from_logits = True: the loss is given raw scores, not sigmoid probabilities.
loss = tf.keras.losses.BinaryCrossentropy(from_logits = True)

model = CycleGAN(configs.IMG_HEIGHT, configs.IMG_WIDTH)

2023-06-07 07:06:29.578358: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 34441 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-40GB, pci bus id: 0000:07:00.0, compute capability: 8.0
2023-06-07 07:06:29.581322: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 38370 MB memory:  -> device: 1, name: NVIDIA A100-SXM4-40GB, pci bus id: 0000:0f:00.0, compute capability: 8.0
2023-06-07 07:06:29.587507: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:2 with 38370 MB memory:  -> device: 2, name: NVIDIA A100-SXM4-40GB, pci bus id: 0000:47:00.0, compute capability: 8.0
2023-06-07 07:06:29.590367: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:3 with 38370 MB memory:  -> device: 3, name: NVIDIA A100-SXM4-40GB, pci bu

In [3]:
# Build two discriminators and two generators (creation order kept as-is).
discX = model.discriminator()
discY = model.discriminator()
genG  = model.generator()
genF  = model.generator()

# Make sure the image output directory exists before training starts.
os.makedirs(configs.BASE_IMAGES_PATH, exist_ok = True)

# Wrap the four networks in the training model.
cycleGAN = CycleGANTraining(
    gen_G  = genG,
    disc_X = discX,
    gen_F  = genF,
    disc_Y = discY,
)

# Each network gets its own Adam optimizer, all with the same learning rate.
optimizers = {
    slot: Adam(learning_rate = configs.LR)
    for slot in ('g_optimG', 'd_optimX', 'g_optimF', 'd_optimY')
}
cycleGAN.compile(bceLoss = loss, **optimizers)

# Training monitor fed with the two test datasets; epoch_interval is set to 10.
train_monitor = get_train_monitor(
    test_input,
    test_output,
    epoch_interval = 10,
    image_path = configs.BASE_IMAGES_PATH,
    batch_size = configs.INFER_BATCH_SIZE,
)
callbacks = [train_monitor]

(None, 256, 256, 3)
(None, 128, 128, 64)
(None, 64, 64, 128)
(None, 32, 32, 256)
(None, 32, 32, 512)
(None, 30, 30, 1)


------------


(None, 256, 256, 3)
(None, 128, 128, 64)
(None, 64, 64, 128)
(None, 32, 32, 256)
(None, 32, 32, 512)
(None, 30, 30, 1)


------------




2023-06-07 07:06:30.630293: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_2' with dtype string and shape [1]
	 [[{{node Placeholder/_2}}]]
2023-06-07 07:06:30.630782: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype string and shape [1]
	 [[{{node Placeholder/_1}}]]
2023-06-07 07:06:30.744070: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline 

In [4]:
# Train the CycleGAN. The training datasets are built with .repeat(), so
# steps_per_epoch is what marks the end of each epoch.
cycleGAN.fit(train_dataset, epochs = configs.EPOCHS, callbacks = callbacks,
             steps_per_epoch = configs.STEPS_PER_EPOCH)

# Save the trained generator G. The settings module is imported as `configs`;
# the previous `config.GENERATOR_MODEL` referenced an undefined name and would
# raise NameError only after training had finished.
cycleGAN.gen_G.save(configs.GENERATOR_MODEL)

Epoch 1/50


2023-06-07 07:06:30.899353: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_2' with dtype string and shape [1]
	 [[{{node Placeholder/_2}}]]
2023-06-07 07:06:30.900083: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_3' with dtype int64 and shape [1]
	 [[{{node Placeholder/_3}}]]
2023-06-07 07:06:36.949421: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_2/dropout/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer
2023-06-07 07:06:38.839432: E tensorflow/compiler/xla/st

UnimplementedError: Graph execution error:

Detected at node 'model_1/conv2d_5/Conv2D' defined at (most recent call last):
    File "<frozen runpy>", line 198, in _run_module_as_main
    File "<frozen runpy>", line 88, in _run_code
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/traitlets/config/application.py", line 1043, in launch_instance
      app.start()
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/ipykernel/kernelapp.py", line 725, in start
      self.io_loop.start()
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/tornado/platform/asyncio.py", line 195, in start
      self.asyncio_loop.run_forever()
    File "/opt/conda/envs/dove/lib/python3.11/asyncio/base_events.py", line 607, in run_forever
      self._run_once()
    File "/opt/conda/envs/dove/lib/python3.11/asyncio/base_events.py", line 1922, in _run_once
      handle._run()
    File "/opt/conda/envs/dove/lib/python3.11/asyncio/events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 513, in dispatch_queue
      await self.process_one()
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 502, in process_one
      await dispatch(*args)
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 409, in dispatch_shell
      await result
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 729, in execute_request
      reply_content = await reply_content
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 422, in do_execute
      res = shell.run_cell(
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/ipykernel/zmqshell.py", line 540, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3009, in run_cell
      result = self._run_cell(
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3064, in _run_cell
      result = runner(coro)
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3269, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3448, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3508, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/tmp/ipykernel_2794490/451847961.py", line 1, in <module>
      cycleGAN.fit(train_dataset, epochs = configs.EPOCHS, callbacks = callbacks,
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/keras/engine/training.py", line 1685, in fit
      tmp_logs = self.train_function(iterator)
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/keras/engine/training.py", line 1284, in train_function
      return step_function(self, iterator)
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/keras/engine/training.py", line 1268, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/keras/engine/training.py", line 1249, in run_step
      outputs = model.train_step(data)
    File "/home/jovyan/NVIDIA_CUDA-11.1_Samples/TIL/AI_study/Generative Model/GANs/misc/CycleGANTraining.py", line 45, in train_step
      disc_real_outY = self.disc_Y([target_image], training = True)
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/keras/engine/training.py", line 558, in __call__
      return super().__call__(*args, **kwargs)
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/keras/engine/base_layer.py", line 1145, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/keras/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/keras/engine/functional.py", line 512, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/keras/engine/functional.py", line 669, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/keras/engine/base_layer.py", line 1145, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/keras/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/keras/layers/convolutional/base_conv.py", line 290, in call
      outputs = self.convolution_op(inputs, self.kernel)
    File "/opt/conda/envs/dove/lib/python3.11/site-packages/keras/layers/convolutional/base_conv.py", line 262, in convolution_op
      return tf.nn.convolution(
Node: 'model_1/conv2d_5/Conv2D'
DNN library is not found.
	 [[{{node model_1/conv2d_5/Conv2D}}]] [Op:__inference_train_function_16762]