In [1]:
import os
os.environ["TF_GPU_ALLOCATOR"] = "cuda_malloc_async"
import importlib
import common_utils
importlib.reload(common_utils)
import pandas as pd
import itertools

import tensorflow as tf
from common_utils import get_unique_image_shapes,get_unique_image_paths,load_images_from_paths,build_image_dataframe,split_data, bin_ages, build_cnn_model,build_model_from_config

2025-04-01 02:00:06.573427: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-01 02:00:06.688427: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1743465606.733646   76914 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1743465606.747487   76914 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1743465606.855243   76914 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [2]:


tf.keras.mixed_precision.set_global_policy('mixed_float16')

gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)


In [3]:
image_paths_csv = pd.read_csv('./processed_data/image_paths.csv')

In [4]:
paths_train_df, paths_val_df, paths_test_df = split_data(image_paths_csv)

In [5]:
configurations = {
    'channels': [3],
    'use_skip': [True, False],
    'num_conv_layers': [3, 4,],
    'base_filters': [16,32, 64],
    'kernel_size': [3, 5,7],
    'activation': ['relu', 'elu','leakyrelu','tanh','swish'],
    'num_dense_layers': [1, 2,3,4],
    'dense_units': [128, 256],
    'dropout_rate': [0.3, 0.5,0.7],
    'output_activation': ['sigmoid', 'softmax'],
}


In [6]:


all_combinations = list(itertools.product(*configurations.values()))
valid_configs = []

for combo in all_combinations:
    params = dict(zip(configurations.keys(), combo))
    
    # Enforce: If use_skip=True, use_pooling=False
    if params['use_skip']:
        params['use_pooling'] = False
    else:
        params['use_pooling'] = True  # Or add to search space
    
    valid_configs.append(params)



In [7]:
train_dataset = load_images_from_paths(paths_train_df, channels=3,ratio=0.1)
val_dataset = load_images_from_paths(paths_val_df, channels=3,ratio=0.1)


I0000 00:00:1743465609.347444   76914 gpu_process_state.cc:208] Using CUDA malloc Async allocator for GPU: 0
I0000 00:00:1743465609.349005   76914 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6172 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3070 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


In [8]:
def configure_dataset(dataset, shuffle=False):
    if shuffle:
        dataset = dataset.shuffle(buffer_size=1000)  # Adjust buffer size as needed
    dataset = dataset.batch(32).prefetch(tf.data.AUTOTUNE)
    return dataset


In [9]:
train_dataset = configure_dataset(train_dataset, shuffle=True)
val_dataset = configure_dataset(val_dataset)


In [10]:
model_dict = {}

for i, config in enumerate(valid_configs):  # Limit to 10 for demo
    # Generate unique model name (your existing code)
    name_parts = [
        f"{config['channels']}ch",
        f"skip_{config['use_skip']}",
        f"conv{config['num_conv_layers']}",
        f"k{config['kernel_size']}",
        config['activation'],
        f"dense{config['num_dense_layers']}x{config['dense_units']}",
        f"drop{config['dropout_rate']}",
        f"out_{config['output_activation']}"
    ]
    model_name = "_".join(name_parts)
    
    # Build and compile
    model = build_model_from_config(config)
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',  # Fixed loss function
        metrics=['accuracy']
    )
    
    
        # Train
    history = model.fit(
        train_dataset,  # Dataset yields (images, targets)
        validation_data=val_dataset,
        epochs=50,
        batch_size=8,
        verbose=0,
        callbacks=[tf.keras.callbacks.EarlyStopping(patience=3)]
    )
    
    # Store results
    model_dict[model_name] = {
        'model': model,
        'history': history.history,
        'config': config
    }
    print(f"Trained {model_name} | Val acc: {max(history.history['val_accuracy']):.4f}")


I0000 00:00:1743465612.488343   77034 service.cc:152] XLA service 0x75c830015680 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1743465612.488362   77034 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 3070 Laptop GPU, Compute Capability 8.6
2025-04-01 02:00:12.532293: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1743465612.908533   77034 cuda_dnn.cc:529] Loaded cuDNN version 90300
2025-04-01 02:00:19.536887: E external/local_xla/xla/service/slow_operation_alarm.cc:73] Trying algorithm eng19{k2=4} for conv %cudnn-conv-bw-filter.10 = (f16[64,3,3,32]{3,2,1,0}, u8[0]{0}) custom-call(f16[32,200,200,32]{3,2,1,0} %bitcast.9204, f16[32,200,200,64]{3,2,1,0} %bitcast.9166), window={size=3x3 pad=1_1x1_1}, dim_labels=b01f_o01i->b01f, custom_call_target="__cudnn$convBackwardFilter", metadata={op_type="Conv2D

Trained 3ch_skip_True_conv3_k3_relu_dense1x128_drop0.3_out_sigmoid | Val acc: 0.2267


2025-04-01 02:00:48.253569: W external/local_xla/xla/hlo/transforms/simplifiers/hlo_rematerialization.cc:3021] Can't reduce memory use below 2.35GiB (2518684013 bytes) by rematerialization; only reduced to 4.97GiB (5337079472 bytes), down from 4.97GiB (5337080709 bytes) originally
2025-04-01 02:00:49.948355: E external/local_xla/xla/stream_executor/gpu/gpu_cudamallocasync_allocator.cc:359] gpu_async_0 cuMemAllocAsync failed to allocate 1404590000 bytes: RESOURCE_EXHAUSTED: : CUDA_ERROR_OUT_OF_MEMORY: out of memory
 Reported by CUDA: Free memory/Total memory: 1403322368/8248885248
2025-04-01 02:00:49.948575: E external/local_xla/xla/stream_executor/gpu/gpu_cudamallocasync_allocator.cc:364] Stats: Limit:                      6472138752
InUse:                      6569536193
MaxInUse:                   7864776049
NumAllocs:                        3040
MaxAllocSize:               1404590000
Reserved:                            0
PeakReserved:                        0
LargestFreeBlock:     

ResourceExhaustedError: Graph execution error:

Detected at node StatefulPartitionedCall defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/home/paul/PycharmProjects/DL-assignment/.venv/lib/python3.11/site-packages/ipykernel_launcher.py", line 18, in <module>

  File "/home/paul/PycharmProjects/DL-assignment/.venv/lib/python3.11/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/home/paul/PycharmProjects/DL-assignment/.venv/lib/python3.11/site-packages/ipykernel/kernelapp.py", line 739, in start

  File "/home/paul/PycharmProjects/DL-assignment/.venv/lib/python3.11/site-packages/tornado/platform/asyncio.py", line 205, in start

  File "/usr/lib/python3.11/asyncio/base_events.py", line 608, in run_forever

  File "/usr/lib/python3.11/asyncio/base_events.py", line 1936, in _run_once

  File "/usr/lib/python3.11/asyncio/events.py", line 84, in _run

  File "/home/paul/PycharmProjects/DL-assignment/.venv/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 545, in dispatch_queue

  File "/home/paul/PycharmProjects/DL-assignment/.venv/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 534, in process_one

  File "/home/paul/PycharmProjects/DL-assignment/.venv/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 437, in dispatch_shell

  File "/home/paul/PycharmProjects/DL-assignment/.venv/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 362, in execute_request

  File "/home/paul/PycharmProjects/DL-assignment/.venv/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 778, in execute_request

  File "/home/paul/PycharmProjects/DL-assignment/.venv/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 449, in do_execute

  File "/home/paul/PycharmProjects/DL-assignment/.venv/lib/python3.11/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/home/paul/PycharmProjects/DL-assignment/.venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3047, in run_cell

  File "/home/paul/PycharmProjects/DL-assignment/.venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3102, in _run_cell

  File "/home/paul/PycharmProjects/DL-assignment/.venv/lib/python3.11/site-packages/IPython/core/async_helpers.py", line 128, in _pseudo_sync_runner

  File "/home/paul/PycharmProjects/DL-assignment/.venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3306, in run_cell_async

  File "/home/paul/PycharmProjects/DL-assignment/.venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3489, in run_ast_nodes

  File "/home/paul/PycharmProjects/DL-assignment/.venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3549, in run_code

  File "/tmp/ipykernel_76914/2125581266.py", line 27, in <module>

  File "/home/paul/PycharmProjects/DL-assignment/.venv/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/home/paul/PycharmProjects/DL-assignment/.venv/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 371, in fit

  File "/home/paul/PycharmProjects/DL-assignment/.venv/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 219, in function

  File "/home/paul/PycharmProjects/DL-assignment/.venv/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 132, in multi_step_on_iterator

Out of memory while trying to allocate 1404590000 bytes.
	 [[{{node StatefulPartitionedCall}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_multi_step_on_iterator_11391]