In [1]:
import os
os.environ["TF_GPU_ALLOCATOR"] = "cuda_malloc_async"
import importlib
import common_utils
importlib.reload(common_utils)
import pandas as pd
import itertools

import tensorflow as tf
from common_utils import get_unique_image_shapes,get_unique_image_paths,load_images_from_paths,build_image_dataframe,split_data, bin_ages, build_cnn_model,build_model_from_config

2025-04-01 02:04:23.736526: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-01 02:04:23.746167: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1743465863.757078   80061 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1743465863.760255   80061 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1743465863.769127   80061 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [2]:


tf.keras.mixed_precision.set_global_policy('mixed_float16')

gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)


In [3]:
image_paths_csv = pd.read_csv('./processed_data/image_paths.csv')

In [4]:
paths_train_df, paths_val_df, paths_test_df = split_data(image_paths_csv)

In [5]:
configurations = {
    'channels': [3],
    'use_skip': [True, False],
    'num_conv_layers': [3, 4,],
    'base_filters': [16,32, 64],
    'kernel_size': [3, 5,7],
    'activation': ['relu', 'elu','leakyrelu','tanh','swish'],
    'num_dense_layers': [1, 2,3,4],
    'dense_units': [128, 256],
    'dropout_rate': [0.3, 0.5,0.7],
    'output_activation': ['sigmoid', 'softmax'],
}


In [6]:


all_combinations = list(itertools.product(*configurations.values()))
valid_configs = []

for combo in all_combinations:
    params = dict(zip(configurations.keys(), combo))
    
    # Enforce: If use_skip=True, use_pooling=False
    if params['use_skip']:
        params['use_pooling'] = False
    else:
        params['use_pooling'] = True  # Or add to search space
    
    valid_configs.append(params)



In [7]:
train_dataset = load_images_from_paths(paths_train_df, channels=3,ratio=0.1)
val_dataset = load_images_from_paths(paths_val_df, channels=3,ratio=0.1)


I0000 00:00:1743465865.369970   80061 gpu_process_state.cc:208] Using CUDA malloc Async allocator for GPU: 0
I0000 00:00:1743465865.371261   80061 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6172 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3070 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


In [8]:
def configure_dataset(dataset, shuffle=False):
    if shuffle:
        dataset = dataset.shuffle(buffer_size=1000)  # Adjust buffer size as needed
    dataset = dataset.batch(32).prefetch(tf.data.AUTOTUNE)
    return dataset


In [9]:
train_dataset = configure_dataset(train_dataset, shuffle=True)
val_dataset = configure_dataset(val_dataset)


In [10]:
model_dict = {}

for i, config in enumerate(valid_configs):  # Limit to 10 for demo
    # Generate unique model name (your existing code)
    name_parts = [
        f"{config['channels']}ch",
        f"skip_{config['use_skip']}",
        f"conv{config['num_conv_layers']}",
        f"k{config['kernel_size']}",
        config['activation'],
        f"dense{config['num_dense_layers']}x{config['dense_units']}",
        f"drop{config['dropout_rate']}",
        f"out_{config['output_activation']}"
    ]
    model_name = "_".join(name_parts)
    
    # Build and compile
    model = build_model_from_config(config)
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',  # Fixed loss function
        metrics=['accuracy']
    )
    
    
        # Train
    history = model.fit(
        train_dataset,  # Dataset yields (images, targets)
        validation_data=val_dataset,
        epochs=50,
        batch_size=8,
        verbose=0,
        callbacks=[tf.keras.callbacks.EarlyStopping(patience=3)]
    )
    
    # Store results
    model_dict[model_name] = {
        'model': model,
        'history': history.history,
        'config': config
    }
    print(f"Trained {model_name} | Val acc: {max(history.history['val_accuracy']):.4f}")


I0000 00:00:1743465867.827724   80171 service.cc:152] XLA service 0x70a484006da0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1743465867.827742   80171 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 3070 Laptop GPU, Compute Capability 8.6
2025-04-01 02:04:27.869133: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1743465868.218427   80171 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1743465874.782813   80171 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.




Trained 3ch_skip_True_conv3_k3_relu_dense1x128_drop0.3_out_sigmoid | Val acc: 0.1395
Trained 3ch_skip_True_conv3_k3_relu_dense1x128_drop0.3_out_softmax | Val acc: 0.1453
Trained 3ch_skip_True_conv3_k3_relu_dense1x128_drop0.5_out_sigmoid | Val acc: 0.2151
Trained 3ch_skip_True_conv3_k3_relu_dense1x128_drop0.5_out_softmax | Val acc: 0.1453


2025-04-01 02:05:20.705936: E external/local_xla/xla/stream_executor/gpu/gpu_cudamallocasync_allocator.cc:359] gpu_async_0 cuMemAllocAsync failed to allocate 536870912 bytes: RESOURCE_EXHAUSTED: : CUDA_ERROR_OUT_OF_MEMORY: out of memory
 Reported by CUDA: Free memory/Total memory: 331677696/8248885248
2025-04-01 02:05:20.705953: E external/local_xla/xla/stream_executor/gpu/gpu_cudamallocasync_allocator.cc:364] Stats: Limit:                      6472138752
InUse:                      7517671709
MaxInUse:                   7564940341
NumAllocs:                        7598
MaxAllocSize:                577842224
Reserved:                            0
PeakReserved:                        0
LargestFreeBlock:                    0

2025-04-01 02:05:20.705962: E external/local_xla/xla/stream_executor/gpu/gpu_cudamallocasync_allocator.cc:68] Histogram of current allocation: (allocation_size_in_bytes, nb_allocation_of_that_sizes), ...;
2025-04-01 02:05:20.705963: E external/local_xla/xla/stream_e

ResourceExhaustedError: {{function_node __wrapped__AddV2_device_/job:localhost/replica:0/task:0/device:GPU:0}} failed to allocate memory [Op:AddV2] name: 