In [2]:
import os
# os.environ["TF_GPU_ALLOCATOR"] = "cuda_malloc_async"
import shutil
import sys
from pathlib import Path

# Add parent directory to sys.path
parent_dir = Path("..").resolve()
if str(parent_dir) not in sys.path:
    sys.path.insert(0, str(parent_dir))

import importlib
import common_utils
importlib.reload(common_utils)

import pandas as pd
import itertools
import tensorflow as tf

from common_utils import (
    get_unique_image_shapes,
    get_unique_image_paths,
    load_images_from_paths,
    build_image_dataframe,
    split_data,
    bin_ages,
    build_cnn_model,
    build_model_from_config
)


In [3]:


# tf.keras.mixed_precision.set_global_policy('mixed_float16')

# gpus = tf.config.experimental.list_physical_devices('GPU')
# for gpu in gpus:
#     tf.config.experimental.set_memory_growth(gpu, True)


In [4]:
# Load the table of image paths from the processed-data directory.
image_paths_csv = pd.read_csv(
    filepath_or_buffer='./processed_data/image_paths.csv',
)

In [5]:
# Partition the image-path table into the three frames used downstream.
# split_data comes from common_utils; its split logic is not visible here.
(
    paths_train_df,
    paths_val_df,
    paths_test_df,
) = split_data(image_paths_csv)


In [6]:
# Show the training split as this cell's output for a quick visual check.
paths_train_df

Unnamed: 0,path,age,age_bin,age_bin_label
0,../raw_data2/face_age/036/7684.png,36,7,Mid Adults (35–44)
1,../raw_data2/face_age/053/3153.png,53,8,Mature Adults (45–54)
2,../raw_data2/face_age/063/6184.png,63,9,Older Adults (55–64)
3,../raw_data2/face_age/002/7507.png,2,0,Infants (1–2)
4,../raw_data2/face_age/025/686.png,25,6,Adults (25–34)
...,...,...,...,...
6887,../raw_data2/face_age/040/813.png,40,7,Mid Adults (35–44)
6888,../raw_data2/face_age/018/5794.png,18,5,Young Adults (18–24)
6889,../raw_data2/face_age/042/5802.png,42,7,Mid Adults (35–44)
6890,../raw_data2/face_age/018/4663.png,18,5,Young Adults (18–24)


In [7]:
# Pull the file paths ('path') and class labels ('age_bin') out of the
# training and validation frames as Python lists, then wrap every list in a
# constant tensor.
train_filenames_list, train_labels_list = (
    list(paths_train_df[column]) for column in ('path', 'age_bin')
)
train_filenames_tensor, train_labels_tensor = map(
    tf.constant, (train_filenames_list, train_labels_list)
)

val_filenames_list, val_labels_list = (
    list(paths_val_df[column]) for column in ('path', 'age_bin')
)
val_filenames_tensor, val_labels_tensor = map(
    tf.constant, (val_filenames_list, val_labels_list)
)


I0000 00:00:1743529726.466996    5556 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 881 MB memory:  -> device: 0, name: NVIDIA L40S, pci bus id: 0000:03:00.0, compute capability: 8.9


In [16]:
# Batch size shared by the cells that build the fixed datasets.
batch_size = 128

# Grid-search space: each option name maps to the list of candidate values
# that the grid expansion will combine.
configurations = dict(
    channels=[1, 3],
    num_conv_layers=[3, 4],
    base_filters=[32, 64],
    kernel_size=[3, 5],
    activation=['relu', 'swish'],
    use_skip=[False, True],
    num_dense_layers=[1, 2],
    dense_units=[128, 256],
    num_classes=[13],
    dropout_rate=[0.3, 0.5],
    output_activation=['softmax', 'sigmoid'],
    pool_size=[2],
    task=['classification'],
)


In [17]:


# Expand the search space into the Cartesian product of all option values.
all_combinations = list(itertools.product(*configurations.values()))
# The count is interpolated by the f-string itself; the previous "%d" plus a
# second positional argument printed the placeholder literally.
print(f"nr of combinations : {len(all_combinations)}")

# Turn every value tuple into a config dict keyed by option name.
valid_configs = []
for combo in all_combinations:
    params = dict(zip(configurations.keys(), combo))

    # Enforce: use_pooling is always the opposite of use_skip
    # (alternatively, use_pooling could be added to the search space).
    params['use_pooling'] = not params['use_skip']

    valid_configs.append(params)



nr of combinations : %d 1024


In [9]:
# Build the training and validation datasets with identical loader settings
# (3 channels, ratio=1, the shared batch_size).
dataset_options = dict(channels=3, ratio=1, batch_size=batch_size)
train_dataset = load_images_from_paths(
    train_filenames_tensor, train_labels_tensor, **dataset_options
)
val_dataset = load_images_from_paths(
    val_filenames_tensor, val_labels_tensor, **dataset_options
)


In [None]:
import optuna
import tensorflow as tf
import os

# Assume these functions are defined elsewhere:
# - load_images_from_paths(filenames, labels, channels, ratio, batch_size)
# - build_model_from_config(config)
#
# Also assume that the following dataset variables are defined:
# train_filenames_tensor, train_labels_tensor, val_filenames_tensor, val_labels_tensor

def objective(trial):
    """Train one Optuna-sampled model configuration and return its score.

    Samples a hyperparameter configuration from ``trial``, builds and compiles
    the model, trains it with early stopping, saves the trained model under
    ``saved_models/`` and records ``model_name`` and ``model_path`` as trial
    user attributes.

    Args:
        trial: Optuna trial used to sample hyperparameters and to store the
            user attributes.

    Returns:
        The highest validation accuracy recorded during training.
    """
    # Release Keras global state left over from earlier trials so that memory
    # does not keep growing while many models are built in the same process.
    tf.keras.backend.clear_session()

    # Define hyperparameters using Optuna suggestions.
    config = {
        'batch_size': trial.suggest_categorical('batch_size', [32, 64, 128, 256]),
        'channels': trial.suggest_categorical('channels', [1, 3]),
        'num_conv_layers': trial.suggest_categorical('num_conv_layers', [3, 4, 5]),
        'base_filters': trial.suggest_categorical('base_filters', [32, 64, 128]),
        'kernel_size': trial.suggest_categorical('kernel_size', [3, 5, 7]),
        'activation': trial.suggest_categorical('activation', ['relu', 'swish']),
        'use_skip': trial.suggest_categorical('use_skip', [False, True]),
        'num_dense_layers': trial.suggest_categorical('num_dense_layers', [1, 2, 3]),
        'dense_units': trial.suggest_categorical('dense_units', [128, 256]),
        'num_classes': 13,  # Fixed
        'dropout_rate': trial.suggest_categorical('dropout_rate', [0.3, 0.5, 0.7]),
        'output_activation': trial.suggest_categorical('output_activation', ['softmax', 'sigmoid']),
        'pool_size': trial.suggest_categorical('pool_size', [2, 3]),
        'task': 'classification',
    }

    # Enforce: use_pooling is always the opposite of use_skip.
    config['use_pooling'] = not config['use_skip']

    # Generate a model name from the configuration for logging.
    model_name = "_".join([
        f"{config['channels']}ch",
        f"skip_{config['use_skip']}",
        f"conv{config['num_conv_layers']}",
        f"k{config['kernel_size']}",
        config['activation'],
        f"dense{config['num_dense_layers']}x{config['dense_units']}",
        f"drop{config['dropout_rate']}",
        f"out_{config['output_activation']}"
    ])
    trial.set_user_attr('model_name', model_name)

    # Build the model from the configuration.
    # NOTE(review): 'categorical_crossentropy' presumably relies on
    # load_images_from_paths yielding one-hot labels — TODO confirm.
    model = build_model_from_config(config)
    model.compile(
        optimizer='lion',
        loss='categorical_crossentropy',  # Fixed loss function.
        metrics=['accuracy']
    )

    # Load datasets using the sampled channels and batch_size.
    train_dataset = load_images_from_paths(train_filenames_tensor, train_labels_tensor,
                                           channels=config['channels'], ratio=1,
                                           batch_size=config['batch_size'])
    val_dataset = load_images_from_paths(val_filenames_tensor, val_labels_tensor,
                                         channels=config['channels'], ratio=1,
                                         batch_size=config['batch_size'])

    # Early stopping tracks validation accuracy, the same quantity this
    # objective returns, so the restored (and saved) weights belong to the
    # epoch whose score is reported.
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_accuracy',
        mode='max',
        patience=10,
        restore_best_weights=True,
    )
    history = model.fit(
        train_dataset,
        validation_data=val_dataset,
        epochs=50,
        verbose=0,
        callbacks=[early_stopping]
    )

    # Save the trained model to disk; the trial number keeps filenames unique
    # even when two trials share the same model_name.
    model_dir = "saved_models"
    os.makedirs(model_dir, exist_ok=True)
    model_filename = f"{model_name}_trial_{trial.number}.keras"
    model_path = os.path.join(model_dir, model_filename)
    model.save(model_path)

    trial.set_user_attr('model_path', model_path)

    val_acc = max(history.history['val_accuracy'])
    print(f"Trial {trial.number} | Model: {model_name} | Val Acc: {val_acc:.4f}")

    return val_acc
    
    
# Number of hyperparameter configurations Optuna evaluates; raise it to
# explore the search space more thoroughly, lower it for a quick run.
N_TRIALS = 100
# How many of the top-scoring models are copied into best_models/.
N_BEST_MODELS = 10

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=N_TRIALS)

# Rank only completed trials: a trial that did not finish has value None,
# which cannot be compared while sorting.
completed_trials = [
    t for t in study.trials
    if t.state == optuna.trial.TrialState.COMPLETE
]
best_trials = sorted(completed_trials, key=lambda t: t.value, reverse=True)[:N_BEST_MODELS]
best_models_dir = "best_models"
os.makedirs(best_models_dir, exist_ok=True)

print("\nSaving best models:")
for trial in best_trials:
    model_path = trial.user_attrs.get('model_path')
    model_name = trial.user_attrs.get('model_name')
    # Copy only models whose saved file is still present on disk.
    if model_path and os.path.exists(model_path):
        dest_path = os.path.join(best_models_dir, os.path.basename(model_path))
        shutil.copy(model_path, dest_path)
        print(f"Saved {model_name} with Val Acc: {trial.value:.4f} to {dest_path}")

# Display the best trial's results.
best_trial = study.best_trial
print("\nBest trial:")
print(f"  Model: {best_trial.user_attrs['model_name']}")
print(f"  Validation Accuracy: {best_trial.value:.4f}")
print("  Hyperparameters:")
for key, value in best_trial.params.items():
    print(f"    {key}: {value}")




In [None]:
# model_dict = {}
# model_dir = "saved_models"
# os.makedirs(model_dir, exist_ok=True)
# paths_pre_train_model_dict = {}
# 
# 
# for i, config in enumerate(valid_configs):
#     # Generate unique model name
#     name_parts = [
#         f"{config['channels']}ch",
#         f"skip_{config['use_skip']}",
#         f"conv{config['num_conv_layers']}",
#         f"k{config['kernel_size']}",
#         config['activation'],
#         f"dense{config['num_dense_layers']}x{config['dense_units']}",
#         f"drop{config['dropout_rate']}",
#         f"out_{config['output_activation']}"
#     ]
#     
#     model_name = "_".join(name_parts)
#     # model_filename = model_name + ".keras"
#     # model_path = os.path.join(model_dir, model_filename)
# 
#     # Build and compile
#     model = build_model_from_config(config)
#     model.compile(
#         optimizer='adam',
#         loss='categorical_crossentropy',  # Fixed loss function
#         metrics=['accuracy']
#     )
#     # model.save(model_path,overwrite=True)
#     # model.summary()
#     # paths_pre_train_model_dict[model_name] = {
#     #     'model_path': model_path,
#     #     'config': config
#     # }
#     # 
#     # # Train
#     history = model.fit(
#         train_dataset,  # Dataset yields (images, targets)
#         validation_data=val_dataset,
#         epochs=50,
#         batch_size=batch_size,
#         verbose=2,
#         callbacks=[tf.keras.callbacks.EarlyStopping(patience=10)]
#     )
# 
#     # Store results
#     model_dict[model_name] = {
#         'model': model,
#         'history': history.history,
#         'config': config
#     }
#     print(f"Trained {model_name} | Val acc: {max(history.history['val_accuracy']):.4f}")
#     # print(f"Created {model_name} ")


Epoch 1/50


2025-04-01 17:27:48.351644: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


108/108 - 117s - 1s/step - accuracy: 0.2073 - loss: 2.4318 - val_accuracy: 0.1706 - val_loss: 2.6611
Epoch 2/50


In [None]:
# NOTE(review): model_dict is only assigned inside the commented-out
# grid-search cell, so this lookup raises NameError on a fresh kernel.
# The key also does not match the names that cell generates (they contain a
# "drop<rate>" segment, and dense_units is 128 or 256, not 132) — confirm the
# intended key before re-enabling this cell.
model_dict['1ch_skip_False_conv3_k3_relu_dense1x132_out_softmax']['history']['accuracy']