In [2]:
import os
# os.environ["TF_GPU_ALLOCATOR"] = "cuda_malloc_async"
import shutil
import sys
from pathlib import Path

# Add parent directory to sys.path
parent_dir = Path("..").resolve()
if str(parent_dir) not in sys.path:
    sys.path.insert(0, str(parent_dir))
import optuna
import tensorflow as tf
import os
import importlib
import common_utils
from common_utils import *
importlib.reload(common_utils)

import pandas as pd
import itertools
import tensorflow as tf
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

from common_utils import (
    get_unique_image_shapes,
    get_unique_image_paths,
    load_images_from_paths,
    build_image_dataframe,
    split_data,
    bin_ages,
    build_cnn_model,
    build_model_from_config
)


In [4]:
image_paths_csv = pd.read_csv('./processed_data/image_paths.csv')

In [5]:
paths_train_df, paths_val_df, paths_test_df = split_data(image_paths_csv)


In [6]:
paths_train_df

Unnamed: 0,path,age,age_bin,age_bin_label
0,../raw_data2/face_age/015/5220.png,15,4,Teens (13–17)
1,../raw_data2/face_age/016/8444.png,16,4,Teens (13–17)
2,../raw_data2/face_age/052/1059.png,52,8,Mature Adults (45–54)
3,../raw_data2/face_age/013/3292.png,13,4,Teens (13–17)
4,../raw_data2/face_age/025/6427.png,25,6,Adults (25–34)
...,...,...,...,...
6887,../raw_data2/face_age/027/3983.png,27,6,Adults (25–34)
6888,../raw_data2/face_age/024/7066.png,24,5,Young Adults (18–24)
6889,../raw_data2/face_age/002/830.png,2,0,Infants (1–2)
6890,../raw_data2/face_age/001/7108.png,1,0,Infants (1–2)


In [7]:
# Converting the filenames and target class labels into lists for augmented train and test datasets.

train_filenames_list = list(paths_train_df['path'])
train_labels_list = list(paths_train_df['age_bin'])

train_filenames_tensor = tf.constant(train_filenames_list)
train_labels_tensor = tf.constant(train_labels_list)

val_filenames_list = list(paths_val_df['path'])
val_labels_list = list(paths_val_df['age_bin'])

val_filenames_tensor = tf.constant(val_filenames_list)
val_labels_tensor = tf.constant(val_labels_list)


2025-04-02 10:19:47.440012: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3
2025-04-02 10:19:47.440061: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-04-02 10:19:47.440079: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2025-04-02 10:19:47.440098: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-04-02 10:19:47.440116: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [8]:
batch_size = 128
configurations = {
    'channels': [1,3],
    'num_conv_layers': [3,4],
    'base_filters': [32,64],
    'kernel_size': [3,5],
    'activation': ['relu','swish'],
    'use_skip': [False,True],
    'num_dense_layers': [1,2],
    'dense_units': [128,256],
    'num_classes': [13],
    'dropout_rate': [0.3, 0.5],
    'output_activation': ['softmax','sigmoid'],
    'pool_size': [2],
    'task': ['classification']
}




In [8]:
# Defining the architecture of the sequential neural network.

model=build_sequential_cnn_model(
    channels=1,
    dropout_rate=0,
    task="classification",
    num_classes=13,
    num_conv_layers=4,
    conv_filters=[32, 64, 128, 256],
    kernel_size=3,
    activation="relu",
    num_dense_layers=1,
    dense_units=[132],
    output_activation="softmax"
)


In [9]:


all_combinations = list(itertools.product(*configurations.values()))
print(f"nr of combinations : %d", len(all_combinations))
valid_configs = []



nr of combinations : %d 1024


In [10]:
train_dataset = load_images_from_paths(train_filenames_tensor,train_labels_tensor, channels=3,ratio=1, batch_size=batch_size)
val_dataset = load_images_from_paths(val_filenames_tensor,val_labels_tensor, channels=3,ratio=1,batch_size=batch_size)


In [11]:

import gc
# Assume these functions are defined elsewhere:
# - load_images_from_paths(filenames, labels, channels, ratio, batch_size)
# - build_model_from_config(config)
#
# Also assume that the following dataset variables are defined:
# train_filenames_tensor, train_labels_tensor, val_filenames_tensor, val_labels_tensor

def objective(trial):
    # Define hyperparameters using Optuna suggestions.
    config = {
        'batch_size': trial.suggest_categorical('batch_size',[32,64,128,256]),  
        'channels': trial.suggest_categorical('channels', [1, 3]),
        'num_conv_layers': trial.suggest_categorical('num_conv_layers', [3, 4, 5]),
        'base_filters': trial.suggest_categorical('base_filters', [32, 64, 128]),
        'kernel_size': trial.suggest_categorical('kernel_size', [3, 5, 7]),
        'activation': trial.suggest_categorical('activation', ['relu', 'swish']),
        'use_skip': False,
        'num_dense_layers': trial.suggest_categorical('num_dense_layers', [1, 2, 3]),
        'dense_units': trial.suggest_categorical('dense_units', [128, 256]),
        'num_classes': 13,  # Fixed
        'dropout_rate': trial.suggest_categorical('dropout_rate', [0.3, 0.5, 0.7]),
        'output_activation': trial.suggest_categorical('output_activation', ['softmax', 'sigmoid']),
        'pool_size': trial.suggest_categorical('pool_size', [2, 3]),
        'task': 'classification',
    }
    
    # Enforce: If use_skip=True, then use_pooling is False; otherwise, True.
    config['use_pooling'] = False if config['use_skip'] else True

    # Generate a model name from the configuration for logging.
    model_name = "_".join([
        f"{config['channels']}ch",
        f"skip_{config['use_skip']}",
        f"conv{config['num_conv_layers']}",
        f"k{config['kernel_size']}",
        config['activation'],
        f"dense{config['num_dense_layers']}x{config['dense_units']}",
        f"drop{config['dropout_rate']}",
        f"out_{config['output_activation']}"
    ])
    trial.set_user_attr('model_name', model_name)
# Define model save directory and filename.
    model_dir = "saved_models"
    os.makedirs(model_dir, exist_ok=True)
    model_filename = f"{model_name}.keras"  # Note: no trial number here.
    model_path = os.path.join(model_dir, model_filename)
    
    # If the model has already been trained, load it and evaluate.
    if os.path.exists(model_path):
        print(f"Model {model_name} already trained. Loading saved model.")
        model = tf.keras.models.load_model(model_path)
        val_dataset = load_images_from_paths(val_filenames_tensor, val_labels_tensor,
                                             channels=config['channels'], ratio=1,
                                             batch_size=config['batch_size'])
        loss, val_acc = model.evaluate(val_dataset, verbose=0)
        trial.set_user_attr('model_path', model_path)
        return val_acc

    # Build the model from the configuration.
    model = build_model_from_config(config)
    model.compile(
        optimizer='lion',
        loss='categorical_crossentropy',  # Fixed loss function.
        metrics=['accuracy']
    )

    # Load datasets using the specified channels and batch_size.
    train_dataset = load_images_from_paths(train_filenames_tensor, train_labels_tensor,
                                           channels=config['channels'], ratio=1,
                                           batch_size=config['batch_size'])
    val_dataset = load_images_from_paths(val_filenames_tensor, val_labels_tensor,
                                         channels=config['channels'], ratio=1,
                                         batch_size=config['batch_size'])

    # Train the model with early stopping.
    history = model.fit(
        train_dataset,
        validation_data=val_dataset,
        epochs=50,
        verbose=0,
        callbacks=[tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)]
    )

     # Save the trained model to disk.
    model_dir = "saved_models"
    os.makedirs(model_dir, exist_ok=True)
    model_filename = f"{model_name}_trial_{trial.number}.keras"
    model_path = os.path.join(model_dir, model_filename)
    model.save(model_path)

    del train_dataset, val_dataset, model
    tf.keras.backend.clear_session()

    trial.set_user_attr('model_path', model_path)
    
    val_acc = max(history.history['val_accuracy'])
    print(f"Trial {trial.number} | Model: {model_name} | Val Acc: {val_acc:.4f}")
    gc.collect()
    return val_acc
    
    
study = optuna.create_study(direction='maximize')

# n_trials defines how many different hyperparameter configurations will be evaluated.
# Here it's set to 10 for demonstration. In practice, you might increase this number to explore the search space more thoroughly.
study.optimize(objective, n_trials=100)

best_trials = sorted(study.trials, key=lambda t: t.value, reverse=True)[:10]
best_models_dir = "best_models"
os.makedirs(best_models_dir, exist_ok=True)

print("\nSaving best models:")
for trial in best_trials:
    model_path = trial.user_attrs.get('model_path')
    model_name = trial.user_attrs.get('model_name')
    if model_path and os.path.exists(model_path):
        dest_path = os.path.join(best_models_dir, os.path.basename(model_path))
        shutil.copy(model_path, dest_path)
        print(f"Saved {model_name} with Val Acc: {trial.value:.4f} to {dest_path}")

# Display the best trial's results.
best_trial = study.best_trial
print("\nBest trial:")
print(f"  Model: {best_trial.user_attrs['model_name']}")
print(f"  Validation Accuracy: {best_trial.value:.4f}")
print("  Hyperparameters:")
for key, value in best_trial.params.items():
    print(f"    {key}: {value}")




[I 2025-04-02 10:19:47,783] A new study created in memory with name: no-name-feb6120a-7c13-40a7-a11f-fea6f70e072f
[W 2025-04-02 10:19:47,786] Trial 0 failed with parameters: {'batch_size': 64, 'channels': 3, 'num_conv_layers': 5, 'base_filters': 32, 'kernel_size': 3, 'activation': 'swish', 'num_dense_layers': 1, 'dense_units': 256, 'dropout_rate': 0.3, 'output_activation': 'sigmoid', 'pool_size': 3} because of the following error: TypeError("build_cnn_model() got an unexpected keyword argument 'use_pooling'").
Traceback (most recent call last):
  File "/Users/bytedance/PycharmProjects/DL-assignment/.venv/lib/python3.11/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/var/folders/00/rgy_w4m1779bfcbsp27pgb_r0000gn/T/ipykernel_62546/4082767753.py", line 61, in objective
    model = build_model_from_config(config)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/bytedance/PycharmProjects

TypeError: build_cnn_model() got an unexpected keyword argument 'use_pooling'

In [None]:
# model_dict = {}
# model_dir = "saved_models"
# os.makedirs(model_dir, exist_ok=True)
# paths_pre_train_model_dict = {}
# 
# 
# for i, config in enumerate(valid_configs):
#     # Generate unique model name
#     name_parts = [
#         f"{config['channels']}ch",
#         f"skip_{config['use_skip']}",
#         f"conv{config['num_conv_layers']}",
#         f"k{config['kernel_size']}",
#         config['activation'],
#         f"dense{config['num_dense_layers']}x{config['dense_units']}",
#         f"drop{config['dropout_rate']}",
#         f"out_{config['output_activation']}"
#     ]
#     
#     model_name = "_".join(name_parts)
#     # model_filename = model_name + ".keras"
#     # model_path = os.path.join(model_dir, model_filename)
# 
#     # Build and compile
#     model = build_model_from_config(config)
#     model.compile(
#         optimizer='adam',
#         loss='categorical_crossentropy',  # Fixed loss function
#         metrics=['accuracy']
#     )
#     # model.save(model_path,overwrite=True)
#     # model.summary()
#     # paths_pre_train_model_dict[model_name] = {
#     #     'model_path': model_path,
#     #     'config': config
#     # }
#     # 
#     # # Train
#     history = model.fit(
#         train_dataset,  # Dataset yields (images, targets)
#         validation_data=val_dataset,
#         epochs=50,
#         batch_size=batch_size,
#         verbose=2,
#         callbacks=[tf.keras.callbacks.EarlyStopping(patience=10)]
#     )
# 
#     # Store results
#     model_dict[model_name] = {
#         'model': model,
#         'history': history.history,
#         'config': config
#     }
#     print(f"Trained {model_name} | Val acc: {max(history.history['val_accuracy']):.4f}")
#     # print(f"Created {model_name} ")


In [None]:
model_dict['1ch_skip_False_conv3_k3_relu_dense1x132_out_softmax']['history']['accuracy']