In [None]:
# NOTE: the original import order is kept on purpose. The wildcard imports make
# the order significant (a later import can rebind a name exported by an
# earlier one), so new imports are only appended after them.
from utils.utils_functions import *
from utils.datasets import *
import utils.config as config
import tensorflow as tf
import os
from models.registry import *
from keras.callbacks import TensorBoard
from datetime import datetime
import json
import traceback
import numpy as np
from tensorflow.keras import callbacks


2026-01-27 22:43:51.932243: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2026-01-27 22:43:51.932290: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2026-01-27 22:43:51.932311: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2026-01-27 22:43:51.939229: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [1]:
# Reset Keras' global state, then fix the random seed (42) once, before any
# model in the sweep below is built.
tf.keras.backend.clear_session()
tf.keras.utils.set_random_seed(42)


def convert_to_serializable(obj):
    """Recursively convert ``obj`` into plain Python values that ``json`` accepts.

    Handled inputs:
      * ``dict``            -> dict with every value converted (keys untouched)
      * ``list`` / ``tuple`` -> list with every item converted
      * NumPy bool scalar   -> ``bool``
      * NumPy integer       -> ``int`` (kept integral, not widened to float)
      * NumPy floating      -> ``float``
      * ``np.ndarray``      -> nested lists via ``tolist()``
      * any object exposing ``.numpy()`` (e.g. a tensor) -> converted result of
        that call, so both scalar and non-scalar values work

    Anything else is returned unchanged.

    Args:
        obj: Arbitrary (possibly nested) value, e.g. a Keras ``History.history``
            dictionary.

    Returns:
        A structure built only from dicts, lists and the leaf values described
        above.
    """
    if isinstance(obj, dict):
        return {key: convert_to_serializable(value) for key, value in obj.items()}
    # Tuples are written as JSON arrays anyway; recursing into them makes sure
    # NumPy scalars nested inside a tuple are converted too.
    if isinstance(obj, (list, tuple)):
        return [convert_to_serializable(item) for item in obj]
    # np.bool_ is not a subclass of bool and is rejected by the json module.
    if isinstance(obj, np.bool_):
        return bool(obj)
    # Keep integers integral instead of silently turning e.g. 3 into 3.0.
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    # Tensor-like objects: convert whatever .numpy() yields (NumPy scalar or
    # array) rather than forcing float(), which fails for non-scalar values.
    if hasattr(obj, 'numpy'):
        return convert_to_serializable(obj.numpy())
    return obj

# ---------------------------------------------------------------------------
# Architecture sweep
#
# Trains every entry of MODELS_REGISTRY with the same optimizer, loss, metrics
# and callbacks. For each model it writes:
#   * the best checkpoint            -> <ARCH_SAVE_DIR>/<name>_best.h5
#   * the Keras training history     -> <ARCH_SAVE_DIR>/<name>_history.json
#   * the dynamics logger export     -> <ARCH_SAVE_DIR>/<name>_dynamics.json
#   * TensorBoard / dynamics logs    -> <ARCH_LOG_ROOT>/<name>_experiment
# A failure in one model is reported (with traceback) and the sweep moves on.
# ---------------------------------------------------------------------------
ARCH_LOG_ROOT = 'logs/architecture'
ARCH_SAVE_DIR = 'saved_models/testing_architecture'

os.makedirs(ARCH_LOG_ROOT, exist_ok=True)
os.makedirs(ARCH_SAVE_DIR, exist_ok=True)

for model_name in MODELS_REGISTRY.keys():
    tf.keras.backend.clear_session()
    print(f"\n\n{'='*70}")
    print(f"Training {model_name}")
    print(f"{'='*70}")

    log_dir = f'{ARCH_LOG_ROOT}/{model_name}_experiment'
    os.makedirs(log_dir, exist_ok=True)

    best_path = f"{ARCH_SAVE_DIR}/{model_name}_best.h5"

    # Bound before the try so the cleanup in `finally` is valid even when the
    # model constructor itself raises.
    model = None

    try:
        # Everything that can fail for a single architecture (construction,
        # compile, dataset building, training, export) lives inside the try so
        # one broken model cannot abort the remaining experiments.
        model = MODELS_REGISTRY[model_name]()

        model.compile(
            optimizer=tf.keras.optimizers.Adam(config.LR),
            loss=tf.keras.losses.SparseCategoricalCrossentropy(),
            metrics=[
                MeanIoUMetric(num_classes=3),
                dice_coeff_metric()
            ]
        )

        print("\nBuilding datasets...")
        train_ds = build_dataset(
            config.TRAIN_X,
            config.TRAIN_Y,
            batch_size=config.BATCH_SIZE,
            shuffle=True,
            augment=True
        )

        val_ds = build_dataset(
            config.VAL_X,
            config.VAL_Y,
            batch_size=config.BATCH_SIZE,
            shuffle=False
        )

        print(f"Training dataset batch size: {config.BATCH_SIZE}")
        print(f"Validation dataset batch size: {config.BATCH_SIZE}")

        print("\nInitializing optimized dynamics logger...")
        dynamics_logger = OptimizedDynamicsLogger(
            val_dataset=val_ds,
            num_classes=3,
            log_dir=log_dir,
            max_samples=20,
            batch_log_freq=1000
        )

        # NOTE(review): the "val_mean_iou" monitors assume MeanIoUMetric reports
        # under the name "mean_iou" — TODO confirm against its definition.
        cbs = [
            callbacks.ModelCheckpoint(
                filepath=best_path,
                monitor="val_mean_iou",
                mode="max",
                save_best_only=True,
                verbose=1
            ),
            callbacks.EarlyStopping(
                monitor="val_mean_iou",
                mode="max",
                patience=6,
                restore_best_weights=True,
                verbose=1
            ),
            # NOTE(review): this watches the *training* loss while the two
            # callbacks above watch a validation metric — confirm intended.
            callbacks.ReduceLROnPlateau(
                monitor='loss',
                factor=0.5,
                patience=3,
                min_lr=1e-6,
                verbose=1
            ),
            callbacks.TensorBoard(
                log_dir=log_dir,
                histogram_freq=1,
                write_graph=True,
                update_freq=1000
            ),
            dynamics_logger
        ]

        history = model.fit(
            train_ds,
            validation_data=val_ds,
            epochs=10,
            callbacks=cbs,
            verbose=1
        )

        print("\n" + "="*70)
        print("Training completed successfully!")
        print("="*70)

        history_path = f"{ARCH_SAVE_DIR}/{model_name}_history.json"
        print(f"\nSaving training history to {history_path}...")
        with open(history_path, "w") as f:
            serializable_history = convert_to_serializable(history.history)
            json.dump(serializable_history, f, indent=2)

        dyn_path = f"{ARCH_SAVE_DIR}/{model_name}_dynamics.json"
        print(f"Exporting dynamics to {dyn_path}...")
        dynamics_logger.export_to_json(dyn_path)

        print("\n" + "-"*70)
        print("TRAINING SUMMARY")
        print("-"*70)
        print(f"Model: {model_name}")
        print(f"Best val_loss: {min(history.history['val_loss']):.4f}")
        # Check the key that is actually printed (the original tested
        # 'mean_iou' but read 'val_mean_iou').
        val_iou_history = history.history.get('val_mean_iou')
        if val_iou_history:
            print(f"Best val_mean_iou: {max(val_iou_history):.4f}")
        print("\nFiles saved:")
        print(f"  - Model: {best_path}")
        print(f"  - History: {history_path}")
        print(f"  - Dynamics: {dyn_path}")
        print(f"  - Logs: {log_dir}")
        print("-"*70)

    except Exception as e:
        # Best-effort sweep: report the failure and carry on with the next
        # model, but keep the full traceback so the cause can be diagnosed.
        print(f"\n{'!'*70}")
        print(f"ERROR during training with {model_name}!")
        print(f"{'!'*70}")
        print(f"Error message: {str(e)}")
        traceback.print_exc()

    finally:
        print("\nCleaning up memory...")
        del model
        tf.keras.backend.clear_session()

print("\n" + "="*70)
print("ALL EXPERIMENTS COMPLETED")
print("="*70)
print("\nTo view all results in TensorBoard:")
print("  tensorboard --logdir=logs/architecture/")

NameError: name 'tf' is not defined

In [2]:
import tensorflow as tf
from tensorflow.keras.models import load_model

# Render an architecture diagram for every model that produced a checkpoint in
# the training sweep. Models whose checkpoint file is absent are skipped
# instead of aborting the loop.
for model_name in MODELS_REGISTRY.keys():
    checkpoint_path = f"saved_models/testing_architecture/{model_name}_best.h5"
    if not os.path.exists(checkpoint_path):
        print(f"Skipping {model_name}: no checkpoint found at {checkpoint_path}")
        continue

    # compile=False: only the graph structure is needed for plotting, so the
    # loss/metric objects used at training time are not reconstructed.
    model = load_model(
        checkpoint_path,
        compile=False
    )

    tf.keras.utils.plot_model(
        model,
        to_file=f"saved_models/{model_name}_architecture.png",
        show_shapes=True
    )

    # Release the loaded model before the next one is read so memory does not
    # accumulate across the registry.
    del model
    tf.keras.backend.clear_session()

ModuleNotFoundError: No module named 'tensorflow'