In [1]:
import numpy as np
import seaborn as sns
import os
import time
import yaml

from sklearn.preprocessing import StandardScaler
from scipy.spatial import distance
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

import tensorflow as tf
from tensorflow.keras import layers, Sequential, Model

from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score

from Data import Data
from Builder import ConvNetBuilder
from functions import *
from CreateModel import ModelCreate


# Set AutoGraph's verbosity level to 0.
tf.autograph.set_verbosity(0)

# List the GPUs visible to TensorFlow through the stable API
# (`tf.config.experimental.list_physical_devices` is only an alias of it).
physical_devices = tf.config.list_physical_devices('GPU')
print(physical_devices)

# Optional toggles, intentionally left disabled: per-GPU memory growth and
# quieter TensorFlow logging. Note that `physical_devices[0]` would raise an
# IndexError on a machine without a GPU.
# config = tf.config.experimental.set_memory_growth(physical_devices[0], True)
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# tf.get_logger().setLevel('ERROR')

2024-06-26 20:25:02.311827: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-06-26 20:25:02.409135: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


2024-06-26 20:25:05.259375: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-06-26 20:25:05.325248: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-06-26 20:25:05.328440: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

In [2]:
def store_simulation_data(detailed_metrics, i, fold_no, train, test, yTestClassT, y_pred_labels):
    """Record the split indexes and the evaluation metrics of one fold.

    The entry ``detailed_metrics[f'sim_{i+1}'][f'fold_{fold_no}']`` is updated
    in place; nothing is returned.

    Args:
        detailed_metrics (dict): Nested results dictionary that already holds
            the entry for this simulation and fold.
        i (int): Zero-based simulation index (the dictionary key uses ``i + 1``).
        fold_no (int): Fold number, used as-is in the dictionary key.
        train: Training indexes of the fold, stored unchanged.
        test: Test indexes of the fold, stored unchanged.
        yTestClassT: Ground-truth labels handed to every metric function.
        y_pred_labels: Predicted labels handed to every metric function.
    """
    fold_record = detailed_metrics[f'sim_{i+1}'][f"fold_{fold_no}"]

    # Keep track of which samples were used for training and for testing.
    fold_record["training_indexes"] = train
    fold_record["test_indexes"] = test

    # Each metric is appended to its own list, in this fixed order.
    metric_functions = (
        ("f1_score", f1_score),
        ("recall", recall_score),
        ("precision", precision_score),
        ("roc_auc", roc_auc_score),
        ("confusion_matrix", confusion_matrix),
    )
    for metric_name, metric_function in metric_functions:
        fold_record["metrics"][metric_name].append(metric_function(yTestClassT, y_pred_labels))

In [3]:
def generate_empty_dict(NUM_FOLDERS, NUM_SIMULATIONS):
    """Create the empty nested dictionary that collects per-fold results.

    Args:
        NUM_FOLDERS (int): Number of folds; produces keys ``fold_1`` .. ``fold_N``.
        NUM_SIMULATIONS (int): Number of simulations; produces keys
            ``sim_1`` .. ``sim_M``.

    Returns:
        dict: ``{sim_key: {fold_key: fold_entry}}`` where every fold entry has
        an empty list per metric and ``None`` for both index fields.
    """
    def _blank_fold_entry():
        # Built anew on every call so that no two folds share the same lists.
        return {
            'metrics': {
                metric_name: []
                for metric_name in ('f1_score', 'recall', 'precision', 'roc_auc', 'confusion_matrix')
            },
            'training_indexes': None,
            'test_indexes': None,
        }

    return {
        f'sim_{sim_no}': {
            f'fold_{fold_no}': _blank_fold_entry()
            for fold_no in range(1, NUM_FOLDERS + 1)
        }
        for sim_no in range(1, NUM_SIMULATIONS + 1)
    }

def reset_weights(model):
    """Re-initialise, in place, the kernel and bias of every layer of ``model``.

    Each layer that exposes a ``kernel`` / ``kernel_initializer`` pair (and,
    independently, a ``bias`` / ``bias_initializer`` pair) gets a freshly drawn
    value assigned to that variable. Layers that are themselves models (they
    expose a non-empty ``layers`` attribute, e.g. an embedding network wrapped
    inside a Siamese model) are traversed recursively, so their weights are
    reset as well.

    Only ``kernel`` and ``bias`` variables are touched; any other weights a
    layer may own are left unchanged.

    Args:
        model: Object exposing a ``layers`` iterable (e.g. a Keras model).

    Returns:
        None. The variables are modified in place.
    """
    for layer in model.layers:
        # Nested models keep their weights inside their own sub-layers, so
        # they must be walked explicitly or they would never be reset.
        if getattr(layer, 'layers', None):
            reset_weights(layer)

        # `getattr` with a default also covers layers whose variable does not
        # exist (for instance a layer created without a bias, whose `bias`
        # attribute is None).
        kernel = getattr(layer, 'kernel', None)
        kernel_initializer = getattr(layer, 'kernel_initializer', None)
        if kernel is not None and kernel_initializer is not None:
            kernel.assign(kernel_initializer(shape=kernel.shape))

        bias = getattr(layer, 'bias', None)
        bias_initializer = getattr(layer, 'bias_initializer', None)
        if bias is not None and bias_initializer is not None:
            bias.assign(bias_initializer(shape=bias.shape))

def find_best_f1_score(detailed_metrics):
    """
    Locate the simulation and fold whose recorded F1 score is the highest.

    Folds with an empty ``f1_score`` list are ignored. When several folds
    reach the same top score, the first one in iteration order is kept.

    Args:
        detailed_metrics (dict): Nested ``{sim_key: {fold_key: entry}}``
            dictionary where ``entry['metrics']['f1_score']`` is a list.

    Returns:
        tuple: ``(sim_key, fold_key)`` of the best fold, or ``(None, None)``
        when no fold has a score greater than -1.
    """
    # Lazily yield (peak score, sim key, fold key) for every fold with scores.
    fold_peaks = (
        (max(fold_entry['metrics']['f1_score']), sim_name, fold_name)
        for sim_name, folds in detailed_metrics.items()
        for fold_name, fold_entry in folds.items()
        if fold_entry['metrics']['f1_score']
    )

    winner = (None, None)
    winning_score = -1
    for peak, sim_name, fold_name in fold_peaks:
        # Strict comparison: an equal score never displaces an earlier winner.
        if peak > winning_score:
            winning_score, winner = peak, (sim_name, fold_name)

    return winner

In [4]:
# Training settings handed to `siamese.fit` / `siamese.compile` in the training cell.
epochs = 20
batch_size = 16
margin = 1  # Margin for contrastive loss.
# NOTE(review): NUM_REFERENCE_IMAGES is not read by any cell visible in this notebook — confirm it is still needed.
NUM_REFERENCE_IMAGES = 2
# Used as `n_splits` of the KFold split (despite the name, this is the number of folds).
NUM_FOLDERS = 5
# Number of times the whole K-fold procedure is repeated.
NUM_SIMULATIONS = 1

# Directory handed to the project `Data` loader.
# NOTE(review): absolute, user-specific path — the notebook only runs on this machine as written.
base_path = '/home/aacastro/Alejandro/DQ_ACA_2024/A/ZN_1D_imgs/orig/'
data_processor = Data(base_path)
# X / Y feed the K-fold loop; ass_f / ass_l are used for the final confusion matrix.
X, Y, ass_f, ass_l = data_processor.get_data('train.npz', 'validation.npz', 'test.npz', 'assess.npz')
# Replace each row of Y by the index of its largest entry
# (presumably one-hot labels -> integer class ids; confirm against Data.get_data).
Y = np.argmax(Y, axis=1)

# Where results are written, and the YAML file passed to ModelCreate.
results_base_directory = '/home/aacastro/Alejandro/DQ_ACA_2024/C/results/'
yaml_path = '/home/aacastro/Alejandro/DQ_ACA_2024/C/models.yaml'

In [6]:
# Build the models described by the YAML configuration.
# The factory instance gets its own name: binding it to `Model` would shadow
# the `Model` class imported from tensorflow.keras in the import cell.
model_factory = ModelCreate(yaml_path)
models = model_factory.create_models()

# Inspect the first network. Position 1 of each entry is the object used as
# a Keras model below (it is summarised here and called as the embedding
# network in the training cell).
model = models[0][1]
model.summary()




2024-06-26 20:25:05.883050: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-06-26 20:25:05.884964: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-06-26 20:25:05.886795: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

In [7]:
models_metrics = {}

# Reference images for each class, passed to `classify_images` together with
# the samples to label.
reference_images_class0 = np.load('/home/aacastro/Alejandro/DQ_ACA_2024/C/data/centroids_images_OK.npy')
reference_images_class1 = np.load('/home/aacastro/Alejandro/DQ_ACA_2024/C/data/centroids_images_NOK.npy')

input_shape = (264, 18)

# `models` is created in the previous cell (where the first network is also
# summarised); this cell only consumes it.

# One dated results folder shared by every model trained in this run.
# Computed once so that a run crossing midnight does not split its outputs.
simulation_date_folder = os.path.join(results_base_directory, f"fecha_simulacion_{time.strftime('%Y%m%d')}")
os.makedirs(simulation_date_folder, exist_ok=True)

for k in range(len(models)):
    # Position 1 of each `models` entry is the network (see `models[0][1]` in
    # the previous cell). Train the k-th one rather than always the first.
    base_embedding = models[k][1]

    # Per-model output folder: `model_1` for the first model, as before.
    configuration_folder = os.path.join(simulation_date_folder, f"model_{k + 1}")
    os.makedirs(configuration_folder, exist_ok=True)

    # Fresh bookkeeping for every model, so that the "best fold" below is
    # chosen among this model's folds only.
    detailed_metrics = generate_empty_dict(NUM_FOLDERS, NUM_SIMULATIONS)
    store_models_temp = {
        sim_key: {fold_key: [] for fold_key in folds}
        for sim_key, folds in detailed_metrics.items()
    }

    for i in range(NUM_SIMULATIONS):

        kf = KFold(n_splits=NUM_FOLDERS, shuffle=True)
        # Iterate over the K-fold splits (fold numbers start at 1).
        for fold_no, (train, test) in enumerate(kf.split(X, Y), 1):

            pairs_dict = prepare_data(X[train], Y[train])

            # Every fold trains its own freshly initialised copy of the
            # embedding network. Re-using a single network object would carry
            # the weights learned on earlier folds into later ones, and would
            # make every stored Siamese model share (and overwrite) the same
            # embedding weights.
            # NOTE(review): assumes the network is a model `clone_model` can
            # rebuild (Sequential / functional) — confirm against ModelCreate.
            embedding_network = keras.models.clone_model(base_embedding)

            # Define the Siamese network
            input_1 = keras.layers.Input(shape=input_shape)
            input_2 = keras.layers.Input(shape=input_shape)

            # Siamese networks share weights between the two towers, so the
            # same embedding network is applied to both inputs.
            tower_1 = embedding_network(input_1)
            tower_2 = embedding_network(input_2)

            merge_layer = keras.layers.Lambda(euclidean_distance, output_shape=(1,))(
                [tower_1, tower_2]
            )
            normal_layer = keras.layers.BatchNormalization()(merge_layer)
            output_layer = keras.layers.Dense(1, activation="sigmoid")(normal_layer)
            siamese = keras.Model(inputs=[input_1, input_2], outputs=output_layer)

            siamese.compile(loss=loss(margin=margin), optimizer="RMSprop", metrics=["accuracy"])

            history = siamese.fit(
                pairs_dict['train']['data'],
                pairs_dict['train']['labels'],
                validation_data=(pairs_dict['val']['data'], pairs_dict['val']['labels']),
                batch_size=batch_size,
                epochs=epochs,
            )

            predicted_labels = classify_images(siamese, X[test], reference_images_class0, reference_images_class1)

            # The predictions were made on X[test], so they must be scored
            # against Y[test] (not against the assessment labels `ass_l`).
            store_simulation_data(detailed_metrics, i, fold_no, train, test, Y[test], predicted_labels)

            # Keep the trained model as-is. It must NOT be passed to
            # `reset_weights` afterwards: that would re-initialise layers of
            # the very object stored here, and the best model saved below
            # would no longer be the trained one.
            store_models_temp[f'sim_{i+1}'][f'fold_{fold_no}'].append(siamese)

    best_sim_key, best_fold_key = find_best_f1_score(detailed_metrics)
    if best_sim_key is None:
        # No fold recorded an F1 score (e.g. zero simulations or folds ran).
        print(f"No fold produced an F1 score for model {k + 1}; nothing to save.")
        continue

    print(f"The best model was at simulation {best_sim_key} and fold {best_fold_key}")

    model_to_save = store_models_temp[best_sim_key][best_fold_key]

    save_path_model = os.path.join(configuration_folder, f'model_{best_sim_key}_{best_fold_key}.keras')

    best_model = model_to_save[0]
    best_model.save(save_path_model)

    # Final check of the selected model on the held-out assessment set.
    predicted_ass = classify_images(best_model, ass_f, reference_images_class0, reference_images_class1)

    print(confusion_matrix(ass_l, predicted_ass))

Epoch 1/20


I0000 00:00:1719426310.871749  649036 service.cc:145] XLA service 0x7f3af8003b50 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1719426310.871787  649036 service.cc:153]   StreamExecutor device (0): NVIDIA A30, Compute Capability 8.0
2024-06-26 20:25:10.938509: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-06-26 20:25:11.275473: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8907
2024-06-26 20:25:12.527370: W external/local_xla/xla/service/gpu/nvptx_compiler.cc:742] The NVIDIA driver's CUDA version is 12.2 which is older than the ptxas CUDA version (12.4.131). Because the driver is older than the ptxas version, XLA is disabling parallel compilation, which may slow down compilation. You should update your NVIDIA driver or use the NVIDIA-provided CUDA forward compatibility packages.


[1m 40/287[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 4ms/step - accuracy: 0.5061 - loss: 0.2770

I0000 00:00:1719426313.863326  649036 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 18ms/step - accuracy: 0.5010 - loss: 0.2755 - val_accuracy: 0.4991 - val_loss: 0.2513
Epoch 2/20
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5164 - loss: 0.2625 - val_accuracy: 0.5044 - val_loss: 0.2526
Epoch 3/20
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5200 - loss: 0.2520 - val_accuracy: 0.4991 - val_loss: 0.2515
Epoch 4/20
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5061 - loss: 0.2502 - val_accuracy: 0.4887 - val_loss: 0.2511
Epoch 5/20
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5334 - loss: 0.2495 - val_accuracy: 0.4991 - val_loss: 0.2512
Epoch 6/20
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5160 - loss: 0.2498 - val_accuracy: 0.5070 - val_loss: 0.2506
Epoch 7/20
[1m287/287[0m [32m━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 1/20
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 16ms/step - accuracy: 0.5041 - loss: 0.2858 - val_accuracy: 0.4991 - val_loss: 0.2503
Epoch 2/20
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.4932 - loss: 0.2588 - val_accuracy: 0.5131 - val_loss: 0.2507
Epoch 3/20
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5095 - loss: 0.2515 - val_accuracy: 0.5174 - val_loss: 0.2499
Epoch 4/20
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5153 - loss: 0.2498 - val_accuracy: 0.5157 - val_loss: 0.2498
Epoch 5/20
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5266 - loss: 0.2491 - val_accuracy: 0.5026 - val_loss: 0.2502
Epoch 6/20
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5181 - loss: 0.2491 - val_accuracy: 0.5061 - val_loss: 0.2501
Epoch 7/20
[1m287/287[0m 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 1/20
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 19ms/step - accuracy: 0.5145 - loss: 0.2728 - val_accuracy: 0.5009 - val_loss: 0.2498
Epoch 2/20
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5399 - loss: 0.2520 - val_accuracy: 0.5862 - val_loss: 0.2435
Epoch 3/20
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5562 - loss: 0.2451 - val_accuracy: 0.5880 - val_loss: 0.2401
Epoch 4/20
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5642 - loss: 0.2426 - val_accuracy: 0.5880 - val_loss: 0.2371
Epoch 5/20
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5739 - loss: 0.2412 - val_accuracy: 0.5967 - val_loss: 0.2363
Epoch 6/20
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5866 - loss: 0.2405 - val_accuracy: 0.6080 - val_loss: 0.2321
Epoch 7/20
[1m287/287[0m

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 1/20
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 18ms/step - accuracy: 0.5071 - loss: 0.2517 - val_accuracy: 0.5009 - val_loss: 0.2501
Epoch 2/20
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5019 - loss: 0.2513 - val_accuracy: 0.4983 - val_loss: 0.2537
Epoch 3/20
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5128 - loss: 0.2512 - val_accuracy: 0.4974 - val_loss: 0.2532
Epoch 4/20
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5238 - loss: 0.2500 - val_accuracy: 0.5000 - val_loss: 0.2529
Epoch 5/20
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5325 - loss: 0.2496 - val_accuracy: 0.5105 - val_loss: 0.2520
Epoch 6/20
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5130 - loss: 0.2503 - val_accuracy: 0.4983 - val_loss: 0.2523
Epoch 7/20
[1m287/287[0m

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28