In [1]:
import os
os.environ['TF_XLA_FLAGS'] = '--tf_xla_enable_xla_devices=false'
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
import optuna
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.metrics import accuracy_score, classification_report, roc_curve, auc, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.regularizers import l2, l1_l2
import matplotlib.pyplot as plt
import gc

2025-06-01 08:43:24.425393: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748778204.547636 3723708 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748778204.582478 3723708 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1748778204.849439 3723708 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1748778204.849475 3723708 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1748778204.849477 3723708 computation_placer.cc:177] computation placer alr

In [2]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

# Tratamento dos Dados

In [None]:
df = pd.read_csv('../dataset_fz_cz.csv')

df

In [None]:
fft_columns = df.iloc[:, :1500]
def safe_complex(x):
    try:
        return complex(x)
    except (ValueError, TypeError):
        return np.nan
fft_complex = fft_columns.map(safe_complex)
df.iloc[:, :1500] = fft_complex

## Separando X e Y

In [None]:
y = df['classe']
X = df.drop(['classe','voluntario'], axis=1)

In [None]:
X

In [None]:
y.value_counts()

## Divisão em conjunto de Treino e Teste

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [None]:
X_train_rec_f0 = X_train.iloc[:, :500]
X_test_rec_f0 = X_test.iloc[:, :500]

In [None]:
X_train.iloc[:,:1500] = X_train.iloc[:,:1500].map(lambda x: np.abs(x))
X_test.iloc[:,:1500] = X_test.iloc[:,:1500].map(lambda x: np.abs(x))

In [None]:
X_train.shape, X_test.shape

In [None]:
X_train

In [None]:
indice_f0 = list(range(500)) + [1500] + list(range(1503, X_train.shape[1]))
X_train_f0 = X_train.iloc[:, indice_f0]
X_test_f0 = X_test.iloc[:, indice_f0]

In [None]:
indice_h2 = list(range(1000)) + [1500, 1501] + list(range(1503, X_train.shape[1]))
X_train_h2 = X_train.iloc[:, indice_h2]
X_test_h2 = X_test.iloc[:, indice_h2]

## Codificar Variáveis Categóricas

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

categorical_columns = ['Faixa_Moduladora', 
                     'Intensidade_db', 
                     'Canal']

numerical_columns = ['Freq_analisada_Hz']

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_columns),
        ('cat', OneHotEncoder(), categorical_columns)
    ],
    remainder='passthrough'
)

X_train_f0 = preprocessor.fit_transform(X_train_f0)
X_test_f0 = preprocessor.transform(X_test_f0)

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

categorical_columns = ['Faixa_Moduladora', 
                     'Intensidade_db', 
                     'Canal']

numerical_columns = ['Freq_analisada_Hz', 'Freq_h2_analisada_Hz',]

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_columns),
        ('cat', OneHotEncoder(), categorical_columns)
    ],
    remainder='passthrough'
)

X_train_h2 = preprocessor.fit_transform(X_train_h2)
X_test_h2 = preprocessor.transform(X_test_h2)

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

categorical_columns = ['Faixa_Moduladora', 
                     'Intensidade_db', 
                     'Canal']

numerical_columns = ['Freq_analisada_Hz', 
                     'Freq_h2_analisada_Hz',
                     'Freq_h3_analisada_Hz']

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_columns),
        ('cat', OneHotEncoder(), categorical_columns)
    ],
    remainder='passthrough'
)

X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

Com o ColumnTransformer, as colunas após o pré-processamento podem ter nomes alterados (ex.: Faixa_40, Faixa_80). 

In [None]:
colunas_processadas = preprocessor.get_feature_names_out()

In [None]:
colunas_processadas

In [None]:
pd.DataFrame(X_train).head()

In [None]:
X_train

In [None]:
pd.DataFrame(X_train).to_parquet('f0_h2_h3/X_train.parquet', index=False)
pd.DataFrame(X_test).to_parquet('f0_h2_h3/X_test.parquet', index=False)
pd.DataFrame(y_train).to_parquet('f0_h2_h3/y_train.parquet', index=False)
pd.DataFrame(y_test).to_parquet('f0_h2_h3/y_test.parquet', index=False)

In [None]:
X_train_rec_f0.to_csv('f0/X_train_rec_f0.csv', index=False)
X_test_rec_f0.to_csv('f0/X_test_rec_f0.csv', index=False)

In [None]:
pd.DataFrame(X_train_f0).to_parquet('f0/X_train_f0.parquet', index=False)
pd.DataFrame(X_test_f0).to_parquet('f0/X_test_f0.parquet', index=False)
pd.DataFrame(y_train).to_parquet('f0/y_train_f0.parquet', index=False)
pd.DataFrame(y_test).to_parquet('f0/y_test_f0.parquet', index=False)

In [None]:
pd.DataFrame(X_train_h2).to_parquet('f0_h2/X_train_h2.parquet', index=False)
pd.DataFrame(X_test_h2).to_parquet('f0_h2/X_test_h2.parquet', index=False)
pd.DataFrame(y_train).to_parquet('f0_h2/y_train_h2.parquet', index=False)
pd.DataFrame(y_test).to_parquet('f0_h2/y_test_h2.parquet', index=False)

Até aqui, foi realizado a separação do X e Y, nos quais foram separados em conjunto de teste e treinamento. Além disso, os conjuntos de treino para os modelos de ML foram transformados para pegar apenas os módulos, já os em formato retangular, será utilizado na CSM e MSC. Obtendo assim, 4 datasets finais

# Desenvolvimento dos Modelos

## Carregando os Dados

In [4]:
X_train_f0 = pd.read_parquet('f0/X_train_f0.parquet')
X_test_f0 = pd.read_parquet('f0/X_test_f0.parquet')
y_train_f0 = pd.read_parquet('f0/y_train_f0.parquet')
y_test_f0 = pd.read_parquet('f0/y_test_f0.parquet')

In [5]:
X_train_h2 = pd.read_parquet('f0_h2/X_train_h2.parquet')
X_test_h2 = pd.read_parquet('f0_h2/X_test_h2.parquet')
y_train_h2 = pd.read_parquet('f0_h2/y_train_h2.parquet')
y_test_h2 = pd.read_parquet('f0_h2/y_test_h2.parquet')

In [6]:
X_train = pd.read_parquet('f0_h2_h3/X_train.parquet')
X_test = pd.read_parquet('f0_h2_h3/X_test.parquet')
y_train = pd.read_parquet('f0_h2_h3/y_train.parquet')
y_test = pd.read_parquet('f0_h2_h3/y_test.parquet')

In [7]:
datasets = {
    'Fundamental': {
        'X_train': X_train_f0,
        'y_train': y_train_f0,
        'X_test': X_test_f0,
        'y_test': y_test_f0
    },
    'Fundamental + 2º Harmônico': {
        'X_train': X_train_h2,
        'y_train': y_train_h2,
        'X_test': X_test_h2,
        'y_test': y_test_h2
    },
    'Fundamental + 2º + 3º Harmônicos': {
        'X_train': X_train,
        'y_train': y_train,
        'X_test': X_test,
        'y_test': y_test
    }
}

## Criando o modelo RNA

### RNA Padrão

In [None]:
def create_model(trial, input_shape):
    n_layers = trial.suggest_int('n_layers', 1, 100)
    units = trial.suggest_int('units', 32, 512, step=32)
    dropout = trial.suggest_float('dropout', 0.0, 0.5)
    learning_rate = trial.suggest_float('learning_rate', 1e-6, 1e-1, log=True)
    
    model = models.Sequential()
    model.add(layers.InputLayer(input_shape=input_shape))
    for i in range(n_layers):
        model.add(layers.Dense(units, activation='relu'))
        model.add(layers.Dropout(dropout))
    model.add(layers.Dense(1, activation='sigmoid'))

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy', tf.keras.metrics.AUC(name='auc')]
    )
    return model

### RNA variando parametros por camada

In [8]:
def create_model_layer(trial, input_shape):
    n_layers = trial.suggest_int('n_layers', 1, 50)
    units = []
    dropouts = []

    for i in range(n_layers):
        units.append(trial.suggest_int(f'units_{i}', 32, 512, step=32))
        dropouts.append(trial.suggest_float(f'dropout_{i}', 0.0, 0.5))

    learning_rate = trial.suggest_float('learning_rate', 1e-6, 1e-1, log=True)
    l2_weight = trial.suggest_float('l2_weight', 1e-5, 1e-2, log=True)
    l1_weight = trial.suggest_float('l1_weight', 1e-5, 1e-2, log=True)
    
    model = models.Sequential()
    model.add(layers.InputLayer(shape=input_shape))
    
    for i in range(n_layers):
        # activation_function = trial.suggest_categorical(f'activation_{i}', ['relu', 'leaky_relu', 'elu'])
        # if activation_function == 'leaky_relu':
        #     model.add(layers.Dense(units[i], kernel_regularizer=l1_l2(l1=l1_weight, l2=l2_weight)))
        #     model.add(layers.LeakyReLU())
        # else:
        #     model.add(layers.Dense(units[i], activation=activation_function, kernel_regularizer=l1_l2(l1=l1_weight, l2=l2_weight)))
            
        model.add(layers.Dense(units[i], activation='relu', kernel_regularizer=l1_l2(l1=l1_weight, l2=l2_weight)))
        model.add(layers.BatchNormalization())
        model.add(layers.Dropout(dropouts[i]))
    
    model.add(layers.Dense(1, activation='sigmoid'))

    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'RMSprop', 'SGD'])
    
    model.compile(
        # optimizer=tf.keras.optimizers.get({
        #     'class_name': optimizer_name,
        #     'config': {
        #         'learning_rate': learning_rate
        #     }
        # }),
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy', tf.keras.metrics.AUC(name='auc')]
    )
    return model

### RNA + LSTM

In [None]:
def create_sequence(data, labels, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length + 1):
        X.append(data[i:i + seq_length])
        y.append(labels[i + seq_length - 1])
    return np.array(X), np.array(y)

X_train_f0_seq, y_train_f0_seq = create_sequence(X_train_f0.values, y_train_f0.values, 20)
X_test_f0_seq, y_test_f0_seq = create_sequence(X_test_f0.values, y_test_f0.values, 20)

X_train_h2_seq, y_train_h2_seq = create_sequence(X_train_h2.values, y_train_h2.values, 20)
X_test_h2_seq, y_test_h2_seq = create_sequence(X_test_h2.values, y_test_h2.values, 20)

X_train_seq, y_train_seq = create_sequence(X_train.values, y_train.values, 20)
X_test_seq, y_test_seq = create_sequence(X_test.values, y_test.values, 20)
        

In [None]:
datasets_seq = {
    'Fundamental': {
        'X_train': X_train_f0_seq,
        'y_train': y_train_f0_seq,
        'X_test': X_test_f0_seq,
        'y_test': y_test_f0_seq
    },
    'Fundamental + 2º Harmônico': {
        'X_train': X_train_h2_seq,
        'y_train': y_train_h2_seq,
        'X_test': X_test_h2_seq,
        'y_test': y_test_h2_seq
    },
    'Fundamental + 2º + 3º Harmônicos': {
        'X_train': X_train_seq,
        'y_train': y_train_seq,
        'X_test': X_test_seq,
        'y_test': y_test_seq
    }
}

In [None]:
def create_lstm_model(trial, input_shape):
    # Hiperparâmetros para LSTM
    n_lstm_layers = trial.suggest_int('n_lstm_layers', 1, 10)
    lstm_units = [trial.suggest_int(f'lstm_units_{i}', 32, 256) for i in range(n_lstm_layers)]
    lstm_dropout = [trial.suggest_float(f'lstm_dropout_{i}', 0.0, 0.5) for i in range(n_lstm_layers)]
    
    # Hiperparâmetros para Dense
    n_dense_layers = trial.suggest_int('n_dense_layers', 1, 50)
    dense_units = [trial.suggest_int(f'dense_units_{i}', 32, 512) for i in range(n_dense_layers)]
    dense_dropout = [trial.suggest_float(f'dense_dropout_{i}', 0.0, 0.5) for i in range(n_dense_layers)]
    
    # Construção do modelo
    model = models.Sequential()
    model.add(layers.InputLayer(input_shape=input_shape))  # (look_back, n_features)
    
    # Camadas LSTM
    for i in range(n_lstm_layers):
        return_sequences = (i < n_lstm_layers - 1)  # Retorna sequências se não for a última camada
        model.add(layers.LSTM(
            lstm_units[i],
            return_sequences=return_sequences,
            kernel_regularizer=l2(trial.suggest_float('l2_weight', 1e-5, 1e-2))
        ))
        model.add(layers.Dropout(lstm_dropout[i]))
    
    # Camadas Densas
    for i in range(n_dense_layers):
        model.add(layers.Dense(dense_units[i], activation='relu'))
        model.add(layers.Dropout(dense_dropout[i]))
    
    # Saída
    model.add(layers.Dense(1, activation='sigmoid'))
    
    # Compilar
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=trial.suggest_float('learning_rate', 1e-6, 1e-1, log=True)),
        loss='binary_crossentropy',
        metrics=['accuracy', tf.keras.metrics.AUC(name='auc')]
    )
    return model

### CNN-LSTM

In [None]:
def create_sequence(data, labels, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length + 1):
        X.append(data[i:i + seq_length])
        y.append(labels[i + seq_length - 1])
    return np.array(X), np.array(y)

X_train_f0_seq, y_train_f0_seq = create_sequence(X_train_f0.values, y_train_f0.values, 20)
X_test_f0_seq, y_test_f0_seq = create_sequence(X_test_f0.values, y_test_f0.values, 20)

X_train_h2_seq, y_train_h2_seq = create_sequence(X_train_h2.values, y_train_h2.values, 20)
X_test_h2_seq, y_test_h2_seq = create_sequence(X_test_h2.values, y_test_h2.values, 20)

X_train_seq, y_train_seq = create_sequence(X_train.values, y_train.values, 20)
X_test_seq, y_test_seq = create_sequence(X_test.values, y_test.values, 20)
        

In [None]:
datasets_seq = {
    'Fundamental': {
        'X_train': X_train_f0_seq,
        'y_train': y_train_f0_seq,
        'X_test': X_test_f0_seq,
        'y_test': y_test_f0_seq
    },
    'Fundamental + 2º Harmônico': {
        'X_train': X_train_h2_seq,
        'y_train': y_train_h2_seq,
        'X_test': X_test_h2_seq,
        'y_test': y_test_h2_seq
    },
    'Fundamental + 2º + 3º Harmônicos': {
        'X_train': X_train_seq,
        'y_train': y_train_seq,
        'X_test': X_test_seq,
        'y_test': y_test_seq
    }
}

In [None]:
def create_cnn_lstm_model(trial, input_shape):
    model = models.Sequential()
    model.add(layers.InputLayer(input_shape=input_shape))
    
    current_seq_length = input_shape[0] # Comprimento da sequência inicial

    n_cnn_layers = trial.suggest_int('n_cnn_layers', 1, 3) # Número de blocos CNN (Conv1D + MaxPool + Dropout)

    # Hiperparâmetros para as camadas CNN
    for i in range(n_cnn_layers):
        filters = trial.suggest_int(f'cnn_filters_{i}', 32, 256, step=32)
        
        # Sugere kernel_size DENTRO do loop, limitado pelo current_seq_length
        max_kernel_size = min(current_seq_length, 5) # Maximo 5, ou o que restar da sequência
        if max_kernel_size < 2: # Se for menor que 2, não podemos usar kernel_size 2 ou maior
            raise optuna.exceptions.TrialPruned(
                f"Comprimento da sequência ({current_seq_length}) é muito pequeno para kernel_size >= 2 na camada CNN {i+1}. Trial podado."
            )
        kernel_size = trial.suggest_int(f'cnn_kernel_size_{i}', 2, max_kernel_size)

        # Calcula o novo comprimento da sequência após a Conv1D (padding='valid')
        new_seq_length_after_conv = (current_seq_length - kernel_size) + 1
        
        # Sugere pool_size DENTRO do loop, limitado pelo new_seq_length_after_conv
        max_pooling_size = min(new_seq_length_after_conv, 4) # Maximo 4, ou o que restar após Conv1D
        if max_pooling_size < 2: # Se for menor que 2, não podemos usar pool_size 2 ou maior
            raise optuna.exceptions.TrialPruned(
                f"Comprimento da sequência após Conv1D ({new_seq_length_after_conv}) é muito pequeno para pool_size >= 2 na camada CNN {i+1}. Trial podado."
            )
        pool_size = trial.suggest_int(f'cnn_pooling_size_{i}', 2, max_pooling_size)
        
        cnn_dropout_rate = trial.suggest_float(f'cnn_dropout_{i}', 0.0, 0.5)

        model.add(layers.Conv1D(
            filters=filters,
            kernel_size=kernel_size,
            activation='relu',
            padding='valid' # Redução de dimensão
        ))
        model.add(layers.MaxPooling1D(pool_size=pool_size))
        model.add(layers.Dropout(cnn_dropout_rate))

        # Atualiza o comprimento da sequência para a próxima iteração do loop CNN
        current_seq_length = int(np.floor(new_seq_length_after_conv / pool_size))
        
        # Se o comprimento da sequência se tornar 0 ou negativo antes da última camada CNN, podar o trial
        if current_seq_length <= 0 and i < n_cnn_layers - 1:
            raise optuna.exceptions.TrialPruned(
                f"Comprimento da sequência se tornou <= 0 após a camada CNN {i+1}. Trial podado."
            )

    n_lstm_layers = trial.suggest_int('n_lstm_layers', 1, 5)
    lstm_units = [trial.suggest_int(f'lstm_units_{i}', 32, 256) for i in range(n_lstm_layers)]
    lstm_dropout = [trial.suggest_float(f'lstm_dropout_{i}', 0.0, 0.5) for i in range(n_lstm_layers)]
    
    n_dense_layers = trial.suggest_int('n_dense_layers', 1, 5)
    dense_units = [trial.suggest_int(f'dense_units_{i}', 32, 512) for i in range(n_dense_layers)]
    dense_dropout = [trial.suggest_float(f'dense_dropout_{i}', 0.0, 0.5) for i in range(n_dense_layers)]
    # Camadas LSTM
    for i in range(n_lstm_layers):
        return_sequences = (i < n_lstm_layers - 1)
        model.add(layers.LSTM(
            lstm_units[i],
            return_sequences=return_sequences,
            kernel_regularizer=l2(trial.suggest_float('l2_weight', 1e-5, 1e-2, log=True))
        ))
        model.add(layers.Dropout(lstm_dropout[i]))

    # Camadas Densas
    for i in range(n_dense_layers):
        model.add(layers.Dense(dense_units[i], activation='relu'))
        model.add(layers.Dropout(dense_dropout[i]))

    # Saída
    model.add(layers.Dense(1, activation='sigmoid'))
    
    # Compilar
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=trial.suggest_float('learning_rate', 1e-6, 1e-1, log=True)),
        loss='binary_crossentropy',
        metrics=['accuracy', tf.keras.metrics.AUC(name='auc')]
    )
    return model
    

## Optuna

### Função Objetiva para todas as frequências

In [9]:
def objective(trial, X_train, y_train):
    input_shape = X_train.shape[1:]

    n_split = 5
    kf = StratifiedKFold(n_splits=n_split, shuffle=True, random_state=42)
    fold_aucs = []

    y_train_1d = y_train.squeeze().astype(int)
    classes = np.unique(y_train_1d)
    class_weights = compute_class_weight('balanced', classes=classes, y=y_train_1d)
    class_weight = {i: w for i, w in zip(classes, class_weights)}

    for fold, (train_idx, val_idx) in enumerate(kf.split(X_train, y_train_1d)):
        print(f"=== Iniciando Fold {fold + 1} de {n_split} ===")

        X_train_fold, X_val_fold = X_train.iloc[train_idx], X_train.iloc[val_idx]
        y_train_fold, y_val_fold = y_train_1d[train_idx], y_train_1d[val_idx]

        model = create_model_layer(trial, input_shape)  # Mudar se quer RNA
        #model = create_lstm_model(trial, input_shape) #Mudar se quer RNA ou RNA + LSTM ou CNN + LSTM
        #model = create_cnn_lstm_model(trial, input_shape)  # CNN + LSTM

        callback = tf.keras.callbacks.EarlyStopping(
            monitor='val_auc',
            patience=30,
            verbose=0,
            mode='max',
            restore_best_weights=True
        )

        lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_auc',
            factor=0.5,
            patience=20,
            mode='max',
            min_lr=1e-7,
        )

        history = model.fit(
            X_train_fold,
            y_train_fold,
            epochs=100,
            batch_size=32,
            validation_data=(X_val_fold, y_val_fold),
            callbacks=[callback, lr_scheduler],
            verbose=1,
            class_weight=class_weight,
        )

        best_auc_fold = max(history.history['val_auc'])
        fold_aucs.append(best_auc_fold)
        print(f"AUC do Fold {fold + 1}: {best_auc_fold:.4f}")

        del model
        del callback, lr_scheduler, history
        tf.keras.backend.clear_session()
        gc.collect()
    print("=== Fim dos Folds ===")
    
    mean_auc = np.mean(fold_aucs)
    print(f"AUC Médio dos Folds: {mean_auc:.4f}")

    return mean_auc


### Treinando Modelo com os melhores hiperparametros - RNA

In [None]:
def build_final_model(best_params, input_shape):
    n_layers = best_params['n_layers']
    units = best_params['units']
    dropout = best_params['dropout']
    learning_rate = best_params['learning_rate']
   
    model = models.Sequential()
    model.add(layers.InputLayer(input_shape=input_shape))
    for i in range(n_layers):
        model.add(layers.Dense(units, activation='relu'))
        model.add(layers.Dropout(dropout))
    model.add(layers.Dense(1, activation='sigmoid'))

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model

In [10]:
def build_final_model_layer(best_params, input_shape):
    model = models.Sequential()
    model.add(layers.InputLayer(shape=input_shape))
    
    # Extrai hiperparâmetros por camada
    n_layers = best_params['n_layers']
    units = [best_params[f'units_{i}'] for i in range(n_layers)]
    dropouts = [best_params[f'dropout_{i}'] for i in range(n_layers)]
    
    # Constrói as camadas
    for i in range(n_layers):
        # activation_function = best_params.get(f'activation_{i}', 'relu')
        # if activation_function == 'leaky_relu':
        #     model.add(layers.Dense(units[i], kernel_regularizer=l1_l2(l1=best_params['l1_weight'], l2=best_params['l2_weight'])))
        #     model.add(layers.LeakyReLU(alpha=0.1))
        # else:
        #     model.add(layers.Dense(units[i], activation=activation_function, kernel_regularizer=l1_l2(l1=best_params['l1_weight'], l2=best_params['l2_weight'])))
        model.add(layers.Dense(units[i], activation='relu', kernel_regularizer=l1_l2(l1=best_params['l1_weight'], l2=best_params['l2_weight'])))
        model.add(layers.BatchNormalization())
        model.add(layers.Dropout(dropouts[i]))
    
    model.add(layers.Dense(1, activation='sigmoid'))

    model.compile(
        #optimizer=tf.keras.optimizers.get(best_params['optimizer'])(learning_rate=best_params['learning_rate']),
        optimizer=tf.keras.optimizers.Adam(learning_rate=best_params['learning_rate']),
        loss='binary_crossentropy',
        metrics=['accuracy', tf.keras.metrics.AUC(name='auc')]
    )
    return model

In [None]:
def build_final_model_lstm(best_params, input_shape):
    
    n_lstm_layers = best_params['n_lstm_layers']
    lstm_dropout = [best_params[f'lstm_dropout_{i}'] for i in range(n_lstm_layers)]
    lstm_units = [best_params[f'lstm_units_{i}'] for i in range(n_lstm_layers)]
    
    n_dense_layers = best_params['n_dense_layers']
    dense_units = [best_params[f'dense_units_{i}'] for i in range(n_dense_layers)]
    dense_dropout = [best_params[f'dense_dropout_{i}'] for i in range(n_dense_layers)]
    
    # Construção do modelo
    model = models.Sequential()
    model.add(layers.InputLayer(input_shape=input_shape))  # (look_back, n_features)
    
    # Camadas LSTM
    for i in range(n_lstm_layers):
        return_sequences = (i < n_lstm_layers - 1)  # Retorna sequências se não for a última camada
        model.add(layers.LSTM(
            lstm_units[i],
            return_sequences=return_sequences,
            kernel_regularizer=l2(best_params['l2_weight'])
        ))
        model.add(layers.Dropout(lstm_dropout[i]))
    
    # Camadas Densas
    for i in range(n_dense_layers):
        model.add(layers.Dense(dense_units[i], activation='relu'))
        model.add(layers.Dropout(dense_dropout[i]))
    
    # Saída
    model.add(layers.Dense(1, activation='sigmoid'))
    
    # Compilar
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=best_params['learning_rate']),
        loss='binary_crossentropy',
        metrics=['accuracy', tf.keras.metrics.AUC(name='auc')]
    )
    return model

In [None]:
def build_final_model_cnn_lstm(best_params, input_shape):
    model = models.Sequential()
    model.add(layers.InputLayer(input_shape=input_shape))

    current_seq_length = input_shape[0]
    
    n_cnn_layers = best_params['n_cnn_layers']

    # Camadas Conv1D - Construídas com os melhores parâmetros por camada
    for i in range(n_cnn_layers):
        filters = best_params[f'cnn_filters_{i}']
        # Obtém os kernel_size e pool_size específicos desta camada do best_params
        kernel_size = best_params[f'cnn_kernel_size_{i}']
        pool_size = best_params[f'cnn_pooling_size_{i}']
        cnn_dropout_rate = best_params[f'cnn_dropout_{i}']

        # Recalcula ajustados para o modelo final, garantindo validade
        new_seq_length_after_conv = (current_seq_length - kernel_size) + 1
        adjusted_pool_size = min(pool_size, max(1, new_seq_length_after_conv))

        model.add(layers.Conv1D(
            filters=filters,
            kernel_size=kernel_size,
            activation='relu',
            padding='valid'
        ))
        model.add(layers.MaxPooling1D(pool_size=adjusted_pool_size))
        model.add(layers.Dropout(cnn_dropout_rate))

        current_seq_length = int(np.floor(new_seq_length_after_conv / adjusted_pool_size))

        # Avisa se a dimensão ficou inválida e para de adicionar CNNs se for o caso
        if current_seq_length <= 0 and i < n_cnn_layers - 1:
            print(f"Atenção: A camada Conv1D {i+1} resultou em uma dimensão de saída inválida ({current_seq_length}). As camadas CNN subsequentes foram ignoradas.")
            break


    n_lstm_layers = best_params['n_lstm_layers']
    lstm_units = [best_params[f'lstm_units_{i}'] for i in range(n_lstm_layers)]
    lstm_dropout = [best_params[f'lstm_dropout_{i}'] for i in range(n_lstm_layers)]
    
    n_dense_layers = best_params['n_dense_layers']
    dense_units = [best_params[f'dense_units_{i}'] for i in range(n_dense_layers)]
    dense_dropout = [best_params[f'dense_dropout_{i}'] for i in range(n_dense_layers)]
    # Camadas LSTM
    for i in range(n_lstm_layers):
        return_sequences = (i < n_lstm_layers - 1)
        model.add(layers.LSTM(
            lstm_units[i],
            return_sequences=return_sequences,
            kernel_regularizer=l2(best_params['l2_weight'])
        ))
        model.add(layers.Dropout(lstm_dropout[i]))

    # Camadas Densas
    for i in range(n_dense_layers):
        model.add(layers.Dense(dense_units[i], activation='relu'))
        model.add(layers.Dropout(dense_dropout[i]))

    # Saída
    model.add(layers.Dense(1, activation='sigmoid'))
    
    # Compilar
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=best_params['learning_rate']),
        loss='binary_crossentropy',
        metrics=['accuracy', tf.keras.metrics.AUC(name='auc')]
    )
    return model

#### Resultados

In [11]:
from sklearn.metrics import roc_curve, auc, classification_report

def evaluate_model(model, X_test, y_test, threshold=0.9897): #Limiar 0.9897
    y_prob = model.predict(X_test).ravel()

    # AUC e FPR
    fpr, tpr, thresholds = roc_curve(y_test, y_prob)
    roc_auc = auc(fpr, tpr)
    
    # Encontre o threshold ótimo (maximiza TPR - FPR)
    optimal_idx = np.argmax(tpr - fpr)
    optimal_threshold = thresholds[optimal_idx]
    
    y_test_1d = y_test.squeeze()
    y_pred = (y_prob > threshold).astype(int)
    
    # Métricas
    print(classification_report(y_test_1d, y_pred))
    print(pd.crosstab(y_test_1d, y_pred, rownames=['Real'], colnames=['Predito'], margins=True))
    print(f'AUC: {roc_auc:.4f}')
    print(f'Threshold ótimo: {optimal_threshold:.4f}')
    
    return roc_auc, threshold, fpr, tpr

In [None]:
results = {}

N_TRIALS = 200

for dataset_name, data in datasets.items(): #datasets_seq.items(): se for LSTM ou CNN + LSTM
    print(f'\n=== Processando {dataset_name} ===')

    study_name = f'tcc_auc_all_features_CV_{dataset_name.replace(" ", "_")}_v2'
    #study_name = f'tcc_auc_all_features_{dataset_name.replace(" ", "_")}_lstm_v2'
    #study_name = f'tcc_auc_all_features_{dataset_name.replace(" ", "_")}_cnn_lstm_v1'
    storage = 'sqlite:///../tcc.db'
    model_path = f'models/{study_name}.keras'

    if os.path.exists(model_path):
        print(f"Modelo {model_path} já existe. Carregando modelo...")
        best_model = tf.keras.models.load_model(model_path)

        print(f'\n=== Avaliação em {dataset_name} ===')
        roc_auc, optimal_threshold, fpr, tpr = evaluate_model(best_model, data['X_test'], data['y_test'])
        results[dataset_name] = {
            'model': best_model,
            'auc': roc_auc,
            'threshold': optimal_threshold,
            'fpr': fpr,
            'tpr': tpr
        }
        continue

    try: 
        study = optuna.load_study(study_name=study_name, storage=storage)
        print(f"Estudo {study_name} encontrado. Verificando trials...")
        
    except KeyError:
        print(f"Estudo {study_name} não existe. Criando novo estudo...")
        study = optuna.create_study(
            study_name=study_name,
            direction='maximize',
            load_if_exists=True,
            sampler=optuna.samplers.TPESampler(seed=42),
            storage=storage
        )



    if (N_TRIALS - len(study.trials)) > 0:
        print(f"Executando {N_TRIALS - len(study.trials)} trials restantes...")
        study.optimize(
            lambda trial: objective(trial, data['X_train'], data['y_train']),
            n_trials=(N_TRIALS - len(study.trials)),
            gc_after_trial=True,
        )
    else:
        print(f"Todos os trials já foram executados. Pulando otimização.")
    
    # Passo 2: Treinar o modelo final com os melhores parâmetros
    best_model = build_final_model_layer(study.best_params, data['X_train'].shape[1:]) #LSTM: build_final_model_lstm, #CNN + LSTM: build_final_model_cnn_lstm, #RNA: build_final_model_layer
    final_model_callback = [
        tf.keras.callbacks.EarlyStopping(
            monitor='val_auc',
            patience=50,
            mode='max',
            restore_best_weights=True
        ),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_auc',
            factor=0.5,
            patience=10,
            mode='max',
            min_lr=1e-7
        )
    ]
    
    best_model.fit(
        data['X_train'],
        data['y_train'],
        epochs=200,
        batch_size=study.best_params['batch_size'],
        validation_split=0.1,
        callbacks=final_model_callback,
        verbose=1
    )
    
    # Salvar o modelo
    print(f'\n=== Salvando o modelo em models ===')
    best_model.save(f'models/{study_name}.keras')
    # Passo 3: Avaliar no teste
    print(f'\n=== Avaliação em {dataset_name} ===')
    roc_auc, optimal_threshold, fpr, tpr = evaluate_model(best_model, data['X_test'], data['y_test'])
    results[dataset_name] = {
        'model': best_model,
        'auc': roc_auc,
        'threshold': optimal_threshold,
        'fpr': fpr,
        'tpr': tpr
    }


=== Processando Fundamental ===
Estudo tcc_auc_all_features_CV_Fundamental_v2 encontrado. Verificando trials...
Executando 172 trials restantes...
=== Iniciando Fold 1 de 5 ===


I0000 00:00:1748778236.528509 3723708 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5563 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4060, pci bus id: 0000:07:00.0, compute capability: 8.9


Epoch 1/100


I0000 00:00:1748778240.332109 3738157 service.cc:152] XLA service 0x77eba80088b0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1748778240.332150 3738157 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 4060, Compute Capability 8.9
2025-06-01 08:44:00.469718: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1748778241.075383 3738157 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m  1/141[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m18:18[0m 8s/step - accuracy: 0.4688 - auc: 0.3815 - loss: 13.2070

I0000 00:00:1748778245.482486 3738157 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 57ms/step - accuracy: 0.5209 - auc: 0.5449 - loss: 12.8244 - val_accuracy: 0.3851 - val_auc: 0.6470 - val_loss: 12.5942 - learning_rate: 1.0034e-04
Epoch 2/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.5233 - auc: 0.5809 - loss: 12.5593 - val_accuracy: 0.4250 - val_auc: 0.6515 - val_loss: 12.4323 - learning_rate: 1.0034e-04
Epoch 3/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.5481 - auc: 0.6076 - loss: 12.3577 - val_accuracy: 0.5324 - val_auc: 0.6403 - val_loss: 12.1541 - learning_rate: 1.0034e-04
Epoch 4/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.5331 - auc: 0.6004 - loss: 12.1829 - val_accuracy: 0.5013 - val_auc: 0.6226 - val_loss: 11.9767 - learning_rate: 1.0034e-04
Epoch 5/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.5296 - au

[I 2025-06-01 08:56:27,746] Trial 28 finished with value: 0.7930768609046936 and parameters: {'n_layers': 6, 'units_0': 96, 'dropout_0': 0.20336982935001102, 'units_1': 416, 'dropout_1': 0.30680058026028717, 'units_2': 256, 'dropout_2': 0.36641030381430023, 'units_3': 416, 'dropout_3': 0.41868216227706284, 'units_4': 512, 'dropout_4': 0.07851476808521922, 'units_5': 160, 'dropout_5': 0.24136545880183238, 'learning_rate': 0.00010033550053668757, 'l2_weight': 0.003960344322253316, 'l1_weight': 0.00019965897921805212, 'optimizer': 'SGD'}. Best is trial 22 with value: 0.7947261095046997.


=== Fim dos Folds ===
AUC Médio dos Folds: 0.7931
=== Iniciando Fold 1 de 5 ===
Epoch 1/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 115ms/step - accuracy: 0.4890 - auc: 0.4940 - loss: 25.8045 - val_accuracy: 0.8004 - val_auc: 0.6610 - val_loss: 24.0330 - learning_rate: 0.0016
Epoch 2/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.5426 - auc: 0.5747 - loss: 23.4014 - val_accuracy: 0.3842 - val_auc: 0.6831 - val_loss: 20.8152 - learning_rate: 0.0016
Epoch 3/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.5455 - auc: 0.6327 - loss: 19.9005 - val_accuracy: 0.5750 - val_auc: 0.6822 - val_loss: 17.1734 - learning_rate: 0.0016
Epoch 4/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - accuracy: 0.5103 - auc: 0.6487 - loss: 16.2708 - val_accuracy: 0.5608 - val_auc: 0.6922 - val_loss: 13.7946 - learning_rate: 0.0016
Epoch 5/100
[1m141/141[0m [

[I 2025-06-01 09:07:19,982] Trial 29 finished with value: 0.690646481513977 and parameters: {'n_layers': 14, 'units_0': 64, 'dropout_0': 0.13814241407686934, 'units_1': 416, 'dropout_1': 0.4204864850871825, 'units_2': 224, 'dropout_2': 0.3528177679322396, 'units_3': 480, 'dropout_3': 0.4973885530197304, 'units_4': 480, 'dropout_4': 0.06567286041381598, 'units_5': 160, 'dropout_5': 0.24316982967174544, 'units_6': 448, 'dropout_6': 0.4232518692528044, 'units_7': 128, 'dropout_7': 0.31718974715503895, 'units_8': 512, 'dropout_8': 0.34789750070720626, 'units_9': 352, 'dropout_9': 0.1317820575547327, 'units_10': 320, 'dropout_10': 0.489124098804985, 'units_11': 96, 'dropout_11': 0.0747692458234066, 'units_12': 352, 'dropout_12': 0.20779358756472804, 'units_13': 128, 'dropout_13': 0.0031950190265366274, 'learning_rate': 0.0016429833567782582, 'l2_weight': 0.004165611187673042, 'l1_weight': 0.00020012368837678805, 'optimizer': 'SGD'}. Best is trial 22 with value: 0.7947261095046997.


=== Fim dos Folds ===
AUC Médio dos Folds: 0.6906
=== Iniciando Fold 1 de 5 ===
Epoch 1/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 165ms/step - accuracy: 0.4877 - auc: 0.4932 - loss: 57.5047 - val_accuracy: 0.1996 - val_auc: 0.5257 - val_loss: 57.1863 - learning_rate: 1.8124e-05
Epoch 2/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.4929 - auc: 0.5117 - loss: 57.4066 - val_accuracy: 0.1996 - val_auc: 0.5454 - val_loss: 57.3202 - learning_rate: 1.8124e-05
Epoch 3/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.4908 - auc: 0.4920 - loss: 57.3105 - val_accuracy: 0.2023 - val_auc: 0.5219 - val_loss: 57.3045 - learning_rate: 1.8124e-05
Epoch 4/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - accuracy: 0.4921 - auc: 0.4896 - loss: 57.2598 - val_accuracy: 0.2050 - val_auc: 0.5334 - val_loss: 57.2580 - learning_rate: 1.8124e-05
Epoch 5/100
[

[I 2025-06-01 09:20:24,882] Trial 30 finished with value: 0.5534576892852783 and parameters: {'n_layers': 19, 'units_0': 96, 'dropout_0': 0.23596196952404397, 'units_1': 512, 'dropout_1': 0.2614998135372236, 'units_2': 448, 'dropout_2': 0.4950646873092215, 'units_3': 320, 'dropout_3': 0.4026327425646885, 'units_4': 512, 'dropout_4': 0.08235616107642453, 'units_5': 256, 'dropout_5': 0.15832704008414122, 'units_6': 256, 'dropout_6': 0.29861274558849477, 'units_7': 256, 'dropout_7': 0.41684350719578744, 'units_8': 160, 'dropout_8': 0.4133788367801512, 'units_9': 96, 'dropout_9': 0.22872397153558469, 'units_10': 192, 'dropout_10': 0.09313345849001314, 'units_11': 224, 'dropout_11': 0.30626447898206405, 'units_12': 128, 'dropout_12': 0.40654544752684313, 'units_13': 512, 'dropout_13': 0.3946356671888836, 'units_14': 320, 'dropout_14': 0.13849914871346353, 'units_15': 512, 'dropout_15': 0.16768436457966443, 'units_16': 384, 'dropout_16': 0.0036683946785462385, 'units_17': 32, 'dropout_17': 0

=== Fim dos Folds ===
AUC Médio dos Folds: 0.5535
=== Iniciando Fold 1 de 5 ===
Epoch 1/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 68ms/step - accuracy: 0.5143 - auc: 0.5428 - loss: 15.9163 - val_accuracy: 0.3469 - val_auc: 0.6246 - val_loss: 15.2161 - learning_rate: 2.5022e-04
Epoch 2/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.5374 - auc: 0.5901 - loss: 15.0339 - val_accuracy: 0.3469 - val_auc: 0.6404 - val_loss: 14.5504 - learning_rate: 2.5022e-04
Epoch 3/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.5432 - auc: 0.5980 - loss: 14.2399 - val_accuracy: 0.3807 - val_auc: 0.6175 - val_loss: 13.7508 - learning_rate: 2.5022e-04
Epoch 4/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.5379 - auc: 0.6136 - loss: 13.4458 - val_accuracy: 0.3931 - val_auc: 0.6285 - val_loss: 13.0046 - learning_rate: 2.5022e-04
Epoch 5/100
[1m14

[I 2025-06-01 09:33:32,780] Trial 31 finished with value: 0.7792397022247315 and parameters: {'n_layers': 6, 'units_0': 32, 'dropout_0': 0.2914642094840141, 'units_1': 448, 'dropout_1': 0.18438293459874647, 'units_2': 192, 'dropout_2': 0.32066392074814765, 'units_3': 416, 'dropout_3': 0.432813193582386, 'units_4': 448, 'dropout_4': 0.13855260257046947, 'units_5': 128, 'dropout_5': 0.043907426224221946, 'learning_rate': 0.00025021534341454347, 'l2_weight': 0.002996949312545941, 'l1_weight': 0.000558643462808232, 'optimizer': 'SGD'}. Best is trial 22 with value: 0.7947261095046997.


=== Fim dos Folds ===
AUC Médio dos Folds: 0.7792
=== Iniciando Fold 1 de 5 ===
Epoch 1/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 302ms/step - accuracy: 0.4985 - auc: 0.5036 - loss: 169.5974 - val_accuracy: 0.1996 - val_auc: 0.5061 - val_loss: 169.4702 - learning_rate: 1.6967e-06
Epoch 2/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 27ms/step - accuracy: 0.5085 - auc: 0.5156 - loss: 169.5860 - val_accuracy: 0.1996 - val_auc: 0.4856 - val_loss: 169.6536 - learning_rate: 1.6967e-06
Epoch 3/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 27ms/step - accuracy: 0.5019 - auc: 0.4969 - loss: 169.6084 - val_accuracy: 0.1996 - val_auc: 0.4742 - val_loss: 169.7327 - learning_rate: 1.6967e-06
Epoch 4/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 18ms/step - accuracy: 0.5013 - auc: 0.4984 - loss: 169.5714 - val_accuracy: 0.2032 - val_auc: 0.4609 - val_loss: 169.7225 - learning_rate: 1.6967e-06
Epoch 

[I 2025-06-01 09:51:45,552] Trial 32 finished with value: 0.5291840314865113 and parameters: {'n_layers': 33, 'units_0': 224, 'dropout_0': 0.0956969509451071, 'units_1': 384, 'dropout_1': 0.4355227634935818, 'units_2': 288, 'dropout_2': 0.25969781495136846, 'units_3': 512, 'dropout_3': 0.2896507688737935, 'units_4': 512, 'dropout_4': 0.04045362150642706, 'units_5': 320, 'dropout_5': 0.4154576030180011, 'units_6': 32, 'dropout_6': 0.24559182508805918, 'units_7': 448, 'dropout_7': 0.1932589642052544, 'units_8': 224, 'dropout_8': 0.2812859991614616, 'units_9': 416, 'dropout_9': 0.4304066723719996, 'units_10': 352, 'dropout_10': 0.31602488322723654, 'units_11': 512, 'dropout_11': 0.41247851036119915, 'units_12': 320, 'dropout_12': 0.2939189150047372, 'units_13': 384, 'dropout_13': 0.19118335769958694, 'units_14': 512, 'dropout_14': 0.37370819126527355, 'units_15': 192, 'dropout_15': 0.15910464056730048, 'units_16': 384, 'dropout_16': 0.18318231401026971, 'units_17': 320, 'dropout_17': 0.45

=== Fim dos Folds ===
AUC Médio dos Folds: 0.5292
=== Iniciando Fold 1 de 5 ===
Epoch 1/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 202ms/step - accuracy: 0.5131 - auc: 0.5120 - loss: 275.0545 - val_accuracy: 0.8004 - val_auc: 0.5000 - val_loss: 107.1860 - learning_rate: 0.0849
Epoch 2/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.4706 - auc: 0.4745 - loss: 81.1502 - val_accuracy: 0.1996 - val_auc: 0.6107 - val_loss: 47.9522 - learning_rate: 0.0849
Epoch 3/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.4958 - auc: 0.5078 - loss: 57.6351 - val_accuracy: 0.8004 - val_auc: 0.3390 - val_loss: 43.2565 - learning_rate: 0.0849
Epoch 4/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step - accuracy: 0.5166 - auc: 0.5261 - loss: 92.4327 - val_accuracy: 0.8004 - val_auc: 0.4714 - val_loss: 161.2126 - learning_rate: 0.0849
Epoch 5/100
[1m141/141[0m

[I 2025-06-01 10:06:49,186] Trial 33 finished with value: 0.667721962928772 and parameters: {'n_layers': 21, 'units_0': 128, 'dropout_0': 0.16090933391339268, 'units_1': 320, 'dropout_1': 0.37444380949013545, 'units_2': 416, 'dropout_2': 0.45153598059022215, 'units_3': 320, 'dropout_3': 0.407995353203904, 'units_4': 224, 'dropout_4': 0.11641464082434953, 'units_5': 192, 'dropout_5': 0.14711763342488662, 'units_6': 352, 'dropout_6': 0.49821225468762936, 'units_7': 96, 'dropout_7': 0.2834744858655118, 'units_8': 96, 'dropout_8': 0.09794214604775642, 'units_9': 288, 'dropout_9': 0.3170499968990059, 'units_10': 32, 'dropout_10': 0.4065101295903968, 'units_11': 384, 'dropout_11': 0.19864798338374778, 'units_12': 192, 'dropout_12': 0.020959449828570953, 'units_13': 128, 'dropout_13': 0.4040323582417779, 'units_14': 96, 'dropout_14': 0.22601078845717046, 'units_15': 32, 'dropout_15': 0.37208622360950405, 'units_16': 128, 'dropout_16': 0.37397701339309386, 'units_17': 192, 'dropout_17': 0.1234

=== Fim dos Folds ===
AUC Médio dos Folds: 0.6677
=== Iniciando Fold 1 de 5 ===
Epoch 1/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 112ms/step - accuracy: 0.5080 - auc: 0.5101 - loss: 4.2729 - val_accuracy: 0.2138 - val_auc: 0.5670 - val_loss: 4.2313 - learning_rate: 8.3459e-05
Epoch 2/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - accuracy: 0.5176 - auc: 0.5307 - loss: 4.2217 - val_accuracy: 0.2520 - val_auc: 0.5709 - val_loss: 4.3032 - learning_rate: 8.3459e-05
Epoch 3/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.5028 - auc: 0.5163 - loss: 4.2031 - val_accuracy: 0.2555 - val_auc: 0.5457 - val_loss: 4.3384 - learning_rate: 8.3459e-05
Epoch 4/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.5136 - auc: 0.5304 - loss: 4.1471 - val_accuracy: 0.2431 - val_auc: 0.5913 - val_loss: 4.3986 - learning_rate: 8.3459e-05
Epoch 5/100
[1m141/14

[I 2025-06-01 10:24:33,134] Trial 34 finished with value: 0.7159428358078003 and parameters: {'n_layers': 10, 'units_0': 96, 'dropout_0': 0.3026171556080687, 'units_1': 448, 'dropout_1': 0.28500729178965534, 'units_2': 256, 'dropout_2': 0.37902696406443775, 'units_3': 480, 'dropout_3': 0.25872272578437105, 'units_4': 384, 'dropout_4': 0.25753683064288535, 'units_5': 96, 'dropout_5': 0.23108162959866357, 'units_6': 192, 'dropout_6': 0.3403783534821918, 'units_7': 256, 'dropout_7': 0.0782452072196454, 'units_8': 416, 'dropout_8': 0.13452927328900902, 'units_9': 96, 'dropout_9': 0.02371118392212851, 'learning_rate': 8.345868675634183e-05, 'l2_weight': 0.00047296458534118723, 'l1_weight': 6.223555648325313e-05, 'optimizer': 'RMSprop'}. Best is trial 22 with value: 0.7947261095046997.


=== Fim dos Folds ===
AUC Médio dos Folds: 0.7159
=== Iniciando Fold 1 de 5 ===
Epoch 1/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 62ms/step - accuracy: 0.5046 - auc: 0.5314 - loss: 3.8189 - val_accuracy: 0.5324 - val_auc: 0.6744 - val_loss: 3.6003 - learning_rate: 1.0184e-04
Epoch 2/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.5287 - auc: 0.5909 - loss: 3.7031 - val_accuracy: 0.4836 - val_auc: 0.6759 - val_loss: 3.6682 - learning_rate: 1.0184e-04
Epoch 3/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.5668 - auc: 0.6093 - loss: 3.6624 - val_accuracy: 0.4774 - val_auc: 0.6772 - val_loss: 3.6717 - learning_rate: 1.0184e-04
Epoch 4/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - accuracy: 0.5491 - auc: 0.6193 - loss: 3.6198 - val_accuracy: 0.4632 - val_auc: 0.6793 - val_loss: 3.6794 - learning_rate: 1.0184e-04
Epoch 5/100
[1m141/141

[I 2025-06-01 10:37:28,959] Trial 35 finished with value: 0.8020259499549866 and parameters: {'n_layers': 5, 'units_0': 224, 'dropout_0': 0.19946226689659807, 'units_1': 224, 'dropout_1': 0.34260968980923695, 'units_2': 320, 'dropout_2': 0.4152262127386562, 'units_3': 384, 'dropout_3': 0.3393003997436192, 'units_4': 448, 'dropout_4': 0.1902787126191235, 'learning_rate': 0.000101836239469746, 'l2_weight': 1.4103699640208214e-05, 'l1_weight': 0.0001169683914750294, 'optimizer': 'SGD'}. Best is trial 35 with value: 0.8020259499549866.


=== Fim dos Folds ===
AUC Médio dos Folds: 0.8020
=== Iniciando Fold 1 de 5 ===
Epoch 1/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 50ms/step - accuracy: 0.5118 - auc: 0.5547 - loss: 3.6664 - val_accuracy: 0.7959 - val_auc: 0.6803 - val_loss: 3.1659 - learning_rate: 2.4736e-04
Epoch 2/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5606 - auc: 0.6164 - loss: 3.5085 - val_accuracy: 0.7879 - val_auc: 0.6729 - val_loss: 3.1395 - learning_rate: 2.4736e-04
Epoch 3/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.5502 - auc: 0.6055 - loss: 3.4765 - val_accuracy: 0.7737 - val_auc: 0.6470 - val_loss: 3.1324 - learning_rate: 2.4736e-04
Epoch 4/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.5595 - auc: 0.6186 - loss: 3.3735 - val_accuracy: 0.7569 - val_auc: 0.6525 - val_loss: 3.0969 - learning_rate: 2.4736e-04
Epoch 5/100
[1m141/141[0

[I 2025-06-01 10:49:01,934] Trial 36 finished with value: 0.8087077021598816 and parameters: {'n_layers': 4, 'units_0': 224, 'dropout_0': 0.21838762220014432, 'units_1': 96, 'dropout_1': 0.33178932434061914, 'units_2': 320, 'dropout_2': 0.4603731649176487, 'units_3': 416, 'dropout_3': 0.4650889659758929, 'learning_rate': 0.00024735915583177614, 'l2_weight': 1.1173804592233608e-05, 'l1_weight': 0.00018873591999723526, 'optimizer': 'SGD'}. Best is trial 36 with value: 0.8087077021598816.


=== Fim dos Folds ===
AUC Médio dos Folds: 0.8087
=== Iniciando Fold 1 de 5 ===
Epoch 1/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 48ms/step - accuracy: 0.5462 - auc: 0.5826 - loss: 4.1822 - val_accuracy: 0.7551 - val_auc: 0.6881 - val_loss: 3.5710 - learning_rate: 0.0011
Epoch 2/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.5602 - auc: 0.6318 - loss: 3.7859 - val_accuracy: 0.6096 - val_auc: 0.6917 - val_loss: 3.4590 - learning_rate: 0.0011
Epoch 3/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.5544 - auc: 0.6462 - loss: 3.5391 - val_accuracy: 0.5652 - val_auc: 0.6568 - val_loss: 3.2773 - learning_rate: 0.0011
Epoch 4/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.5871 - auc: 0.6800 - loss: 3.2891 - val_accuracy: 0.4614 - val_auc: 0.7009 - val_loss: 3.2201 - learning_rate: 0.0011
Epoch 5/100
[1m141/141[0m [32m━━━━━━━━━

[I 2025-06-01 11:01:04,892] Trial 37 finished with value: 0.7868496298789978 and parameters: {'n_layers': 4, 'units_0': 224, 'dropout_0': 0.22169507456551027, 'units_1': 128, 'dropout_1': 0.33312413688354525, 'units_2': 320, 'dropout_2': 0.4501059238331854, 'units_3': 416, 'dropout_3': 0.46289851744194377, 'learning_rate': 0.001063617356228044, 'l2_weight': 1.007895584627717e-05, 'l1_weight': 0.00021251861275921346, 'optimizer': 'SGD'}. Best is trial 36 with value: 0.8087077021598816.


=== Fim dos Folds ===
AUC Médio dos Folds: 0.7868
=== Iniciando Fold 1 de 5 ===
Epoch 1/100
[1m140/141[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 10ms/step - accuracy: 0.5005 - auc: 0.5097 - loss: 6.9345

## Treinando o modelo com os melhores parâmetros do Optuna -- OLD VERSION

In [None]:
storage_study = 'sqlite:///tcc.db'
study = optuna.load_study(
    study_name='tcc_v5',
    storage=storage_study
)
best_params = study.best_params
print('Melhores hiperparâmetros:', best_params)

### F0

In [None]:
def test_model(X_train, y_train, X_test, y_test, dataset):
    study = optuna.load_study(
        study_name=f'tcc_{dataset}_v5',
        storage=storage_study
    )
    best_model = create_model(study.best_trial, X_train.shape[1:])
    best_model.fit(
        X_train,
        y_train,
        epochs=150,
        batch_size=best_params['batch_size'],
        verbose=1
    )

    y_test_pred = (best_model.predict(X_test)).ravel()
    test_auc = roc_auc_score(y_test, y_test_pred)

    
    print(f'AUC no teste: {test_auc:.4f}')

In [None]:
from sklearn.metrics import classification_report
model = tf.keras.models.Sequential()
model.add(layers.InputLayer(input_shape=X_train_f0.shape[1:]))

for _ in range(best_params['n_layers']):
    model.add(layers.Dense(best_params['units'], activation='relu'))
    model.add(layers.Dropout(best_params['dropout']))
model.add(layers.Dense(1, activation='sigmoid'))
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=best_params['learning_rate']),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

callback = tf.keras.callbacks.EarlyStopping(
    monitor='accuracy',
    patience=75,
    verbose=1,
)

hist = model.fit(
    X_train_f0,
    y_train_f0,
    epochs=150,
    batch_size=best_params['batch_size'],
    verbose=1,
    callbacks=[callback]
)


In [None]:
y_prob_f0 = model.predict(X_test_f0)
y_pred_labels_f0 = (y_prob_f0 > 0.5).astype(int)
acc = accuracy_score(y_test_f0, y_pred_labels_f0)
print('Acurácia no conjunto de teste:', acc)
print(classification_report(y_test_f0, y_pred_labels_f0))

In [None]:
y_test_1d_f0 = y_test_f0.squeeze()
y_pred_labels_1d_f0 = y_pred_labels_f0.squeeze()
y_prob_1d_f0 = y_prob_f0.squeeze()

In [None]:
print(pd.crosstab(y_test_1d_f0, y_pred_labels_1d_f0, rownames=['Real'], colnames=['Predito'], margins=True),'')


In [None]:
limiar = 0.9897
y_pred_limiar_f0 = (y_prob_f0 > limiar)
y_pred_limiar_f0 = np.multiply(y_pred_limiar_f0, 1)
y_pred_limiar_f0 

In [None]:
y_pred_limiar_1d_f0 = y_pred_limiar_f0.squeeze()

In [None]:
from sklearn.metrics import confusion_matrix

tn_f0, fp_f0, fn_f0, tp_f0 = confusion_matrix(y_test_1d_f0, y_pred_limiar_1d_f0).ravel()

taxa_fp_f0 = fp_f0 / (fp_f0 + tn_f0)
taxa_detec_f0 = tp_f0 / (tp_f0 + fn_f0)

print('Taxa de Falsos Positivos:', taxa_fp_f0)
print('Taxa de Detecção:', taxa_detec_f0)

In [None]:
from sklearn.metrics import roc_curve, auc

fpr_f0, tpr_f0, thresholds_f0 = roc_curve(y_test_1d_f0, y_prob_f0)
roc_auc_f0 = auc(fpr_f0, tpr_f0)
print('AUC:', roc_auc_f0)

In [None]:
import matplotlib.pyplot as plt

def plot_roc_curve(fpr, tpr, label=None):
    plt.plot(fpr, tpr, linewidth=2, label=label)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.axis([0, 1, 0, 1])
    plt.xlabel('False Positive Rate', fontsize=16)
    plt.ylabel('True Positive Rate', fontsize=16)
    plt.grid(True)

plt.figure(figsize=(8, 6))
plot_roc_curve(fpr_f0, tpr_f0)
plt.show()


### F0 e 2F0

In [None]:
from sklearn.metrics import classification_report
model = tf.keras.models.Sequential()
model.add(layers.InputLayer(input_shape=X_train_h2.shape[1:]))

for _ in range(best_params['n_layers']):
    model.add(layers.Dense(best_params['units'], activation='relu'))
    model.add(layers.Dropout(best_params['dropout']))
model.add(layers.Dense(1, activation='sigmoid'))
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=best_params['learning_rate']),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

callback = tf.keras.callbacks.EarlyStopping(
    monitor='accuracy',
    patience=75,
    verbose=1,
)

hist = model.fit(
    X_train_h2,
    y_train_h2,
    epochs=150,
    batch_size=best_params['batch_size'],
    verbose=1,
    callbacks=[callback]
)




In [None]:
y_prob_h2 = model.predict(X_test_h2)
y_pred_labels_h2 = (y_prob_h2 > 0.5).astype(int)
acc = accuracy_score(y_test_h2, y_pred_labels_h2)
print('Acurácia no conjunto de teste:', acc)
print(classification_report(y_test_h2, y_pred_labels_h2))

In [None]:
y_test_1d_h2 = y_test_h2.squeeze()
y_pred_labels_1d_h2 = y_pred_labels_h2.squeeze()
y_prob_1d_h2 = y_prob_h2.squeeze()

In [None]:
print(pd.crosstab(y_test_1d_h2, y_pred_labels_1d_h2, rownames=['Real'], colnames=['Predito'], margins=True),'')


In [None]:
limiar = 0.9897
y_pred_limiar_h2 = (y_prob_h2 > limiar)
y_pred_limiar_h2 = np.multiply(y_pred_limiar_h2, 1)
y_pred_limiar_h2 

In [None]:
y_pred_limiar_1d_h2 = y_pred_limiar_h2.squeeze()

In [None]:
from sklearn.metrics import confusion_matrix

tn_h2, fp_h2, fn_h2, tp_h2 = confusion_matrix(y_test_1d_h2, y_pred_limiar_1d_h2).ravel()

taxa_fp_h2 = fp_h2 / (fp_h2 + tn_h2)
taxa_detec_h2 = tp_h2 / (tp_h2 + fn_h2)

print('Taxa de Falsos Positivos:', taxa_fp_h2)
print('Taxa de Detecção:', taxa_detec_h2)

In [None]:
from sklearn.metrics import roc_curve, auc

fpr_h2, tpr_h2, thresholds_h2 = roc_curve(y_test_1d_h2, y_prob_h2)
roc_auc_h2 = auc(fpr_h2, tpr_h2)
print('AUC:', roc_auc_h2)

In [None]:
import matplotlib.pyplot as plt

def plot_roc_curve(fpr, tpr, label=None):
    plt.plot(fpr, tpr, linewidth=2, label=label)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.axis([0, 1, 0, 1])
    plt.xlabel('False Positive Rate', fontsize=16)
    plt.ylabel('True Positive Rate', fontsize=16)
    plt.grid(True)

plt.figure(figsize=(8, 6))
plot_roc_curve(fpr_h2, tpr_h2)
plt.show()


### F0, 2F0 e 3F0

In [None]:
from sklearn.metrics import classification_report
model = tf.keras.models.Sequential()
model.add(layers.InputLayer(input_shape=X_train.shape[1:]))

for _ in range(best_params['n_layers']):
    model.add(layers.Dense(best_params['units'], activation='relu'))
    model.add(layers.Dropout(best_params['dropout']))
model.add(layers.Dense(1, activation='sigmoid'))
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=best_params['learning_rate']),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

callback = tf.keras.callbacks.EarlyStopping(
    monitor='accuracy',
    patience=75,
    verbose=1,
)

hist = model.fit(
    X_train,
    y_train,
    epochs=150,
    batch_size=best_params['batch_size'],
    verbose=1,
    callbacks=[callback]
)


In [None]:
y_prob = model.predict(X_test)
y_pred_labels = (y_prob > 0.5).astype(int)
acc = accuracy_score(y_test, y_pred_labels)
print('Acurácia no conjunto de teste:', acc)
print(classification_report(y_test, y_pred_labels))

In [None]:
y_test_1d = y_test.squeeze()
y_pred_labels_1d = y_pred_labels.squeeze()
y_prob_1d = y_prob.squeeze()

In [None]:
print(pd.crosstab(y_test_1d, y_pred_labels_1d, rownames=['Real'], colnames=['Predito'], margins=True),'')


In [None]:
limiar = 0.9897
y_pred_limiar = (y_prob > limiar)
y_pred_limiar = np.multiply(y_pred_limiar, 1)
y_pred_limiar 

In [None]:
y_pred_limiar_1d = y_pred_limiar.squeeze()

In [None]:
from sklearn.metrics import confusion_matrix

tn, fp, fn, tp = confusion_matrix(y_test_1d, y_pred_limiar_1d).ravel()

taxa_fp = fp / (fp + tn)
taxa_detec = tp / (tp + fn)

print('Taxa de Falsos Positivos:', taxa_fp)
print('Taxa de Detecção:', taxa_detec)

In [None]:
from sklearn.metrics import roc_curve, auc

fpr, tpr, thresholds = roc_curve(y_test_1d, y_prob)
roc_auc = auc(fpr, tpr)
print('AUC:', roc_auc)

In [None]:
import matplotlib.pyplot as plt

def plot_roc_curve(fpr, tpr, label=None):
    plt.plot(fpr, tpr, linewidth=2, label=label)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.axis([0, 1, 0, 1])
    plt.xlabel('False Positive Rate', fontsize=16)
    plt.ylabel('True Positive Rate', fontsize=16)
    plt.grid(True)

plt.figure(figsize=(8, 6))
plot_roc_curve(fpr, tpr)
plt.show()


# Métodos Tradicionais

In [None]:
X_test_rec = pd.read_csv('f0/X_test_rec_f0.csv')
y_test_f0 = pd.read_parquet('f0/y_test_f0.parquet')

X_test_rec_nh = X_test_rec.iloc[:, :500].values.astype(np.complex128)

In [None]:
X_test_rec_nh

In [None]:
from sklearn.metrics import roc_curve

M = 500;
alfa=0.05;

teta = np.angle(X_test_rec_nh)
y_test_only_1=np.flatnonzero(y_test)
y_test_only_0=np.flatnonzero(y_test==0)
## CSM
CSM=(np.power((1/M)*np.sum(np.cos(teta),axis=1),2))+(np.power((1/M)*np.sum(np.sin(teta),axis=1),2))

from scipy.stats.distributions import chi2

VC_CSM=chi2.ppf(1-alfa, df=2)/(2*M)
VC_CSM
y_CSM_txd=(CSM[y_test_only_1] > VC_CSM)
#y_pred_limiar=int(y_pred_limiar)

# Converting boolean to integer
y_CSM_txd = np.multiply(y_CSM_txd, 1)

y_CSM_txd=np.mean(y_CSM_txd)


y_CSM_fp=(CSM[y_test_only_0] > VC_CSM)
#y_pred_limiar=int(y_pred_limiar)

# Converting boolean to integer
y_CSM_fp = np.multiply(y_CSM_fp, 1)

y_CSM_fp=np.mean(y_CSM_fp)

y_CSM=(CSM > VC_CSM)
#y_pred_limiar=int(y_pred_limiar)

# Converting boolean to integer
y_CSM = np.multiply(y_CSM, 1)

fpr_CSM, tpr_CSM, thresholds_CSM = roc_curve(y_test,CSM.transpose())
print(y_CSM_txd)
print(y_CSM_fp)
## MSC
MSC=(np.abs(np.sum(X_test_rec_nh,axis=1))**2)/(M*np.sum((np.abs(X_test_rec_nh)**2),axis=1))
MSC = np.reshape(MSC,(1,len(MSC)))

VC_MSC=1-(alfa**(1/(M-1)))
VC_MSC
y_MSC_txd=(MSC[0,y_test_only_1] > VC_MSC)
#y_pred_limiar=int(y_pred_limiar)

# Converting boolean to integer
y_MSC_txd = np.multiply(y_MSC_txd, 1)

y_MSC_txd=np.mean(y_MSC_txd)

from numpy import matlib

y_MSC_fp=(MSC[0, y_test_only_0] > np.matlib.repmat(VC_MSC,1096,1))
#y_pred_limiar=int(y_pred_limiar)

# Converting boolean to integer
y_MSC_fp = np.multiply(y_MSC_fp, 1)

y_MSC_fp=np.mean(y_MSC_fp)

fpr_MSC, tpr_MSC, thresholds_MSC = roc_curve(y_test,MSC.transpose())


In [None]:
# Supondo que você já tenha as curvas externas:
results['CSM'] = {
    'fpr': fpr_CSM, 
    'tpr': tpr_CSM, 
    'auc': auc(fpr_CSM, tpr_CSM),
    'label': 'CSM'
}

results['MSC'] = {
    'fpr': fpr_MSC, 
    'tpr': tpr_MSC, 
    'auc': auc(fpr_MSC, tpr_MSC),
    'label': 'MSC'
}

# Curva ROC - Comparação

In [None]:
def plot_all_roc_curves(results):
    plt.figure(figsize=(10, 8))
    
    # Plotar cada curva do dicionário
    for key in results:
        data = results[key]
        plt.plot(
            data['fpr'], 
            data['tpr'], 
            linewidth=2,
            label=f"{data.get('label', key)} (AUC = {data['auc']:.2f})"
        )
    
    # Linha de referência
    plt.plot([0, 1], [0, 1], 'k--', linewidth=2)
    plt.xlabel('False Positive Rate', fontsize=12)
    plt.ylabel('True Positive Rate', fontsize=12)
    plt.title('Comparação de Curvas ROC', fontsize=14)
    plt.legend(loc='lower right', fontsize=10)
    plt.grid(True)
    plt.show()

In [None]:
# Após o loop de avaliação dos datasets, adicione:
plot_all_roc_curves(results)