In [None]:
# Install the np_utils package into the notebook environment.
# NOTE(review): no visible cell imports np_utils — confirm this install is still needed.
!pip install np_utils

In [None]:
! python np_utils -v

In [None]:
# Nomes: Carla Lapa, Evelyn Bessa, Nezi Pimentel, Sandra Valcacer, Vitoria Almeida, Samira Souza
import os
from os import listdir
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import plotly.express as px
import seaborn as sns

import cv2
from matplotlib.image import imread

import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from keras.preprocessing import image
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix


import glob
import PIL
from PIL import Image
import random

# Seed every random number generator the notebook relies on: Python's `random`,
# NumPy (used by the Keras image generators) and TensorFlow (weight init, dropout).
SEED = 100
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
import tensorflow as tf
# Detect GPUs and ask TensorFlow to allocate GPU memory on demand.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
    print("We got a GPU")
    # Memory growth has to be configured identically on every visible GPU,
    # so apply it to all of them rather than only the first one.
    for gpu in physical_devices:
        try:
            tf.config.experimental.set_memory_growth(gpu, True)
        except RuntimeError as err:
            # Raised when the runtime is already initialised (e.g. the cell is re-run).
            print(f"Could not enable memory growth on {gpu.name}: {err}")
else:
    print("Sorry, no GPU for you...")

In [None]:
import matplotlib.pyplot as plt

# Open one sample JPEG from the Kaggle dataset and display it as a sanity check.
dataset_name = 'cbis-ddsm-breast-cancer-image-dataset' # Example dataset name
subfolder_name = 'jpeg/1.3.6.1.4.1.9590.100.1.2.316322277110955049538295115662287535438'
image_filename = '1-098.jpg' # Example image filename

dataset_path = f'/kaggle/input/{dataset_name}/{subfolder_name}/'
image_path = os.path.join(dataset_path, image_filename)

try:
    # The context manager closes the underlying file handle once the figure is drawn.
    with Image.open(image_path) as img:
        plt.imshow(img)
        plt.title(f'Displaying: {image_filename}')
        plt.axis('off')
        plt.show()
    print(f"Successfully opened and displayed {image_filename}")
except FileNotFoundError:
    print(f"Error: Image not found at {image_path}. Please check the path.")
except Exception as e:
    # Best-effort preview: report any other failure (e.g. a corrupt image) without stopping the notebook.
    print(f"An error occurred: {e}")

In [None]:
# Nomes: Carla Lapa, Evelyn Bessa, Nezi Pimentel, Sandra Valcacer, Vitoria Almeida, Samira Souza
# Root of the Kaggle CBIS-DDSM dataset and the sub-directory that holds the JPEG images.
base_dir = "/kaggle/input/cbis-ddsm-breast-cancer-image-dataset"
image_dir = os.path.join(base_dir, "jpeg")

In [None]:
# Nomes: Carla Lapa, Evelyn Bessa, Nezi Pimentel, Sandra Valcacer, Vitoria Almeida, Samira Souza
# Load the DICOM metadata table and the calc/mass case descriptions
# (training and test sets) from the dataset's csv folder.
csv_dir = f"{base_dir}/csv"

dicom_data = pd.read_csv(f"{csv_dir}/dicom_info.csv")
calc_df = pd.read_csv(f"{csv_dir}/calc_case_description_train_set.csv")
mass_df = pd.read_csv(f"{csv_dir}/mass_case_description_train_set.csv")
calc_df_test = pd.read_csv(f"{csv_dir}/calc_case_description_test_set.csv")
mass_df_test = pd.read_csv(f"{csv_dir}/mass_case_description_test_set.csv")

In [None]:
# Nomes: Carla Lapa, Evelyn Bessa, Nezi Pimentel, Sandra Valcacer, Vitoria Almeida, Samira Souza
# Build a cleaned copy of the DICOM metadata: drop columns the notebook does not
# use and back-fill the gaps in SeriesDescription and Laterality.
cols_to_drop = [
    'PatientBirthDate','AccessionNumber','Columns','ContentDate','ContentTime',
    'PatientSex','ReferringPhysicianName','Rows','SOPClassUID','SOPInstanceUID',
    'StudyDate','StudyID','StudyTime','InstanceNumber',
    'SeriesNumber'
]

# drop() returns a new frame, so dicom_data itself is left untouched;
# errors='ignore' tolerates columns that are absent from the CSV.
dicom_clean = dicom_data.drop(columns=cols_to_drop, errors='ignore')

# Series.bfill() replaces the deprecated fillna(method='bfill').
# NOTE(review): back-filling copies the value of the next row, which may belong
# to a different series — confirm this is acceptable for SeriesDescription.
dicom_clean['SeriesDescription'] = dicom_clean['SeriesDescription'].bfill()
dicom_clean['Laterality'] = dicom_clean['Laterality'].bfill()

In [None]:
# Nomes: Carla Lapa, Evelyn Bessa, Nezi Pimentel, Sandra Valcacer, Vitoria Almeida, Samira Souza
# Preview the first rows of the cleaned DICOM metadata.
dicom_clean.head()

In [None]:
# Nomes: Carla Lapa, Evelyn Bessa, Nezi Pimentel, Sandra Valcacer, Vitoria Almeida, Samira Souza
# Calcification training cases: normalise column names to snake_case, mark the
# descriptive columns as categoricals and back-fill missing calc descriptors.
calc = calc_df.rename(columns={
    'calc type':'calc_type',
    'calc distribution':'calc_distribution',
    'image view':'image_view',
    'left or right breast':'left_or_right_breast',
    'breast density':'breast_density',
    'abnormality type':'abnormality_type',
    'abnormality id': 'abnormality_id',
    'image file path': 'image_file_path',
    'cropped image file path': 'cropped_image_file_path',
    'ROI mask file path': 'ROI_mask_file_path'
})

# Convert to categories
for col in ['pathology','calc_type','calc_distribution','abnormality_type',
            'image_view','left_or_right_breast']:
    if col in calc.columns:
        calc[col] = calc[col].astype('category')

# Assign the back-filled column back to the frame: a chained
# `calc[col].fillna(..., inplace=True)` acts on a temporary object under pandas
# copy-on-write, and fillna(method=...) is deprecated in favour of bfill().
calc['calc_type'] = calc['calc_type'].bfill()
calc['calc_distribution'] = calc['calc_distribution'].bfill()

In [None]:
# Nomes: Carla Lapa, Evelyn Bessa, Nezi Pimentel, Sandra Valcacer, Vitoria Almeida, Samira Souza
# Preview the first five rows of the cleaned calc training table.
calc.head(5)

In [None]:
# Nomes: Carla Lapa, Evelyn Bessa, Nezi Pimentel, Sandra Valcacer, Vitoria Almeida, Samira Souza
# Mass training cases: normalise column names to snake_case, mark the
# descriptive columns as categoricals and back-fill missing mass descriptors.
mass = mass_df.rename(columns={
    'mass shape':'mass_shape',
    'mass margins':'mass_margins',
    'image view':'image_view',
    'left or right breast':'left_or_right_breast',
    'abnormality type':'abnormality_type',
    'abnormality id': 'abnormality_id',
    'image file path': 'image_file_path',
    'cropped image file path': 'cropped_image_file_path',
    'ROI mask file path': 'ROI_mask_file_path'
})

for col in ['mass_shape','mass_margins','pathology','abnormality_type',
            'image_view','left_or_right_breast']:
    if col in mass.columns:
        mass[col] = mass[col].astype('category')

# Assign the back-filled column back to the frame: a chained
# `mass[col].fillna(..., inplace=True)` acts on a temporary object under pandas
# copy-on-write, and fillna(method=...) is deprecated in favour of bfill().
mass['mass_shape'] = mass['mass_shape'].bfill()
mass['mass_margins'] = mass['mass_margins'].bfill()


In [None]:
# Nomes: Carla Lapa, Evelyn Bessa, Nezi Pimentel, Sandra Valcacer, Vitoria Almeida, Samira Souza
# Preview the first five rows of the cleaned mass training table.
mass.head(5)

In [None]:
# Nomes: Carla Lapa, Evelyn Bessa, Nezi Pimentel, Sandra Valcacer, Vitoria Almeida, Samira Souza
# Calcification test cases: same clean-up as the calc training table
# (snake_case column names, categoricals, back-filled calc descriptors).
calc_test = calc_df_test.rename(columns={
    'calc type':'calc_type',
    'calc distribution':'calc_distribution',
    'image view':'image_view',
    'left or right breast':'left_or_right_breast',
    'breast density':'breast_density',
    'abnormality type':'abnormality_type',
    'abnormality id': 'abnormality_id',
    'image file path': 'image_file_path',
    'cropped image file path': 'cropped_image_file_path',
    'ROI mask file path': 'ROI_mask_file_path'
})

# Convert to categories
for col in ['pathology','calc_type','calc_distribution','abnormality_type',
            'image_view','left_or_right_breast']:
    if col in calc_test.columns:
        calc_test[col] = calc_test[col].astype('category')

# Assign the back-filled column back to the frame: a chained
# `calc_test[col].fillna(..., inplace=True)` acts on a temporary object under
# pandas copy-on-write, and fillna(method=...) is deprecated in favour of bfill().
calc_test['calc_type'] = calc_test['calc_type'].bfill()
calc_test['calc_distribution'] = calc_test['calc_distribution'].bfill()

In [None]:
# Nomes: Carla Lapa, Evelyn Bessa, Nezi Pimentel, Sandra Valcacer, Vitoria Almeida, Samira Souza
# Mass test cases: same clean-up as the mass training table
# (snake_case column names, categoricals, back-filled mass descriptors).
mass_test = mass_df_test.rename(columns={
    'mass shape':'mass_shape',
    'mass margins':'mass_margins',
    'image view':'image_view',
    'left or right breast':'left_or_right_breast',
    'abnormality type':'abnormality_type',
    'abnormality id': 'abnormality_id',
    'image file path': 'image_file_path',
    'cropped image file path': 'cropped_image_file_path',
    'ROI mask file path': 'ROI_mask_file_path'
})

for col in ['mass_shape','mass_margins','pathology','abnormality_type',
            'image_view','left_or_right_breast']:
    if col in mass_test.columns:
        mass_test[col] = mass_test[col].astype('category')

# Assign the back-filled column back to the frame: a chained
# `mass_test[col].fillna(..., inplace=True)` acts on a temporary object under
# pandas copy-on-write, and fillna(method=...) is deprecated in favour of bfill().
mass_test['mass_shape'] = mass_test['mass_shape'].bfill()
mass_test['mass_margins'] = mass_test['mass_margins'].bfill()


In [None]:
# Nomes: Carla Lapa, Evelyn Bessa, Nezi Pimentel, Sandra Valcacer, Vitoria Almeida, Samira Souza
# Stack the calc and mass case tables (training and test sets alike) into one frame with a fresh 0..n-1 index.
full_df = pd.concat([calc, calc_test, mass, mass_test], ignore_index=True)

In [None]:
# Nomes: Carla Lapa, Evelyn Bessa, Nezi Pimentel, Sandra Valcacer, Vitoria Almeida, Samira Souza
# Preview the first rows of the combined case table.
full_df.head()

In [None]:
# Nomes: Carla Lapa, Evelyn Bessa, Nezi Pimentel, Sandra Valcacer, Vitoria Almeida, Samira Souza
# Patient ids of the calc training rows whose patient also appears in the mass
# training set and in both test sets.
calc_patients = calc['patient_id']
in_every_set = (
    calc_patients.isin(mass['patient_id'])
    & calc_patients.isin(calc_test['patient_id'])
    & calc_patients.isin(mass_test['patient_id'])
)
common_patient_ids = calc.loc[in_every_set, 'patient_id']
# Alternative kept for reference (calc vs. mass training sets only):
# common_patient_ids = calc[calc['patient_id'].isin(mass['patient_id'])]['patient_id']
print(common_patient_ids)

In [None]:
# Nomes: Carla Lapa, Evelyn Bessa, Nezi Pimentel, Sandra Valcacer, Vitoria Almeida, Samira Souza
# This patient has both abnormality types.
# NOTE(review): [49] is a label-based lookup on full_df's integer index; it raises
# KeyError if row 49 no longer belongs to P_00034 (e.g. if the concat order changes).
full_df[full_df['patient_id'] == 'P_00034']['cropped_image_file_path'][49]

In [None]:
# Nomes: Carla Lapa, Evelyn Bessa, Nezi Pimentel, Sandra Valcacer, Vitoria Almeida, Samira Souza
def map_pathology(p):
    """Map a pathology value to a binary label.

    Returns 1 when ``p`` equals "MALIGNANT" and 0 for any other value.
    """
    return 1 if p == "MALIGNANT" else 0

# Binary target column: 1 for MALIGNANT, 0 for every other pathology value.
full_df['label'] = full_df['pathology'].apply(map_pathology)

In [None]:
# Re-display the cleaned DICOM metadata for reference before joining it with the case table.
dicom_clean.head()

In [None]:
import os
import pandas as pd

# ================================================================
# 1. Helper that extracts the identifiers from a file path
# ================================================================
def extract_uids(path):
    """Split a '/'-separated file path into its first four components.

    Returns a 4-tuple ``(dicom_patient_id, study_uid, series_uid, filename)``:
    - dicom_patient_id -> e.g. Calc-Training_P_00005_RIGHT_CC_1
    - study_uid        -> e.g. 1.3.6.1.4.1.xxxxx... (first UID)
    - series_uid       -> e.g. 1.3.6.1.4.1.yyyyy... (second UID)
    - filename         -> e.g. 000001.dcm

    Surrounding whitespace is stripped before splitting. A missing path, or
    one with fewer than four components, yields ``(None, None, None, None)``.
    """
    no_match = (None, None, None, None)

    if pd.isna(path):
        return no_match

    segments = path.strip().split("/")
    if len(segments) < 4:
        return no_match

    # Components beyond the fourth are ignored.
    dicom_patient_id, study_uid, series_uid, filename = segments[:4]
    return dicom_patient_id, study_uid, series_uid, filename


# ================================================================
# 2. Apply to full_df to create explicit identifier columns
# ================================================================
uid_columns = ["dicom_patient_id", "study_uid", "series_uid", "dicom_filename"]

# Parse every cropped-image path once (instead of once per output column) and
# spread the resulting 4-tuples over the four identifier columns.
uid_values = full_df["cropped_image_file_path"].apply(extract_uids).tolist()
uid_frame = pd.DataFrame(uid_values, index=full_df.index, columns=uid_columns)
for uid_column in uid_columns:
    full_df[uid_column] = uid_frame[uid_column]

# Drop entries whose path could not be parsed. The .copy() makes full_df an
# independent frame so the column assignments below do not write to a slice.
full_df = full_df[full_df["dicom_patient_id"].notna()].copy()

print("full_df com UIDs extraídos:", len(full_df))
# Series.bfill() replaces the deprecated fillna(method='bfill').
# NOTE(review): full_df also holds calc rows, so this back-fill copies mass
# descriptors from later rows into them — confirm that is intended.
full_df['mass_shape'] = full_df['mass_shape'].bfill()
full_df['mass_margins'] = full_df['mass_margins'].bfill()

# ================================================================
# 3. Prepare dicom_clean for the merge
# ================================================================
dicom_key_cols = ["PatientID", "StudyInstanceUID", "SeriesInstanceUID", "image_path", "SeriesDescription"]

# Rename the DICOM columns to the names used for the identifiers on full_df;
# SeriesDescription keeps its name.
merge_column_names = {
    "PatientID": "dicom_patient_id",
    "StudyInstanceUID": "study_uid",
    "SeriesInstanceUID": "series_uid",
    "image_path": "dicom_jpeg_path"
}
dicom_clean_sub = dicom_clean[dicom_key_cols].rename(columns=merge_column_names)

print("dicom_clean pronto para merge:", len(dicom_clean_sub))


# ================================================================
# 4. MERGE: attach the DICOM metadata rows to each case in full_df
# ================================================================
# Left join, so every full_df row is kept even without a DICOM match.
merge_keys = ["dicom_patient_id", "study_uid", "series_uid"]
merged_df = full_df.merge(dicom_clean_sub, how="left", on=merge_keys)

print("Merged entries:", len(merged_df))
jpeg_found = merged_df["dicom_jpeg_path"].notna().sum()
print("JPEG real encontrado em:", jpeg_found)


# ================================================================
# 5. Keep only entries whose JPEG exists (cropped image not required)
#    NOTE: this step and step 6 below are disabled (commented out).
# ================================================================
# merged_df["jpeg_exists"] = merged_df["dicom_jpeg_path"].apply(
#     lambda p: os.path.exists(os.path.join(base_dir, p)) if isinstance(p, str) else False
# )

# final_df = merged_df.copy()

# print("Final dataset (sem ROI, apenas JPEG válido):", len(final_df))

# # ================================================================
# # 6. Final absolute path used for training
# # ================================================================
# final_df["jpeg_full_path"] = final_df["dicom_jpeg_path"].apply(lambda p: os.path.join(base_dir, p))

# Show the JPEG path next to the pathology and the derived binary label.
print(merged_df[["dicom_jpeg_path", "pathology", "label"]].head())


In [None]:
# Inspect the merged rows of patient P_00034; the commented lines are earlier ad-hoc lookups.
#merged_df.head()
#dicom_clean_sub[dicom_clean_sub['series_uid'] == '1.3.6.1.4.1.9590.100.1.2.393344010211719049419601138200355094682']
#full_df[full_df['patient_id'] == 'P_00034']['cropped_image_file_path'][49]
merged_df[merged_df['patient_id'] == 'P_00034']
#merged_df[merged_df['series_uid'] == '1.3.6.1.4.1.9590.100.1.2.40204365512880018321779759940450653990']
#1.3.6.1.4.1.9590.100.1.2.40204365512880018321779759940450653990

In [None]:
# Number of non-null values per column of the merged table.
merged_df.count()

In [None]:
# Keep only the merged rows whose DICOM series is described as "cropped images".
is_cropped = merged_df["SeriesDescription"] == "cropped images"
filtered_df = merged_df.loc[is_cropped].copy()

print(filtered_df["SeriesDescription"].value_counts())
print(len(filtered_df))

In [None]:
# Preview the first rows of the cropped-image subset.
filtered_df.head()

In [None]:
# JPEG path of one P_00034 row.
# NOTE(review): [98] is a label-based lookup on the index inherited from merged_df;
# it raises KeyError if that row is absent from filtered_df.
filtered_df[filtered_df['patient_id'] == 'P_00034']['dicom_jpeg_path'][98]

In [None]:
# Count of missing values per column in the cropped-image subset.
filtered_df.isnull().sum()

In [None]:
# Back-fill the descriptor columns with the next available value.
# Series.bfill() replaces the deprecated fillna(method='bfill').
# NOTE(review): gaps after the last non-null value stay NaN, and each
# descriptor is filled regardless of the row's abnormality type.
filtered_df['mass_shape'] = filtered_df['mass_shape'].bfill()
filtered_df['mass_margins'] = filtered_df['mass_margins'].bfill()
filtered_df['calc_type'] = filtered_df['calc_type'].bfill()
filtered_df['calc_distribution'] = filtered_df['calc_distribution'].bfill()

In [None]:
# Remove the original path columns now that the JPEG path has been resolved.
# Note: this rebinds cols_to_drop, which an earlier cell used for the DICOM columns.
cols_to_drop = [
    'dicom_filename',
    'image_file_path',
    'cropped_image_file_path',
    'ROI_mask_file_path',
]

# errors='ignore' makes the cell safe to re-run after the columns are gone.
filtered_df.drop(columns=cols_to_drop, inplace=True, errors='ignore')

In [None]:
# Class balance of the cropped-image subset by abnormality type, label and view.
for summary_col in ('abnormality_type', 'label', 'image_view'):
    print(filtered_df[summary_col].value_counts())

# Optuna

In [None]:
# Install Optuna and its integration package (imported below as optuna.integration).
# NOTE(review): versions are not pinned, so reruns may pick up different releases.
!pip install optuna
!pip install optuna-integration

In [None]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import EfficientNetV2M
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import optuna
from optuna.integration import TFKerasPruningCallback

# Modelling frame: rows with a resolved JPEG path. Filtering before .copy()
# makes df an independent frame, so adding 'filepath' does not write to a slice
# of filtered_df (which triggers SettingWithCopyWarning).
df = filtered_df.loc[filtered_df["dicom_jpeg_path"].notna()].copy()

# Replace the literal 'CBIS-DDSM' in each path with the dataset root;
# regex=False makes the literal match explicit on every pandas version.
df['filepath'] = df['dicom_jpeg_path'].str.replace('CBIS-DDSM', base_dir, regex=False)

print("Amostras válidas:", len(df))

In [None]:
# Store labels as strings in df (the frame later passed to flow_from_dataframe
# with class_mode="binary"), and keep plain arrays of paths and integer labels
# for the stratified splitter.
df["label"] = df["label"].astype(str)
X_paths = df["filepath"].values
y_labels = df["label"].values.astype(int)

In [None]:
import optuna
from sklearn.model_selection import StratifiedKFold
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet152V2
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.callbacks import Callback


# ================================================================
# CALLBACK THAT PRINTS DETAILED PER-EPOCH TRAINING PROGRESS
# ================================================================
class EpochProgressCallback(Callback):
    """Keras callback that prints one line at the start and end of each epoch.

    Every line is prefixed with the Optuna trial number and the fold number so
    interleaved logs remain attributable.

    Args:
        trial_number: Number of the Optuna trial being run.
        fold_number: 1-based index of the cross-validation fold.
    """

    def __init__(self, trial_number, fold_number):
        super().__init__()
        self.trial_number = trial_number
        self.fold_number = fold_number

    @staticmethod
    def _format_metric(value):
        """Format a metric with four decimals, or 'n/a' when it was not reported."""
        return "n/a" if value is None else f"{value:.4f}"

    def on_epoch_begin(self, epoch, logs=None):
        """Announce the epoch (printed 1-based)."""
        print(f"[Trial {self.trial_number} | Fold {self.fold_number}] "
              f"Iniciando epoch {epoch + 1}")

    def on_epoch_end(self, epoch, logs=None):
        """Print training/validation loss and accuracy for the finished epoch."""
        # `logs` defaults to None and validation metrics are absent when fit()
        # runs without validation data; neither case should crash the training.
        logs = logs or {}
        print(f"[Trial {self.trial_number} | Fold {self.fold_number}] "
              f"Epoch {epoch + 1} — loss={self._format_metric(logs.get('loss'))}, "
              f"acc={self._format_metric(logs.get('accuracy'))}, "
              f"val_loss={self._format_metric(logs.get('val_loss'))}, "
              f"val_acc={self._format_metric(logs.get('val_accuracy'))}")


# ================================================================
# MODEL CONSTRUCTION (RESNET152V2)
# ================================================================
def build_model(unfreeze_ratio, optimizer_name, lr, dropout):
    """Build and compile a binary classifier on an ImageNet ResNet152V2 backbone.

    Args:
        unfreeze_ratio: Fraction of the backbone's layers, counted from the
            end, to leave trainable; values that round down to zero layers
            keep the whole backbone frozen.
        optimizer_name: One of "adamw", "adam" or "rmsprop".
        lr: Learning rate passed to the optimizer.
        dropout: Dropout rate applied before the output unit.

    Returns:
        A compiled Keras model taking 512x512x3 inputs and producing one
        sigmoid output, with binary cross-entropy loss and accuracy metric.

    Raises:
        ValueError: If ``optimizer_name`` is not a supported optimizer.
    """
    # Fail before downloading/instantiating the backbone; previously an unknown
    # name surfaced later as an UnboundLocalError on `optimizer`.
    if optimizer_name not in ("adamw", "adam", "rmsprop"):
        raise ValueError(
            f"Unsupported optimizer '{optimizer_name}'; "
            "expected 'adamw', 'adam' or 'rmsprop'."
        )

    base = ResNet152V2(weights="imagenet", include_top=False, input_shape=(512, 512, 3))
    base.trainable = False

    # Unfreeze a percentage of the last layers. The explicit n_unfreeze > 0
    # guard matters: base.layers[-0:] is the whole list, which would unfreeze
    # every layer when the ratio rounds down to zero.
    n_unfreeze = int(len(base.layers) * unfreeze_ratio)
    if n_unfreeze > 0:
        for layer in base.layers[-n_unfreeze:]:
            layer.trainable = True

    x = layers.GlobalAveragePooling2D()(base.output)
    x = layers.Dropout(dropout)(x)
    out = layers.Dense(1, activation="sigmoid")(x)

    model = models.Model(inputs=base.input, outputs=out)

    if optimizer_name == "adamw":
        optimizer = optimizers.AdamW(learning_rate=lr)
    elif optimizer_name == "adam":
        optimizer = optimizers.Adam(learning_rate=lr)
    else:  # "rmsprop" — validated above
        optimizer = optimizers.RMSprop(learning_rate=lr)

    model.compile(
        optimizer=optimizer,
        loss="binary_crossentropy",
        metrics=["accuracy"]
    )
    return model


# ================================================================
# DATA GENERATORS
# ================================================================
def build_datagen():
    """Return the training ImageDataGenerator: 1/255 rescaling plus light augmentation."""
    augmentation = dict(
        rotation_range=15,
        horizontal_flip=True,
        zoom_range=0.10,
        width_shift_range=0.05,
        height_shift_range=0.05,
        fill_mode="nearest",
    )
    return ImageDataGenerator(rescale=1./255, **augmentation)

def build_test_datagen():
    """Return the evaluation ImageDataGenerator: 1/255 rescaling only, no augmentation."""
    pixel_scale = 1./255
    return ImageDataGenerator(rescale=pixel_scale)


# ================================================================
# OPTUNA OBJECTIVE
# ================================================================
def objective(trial):
    """Optuna objective: mean best validation accuracy over 3 stratified folds.

    Samples the unfreeze ratio, batch size, optimizer, learning rate, dropout
    and epoch count from ``trial``, trains one model per fold on the
    module-level ``df`` (split with ``X_paths`` / ``y_labels``) and returns
    the mean, across folds, of each fold's highest per-epoch ``val_accuracy``.

    NOTE(review): folds are stratified per image, not grouped by patient, so
    images of one patient can land in both the training and validation part.
    """
    import gc  # local import: only needed for the per-fold clean-up below

    unfreeze_ratio = trial.suggest_categorical("unfreeze_ratio", [0.2, 0.4, 0.6, 1.0])
    batch_size = trial.suggest_categorical("batch_size", [8, 12])
    optimizer_name = trial.suggest_categorical("optimizer", ["adamw", "adam", "rmsprop"])
    lr = trial.suggest_float("lr", 1e-5, 5e-4, log=True)
    dropout = trial.suggest_float("dropout", 0.0, 0.5)
    epochs = trial.suggest_int("epochs", 3, 10)

    skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
    fold_accuracies = []

    X = np.array(X_paths)
    y = np.array(y_labels)

    for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
        print(f"\n========== Trial {trial.number} — Fold {fold+1} ==========")

        train_gen = build_datagen().flow_from_dataframe(
            dataframe=df.iloc[train_idx],
            x_col="filepath",
            y_col="label",
            target_size=(512, 512),
            class_mode="binary",
            batch_size=batch_size,
            shuffle=True
        )

        val_gen = build_test_datagen().flow_from_dataframe(
            dataframe=df.iloc[val_idx],
            x_col="filepath",
            y_col="label",
            target_size=(512, 512),
            class_mode="binary",
            batch_size=batch_size,
            shuffle=False
        )

        model = build_model(unfreeze_ratio, optimizer_name, lr, dropout)

        progress_callback = EpochProgressCallback(
            trial_number=trial.number,
            fold_number=fold + 1
        )

        history = model.fit(
            train_gen,
            validation_data=val_gen,
            epochs=epochs,
            verbose=0,  # silence Keras; progress is shown only via the callback
            callbacks=[progress_callback]
        )

        best_acc = max(history.history["val_accuracy"])
        fold_accuracies.append(best_acc)

        # Release this fold's model before building the next one; otherwise
        # every fold of every trial keeps its model alive in the Keras session.
        del model, history, train_gen, val_gen
        tf.keras.backend.clear_session()
        gc.collect()

    return np.mean(fold_accuracies)


# ================================================================
# RUN THE OPTUNA STUDY
# ================================================================
# Maximise the objective (mean validation accuracy) over 8 trials.
# NOTE(review): no sampler seed is passed, so the sampled hyperparameters can differ between runs.
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=8)

print("Melhores parâmetros:", study.best_params)
print("Melhor score:", study.best_value)

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, f1_score, recall_score, precision_score, roc_auc_score
from sklearn.model_selection import StratifiedKFold
import numpy as np
import pandas as pd

###############################################################
# 1. Hyperparameters taken from the best Optuna trial
###############################################################
# NOTE(review): the Optuna study in this notebook tunes a ResNet152V2 at
# 512x512, while the model built below is EfficientNetV2-M at IMG_SIZE —
# confirm these values transfer.

UNFREEZE_RATIO = 0.4
BATCH_SIZE = 12
LR = 3.3988121810437836e-05
DROPOUT = 0.2500453237976704
EPOCHS = 9   # 9 was the best value in the trials, but now we use early stopping
IMG_SIZE = 384
N_FOLDS = 3

## Função de criação do modelo EfficientNetV2-M

In [None]:
def build_model(lr=LR, dropout=DROPOUT, unfreeze_ratio=UNFREEZE_RATIO):
    """Build and compile the EfficientNetV2-M binary classifier.

    Note: this definition replaces the ResNet152V2 ``build_model`` defined
    earlier in the notebook.

    Args:
        lr: Learning rate for the AdamW optimizer.
        dropout: Dropout rate applied before the output unit.
        unfreeze_ratio: Fraction of the backbone's layers, counted from the
            end, that are left trainable.

    Returns:
        A compiled Keras model. It expects ``IMG_SIZE x IMG_SIZE x 3`` images
        with pixel values in [0, 1], which is what the notebook's generators
        (``rescale=1./255``) produce, and outputs one sigmoid probability.
    """
    base_model = tf.keras.applications.efficientnet_v2.EfficientNetV2M(
        include_top=False,
        input_shape=(IMG_SIZE, IMG_SIZE, 3),
        weights="imagenet",
        pooling=None
    )

    total_layers = len(base_model.layers)
    unfreeze_until = int(total_layers * unfreeze_ratio)

    # Freeze the initial part of the network (when ratio < 1)
    for i, layer in enumerate(base_model.layers):
        layer.trainable = (i >= (total_layers - unfreeze_until))

    inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
    # EfficientNetV2 ships its own preprocessing and expects pixels in
    # [0, 255]; the generators already divide by 255, so undo that scaling
    # here instead of feeding the backbone a doubly-rescaled image.
    x = layers.Rescaling(255.0)(inputs)
    x = tf.keras.applications.efficientnet_v2.preprocess_input(x)
    # training=False keeps BatchNormalization (and the backbone's stochastic
    # layers) in inference mode both while fine-tuning and at predict time.
    # Hard-coding training=True left them in training mode during
    # validation/prediction. Unfrozen weights are still updated.
    x = base_model(x, training=False)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(dropout)(x)
    outputs = layers.Dense(1, activation="sigmoid")(x)

    model = models.Model(inputs, outputs)

    optimizer = tf.keras.optimizers.AdamW(learning_rate=lr)

    model.compile(
        optimizer=optimizer,
        loss="binary_crossentropy",
        metrics=[
            tf.keras.metrics.BinaryAccuracy(name="accuracy"),
            tf.keras.metrics.AUC(name="auc"),
            tf.keras.metrics.Precision(name="precision"),
            tf.keras.metrics.Recall(name="recall")
        ]
    )

    return model

## Data augmentation para treino

In [None]:
def build_datagen():
    """Return the training ImageDataGenerator used for the final model.

    Rescales pixels by 1/255 and applies flips, small rotations/shifts and a
    +/-10% brightness jitter. Replaces the earlier ``build_datagen`` definition.
    """
    augmentation = dict(
        horizontal_flip=True,
        vertical_flip=True,
        rotation_range=8,
        width_shift_range=0.05,
        height_shift_range=0.05,
        brightness_range=[0.9, 1.1],
    )
    return ImageDataGenerator(rescale=1./255, **augmentation)

In [None]:
# Stratified K-fold training of the final model; out-of-fold predictions and
# their labels are accumulated for the consolidated metrics computed afterwards.
# NOTE(review): folds are stratified per image, not grouped by patient, so
# images of one patient can appear in both the training and validation part.
skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=42)

final_preds = []
final_labels = []

for fold, (train_idx, val_idx) in enumerate(skf.split(df, df["label"])):
    print(f"\n==================== Fold {fold+1}/{N_FOLDS} ====================")

    train_df = df.iloc[train_idx].reset_index(drop=True)
    val_df = df.iloc[val_idx].reset_index(drop=True)

    # Data generators
    train_gen = build_datagen().flow_from_dataframe(
        train_df,
        x_col="filepath",
        y_col="label",
        target_size=(IMG_SIZE, IMG_SIZE),
        class_mode="binary",
        batch_size=BATCH_SIZE,
        shuffle=True
    )

    # shuffle=False keeps predictions in the generator's sample order.
    val_gen = ImageDataGenerator(rescale=1./255).flow_from_dataframe(
        val_df,
        x_col="filepath",
        y_col="label",
        target_size=(IMG_SIZE, IMG_SIZE),
        class_mode="binary",
        batch_size=BATCH_SIZE,
        shuffle=False
    )

    # Build the model
    model = build_model()

    # Callbacks: stop early and checkpoint on validation AUC, halve the
    # learning rate when validation loss plateaus.
    ckpt_path = f"best_model_fold{fold+1}.h5"

    callbacks = [
        EarlyStopping(
            monitor="val_auc",
            patience=5,
            mode="max",
            restore_best_weights=True
        ),
        ModelCheckpoint(
            ckpt_path,
            monitor="val_auc",
            mode="max",
            save_best_only=True,
            verbose=1
        ),
        ReduceLROnPlateau(
            monitor="val_loss",
            factor=0.5,
            patience=3,
            verbose=1
        )
    ]

    # Training
    history = model.fit(
        train_gen,
        validation_data=val_gen,
        epochs=EPOCHS,
        callbacks=callbacks,
        verbose=1
    )

    # Fold predictions
    preds = model.predict(val_gen).ravel()

    # flow_from_dataframe silently skips rows whose image file is missing or
    # invalid, so val_df can be longer than the prediction vector. Take the
    # labels from the generator itself to keep them aligned with `preds`.
    index_to_label = {index: name for name, index in val_gen.class_indices.items()}
    fold_labels = [index_to_label[class_index] for class_index in val_gen.classes]
    assert len(fold_labels) == len(preds), "predictions and labels are misaligned"

    final_preds.extend(preds)
    final_labels.extend(fold_labels)

    # Free the fold's graph/state before the next model is built.
    tf.keras.backend.clear_session()

In [None]:
###############################################################
# 5. Consolidated final metrics
###############################################################

# Out-of-fold labels as integers and predictions thresholded at 0.5.
final_labels_arr = np.array(final_labels).astype(int)
final_preds_bin = (np.array(final_preds) >= 0.5).astype(int)

print("\n==================== Resultados Finais (K-Fold) ====================\n")
print(classification_report(final_labels_arr, final_preds_bin, digits=4))

# Threshold-based metrics use the binarised predictions ...
for metric_title, metric_fn in (
    ("F1-score:", f1_score),
    ("Recall:", recall_score),
    ("Precision:", precision_score),
):
    print(metric_title, metric_fn(final_labels_arr, final_preds_bin))

# ... while AUC is computed from the raw predicted probabilities.
print("AUC:", roc_auc_score(final_labels_arr, final_preds))

In [None]:
# Sanity check: element types of the label and binarised-prediction arrays.
print(type(final_labels_arr[0]))
print(type(final_preds_bin[0]))
