In [13]:
import numpy
import tensorflow as tf
from cv2 import COLOR_BGR2GRAY, COLOR_GRAY2RGB, createCLAHE, cvtColor, imread, resize
from keras.callbacks import ModelCheckpoint, Callback
from keras.layers import (
    BatchNormalization,
    Conv2D,
    Dense,
    Dropout,
    GlobalAveragePooling2D,
    Input,
    MaxPooling2D,
    SeparableConv2D,
)
from keras.mixed_precision import Policy, set_global_policy
from keras.models import Sequential, load_model
from keras.regularizers import l1_l2
from matplotlib.pyplot import ioff, ion, show, subplots
from pandas import DataFrame
from sklearn.model_selection import StratifiedKFold
from tensorflow import __version__, py_function
from tensorflow.config import run_functions_eagerly, PhysicalDevice
from tensorflow.config.experimental import list_physical_devices, set_memory_growth

# Mixed-precision policy to improve performance on compatible GPUs
set_global_policy(Policy("mixed_float16"))

# Enable on-demand (growing) memory allocation for every detected GPU
gpus: list[PhysicalDevice] = list_physical_devices("GPU")
for gpu in gpus:
    set_memory_growth(gpu, True)

# Turn off eager execution of functions on TensorFlow 1.x (original author's note:
# 2.x already has this disabled by default).
# NOTE(review): only the first character of the version string is parsed, so a
# two-digit major version would be misread — confirm this is acceptable.
if int(__version__[0]) < 2:
    run_functions_eagerly(False)

# Hyperparameters. Apart from BATCH_SIZE, each one is the default value of the
# same-named argument of create_model().
ACTIVATION_FUNCTION: str = "relu"
BATCH_SIZE: int = 32  # batch size of the directory iterator and the tf.data pipeline
DROPOUT: float = 0.3
FIELD_SIZE: int = 3  # passed as the kernel size of the convolution layers
FILTERS: int = 32
IMAGE_DIMENSION: int = 200  # model input is IMAGE_DIMENSION x IMAGE_DIMENSION x 3
KERNEL_REGULARIZER = l1_l2(l1=1e-5, l2=1e-4)  # applied to the hidden Dense kernels
PADDING: str = "same"
POOL_SIZE: int = 2
UNITS: int = FILTERS * 2  # base width of the hidden Dense layers


class GraficoAcompanhamento(Callback):
    """Keras callback that records accuracy and loss per epoch and plots them.

    The history lists are reset at the start of each training run, one entry
    is appended per epoch, and a two-panel figure (accuracy on the left, loss
    on the right) is drawn once training ends.
    """

    def on_train_begin(self, logs=None):
        """Start every training run with empty history lists."""
        self.epoch: list[int] = []
        self.train_acc: list[float] = []
        self.val_acc: list[float] = []
        self.train_loss: list[float] = []
        self.val_loss: list[float] = []

    def on_epoch_end(self, epoch, logs=None):
        """Store the metrics reported for the epoch that just finished.

        Args:
            epoch: Zero-based epoch index; stored one-based for plotting.
            logs: Metric dictionary for the epoch. ``None`` is treated as an
                empty dictionary so that the ``.get`` lookups cannot fail.
        """
        logs = logs or {}
        self.epoch.append(epoch + 1)
        self.train_acc.append(logs.get("accuracy"))
        self.val_acc.append(logs.get("val_accuracy"))
        self.train_loss.append(logs.get("loss"))
        self.val_loss.append(logs.get("val_loss"))

    def on_train_end(self, logs=None):
        """Draw the accuracy and loss curves collected during training."""
        self.fig, self.ax = subplots(1, 2, figsize=(12, 4))
        ion()

        # The axes are freshly created, so there is nothing to clear first.
        self._plot_curves(self.ax[0], self.train_acc, self.val_acc, "Precisão")
        self._plot_curves(self.ax[1], self.train_loss, self.val_loss, "Perda")

        ioff()
        show()

    def _plot_curves(self, axis, train_values, val_values, metric_name):
        """Plot the training and validation series of one metric on ``axis``."""
        axis.plot(self.epoch, train_values, label="Treino")
        axis.plot(self.epoch, val_values, label="Validação")
        axis.set_title(metric_name)
        axis.set_xlabel("Época")
        axis.set_ylabel(metric_name)
        axis.legend()
        axis.grid(True)


def tf_preprocess(path, label):
    """Run ``preprocess_image`` through ``py_function`` for use in a tf.data map.

    Args:
        path: Tensor holding the image file path.
        label: Tensor holding the label of that image.

    Returns:
        ``(image, target)``: both requested as ``tf.float32`` from
        ``py_function``; the image is given the static shape
        ``(IMAGE_DIMENSION, IMAGE_DIMENSION, 3)`` and the target a scalar shape.
    """
    image, target = py_function(
        func=preprocess_image,
        inp=[path, label],
        Tout=[tf.float32, tf.float32],
    )
    # Static shapes are declared explicitly on both outputs
    # (presumably because py_function results do not carry them — TODO confirm).
    image.set_shape((IMAGE_DIMENSION, IMAGE_DIMENSION, 3))
    target.set_shape(())
    return image, target


def preprocess_image(path: str, label: int):
    """Load one image from disk and turn it into a standardised float32 array.

    Steps: read with OpenCV, convert to grayscale, apply CLAHE contrast
    enhancement, expand back to three channels, resize to ``IMAGE_SIZE``,
    scale to [0, 1] and finally standardise with the image's own mean and
    standard deviation.

    Args:
        path: Object exposing ``.numpy()`` that returns the UTF-8 encoded file
            path as bytes (this is how it is used below, despite the ``str``
            annotation).
        label: Value accepted by ``numpy.int32``.

    Returns:
        Tuple ``(image, label)`` where ``image`` is a ``numpy.float32`` array
        and ``label`` a ``numpy.int32`` scalar.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file at ``path``.
    """
    path = path.numpy().decode("utf-8")
    image = imread(path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than inside cvtColor.
        raise FileNotFoundError(f"Could not read image file: {path}")

    gray_scale_image = cvtColor(image, COLOR_BGR2GRAY)
    clahe = createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced_image = clahe.apply(gray_scale_image)
    rgb_image = cvtColor(enhanced_image, COLOR_GRAY2RGB)
    resized_image = resize(rgb_image, IMAGE_SIZE)
    normalized_image = resized_image / 255.0

    # Per-image standardisation; the epsilon guards against a zero std
    # (e.g. a completely uniform image).
    standardized_image = (normalized_image - normalized_image.mean()) / (
        normalized_image.std() + 1e-8
    )
    return standardized_image.astype(numpy.float32), numpy.int32(label)


def create_model(
    ACTIVATION_FUNCTION=ACTIVATION_FUNCTION,
    DROPOUT=DROPOUT,
    FIELD_SIZE=FIELD_SIZE,
    FILTERS=FILTERS,
    IMAGE_DIMENSION=IMAGE_DIMENSION,
    KERNEL_REGULARIZER=KERNEL_REGULARIZER,
    PADDING=PADDING,
    POOL_SIZE=POOL_SIZE,
    UNITS=UNITS,
) -> Sequential:
    """Build and compile the binary image classifier.

    Architecture, in order:
      * input of shape ``(IMAGE_DIMENSION, IMAGE_DIMENSION, 3)``;
      * four convolutional stages (two ``Conv2D`` then two
        ``SeparableConv2D``), each followed by Dropout, MaxPooling2D and
        BatchNormalization;
      * ``GlobalAveragePooling2D``;
      * four regularised Dense stages of widths ``UNITS``, ``2 * UNITS``,
        ``2 * UNITS`` and ``UNITS``, each followed by Dropout and
        BatchNormalization (the last stage uses a fixed dropout of 0.5);
      * a single sigmoid unit computed in float32.

    Every argument defaults to the module-level constant of the same name.

    Returns:
        A ``Sequential`` model compiled with the Adam optimizer, binary
        cross-entropy loss and the accuracy metric.
    """
    model = Sequential()
    model.add(Input(shape=(IMAGE_DIMENSION, IMAGE_DIMENSION, 3)))

    # Feature extractor: identical stage layout, only the convolution type varies.
    for conv_layer in (Conv2D, Conv2D, SeparableConv2D, SeparableConv2D):
        model.add(
            conv_layer(
                FILTERS, FIELD_SIZE, activation=ACTIVATION_FUNCTION, padding=PADDING
            )
        )
        model.add(Dropout(DROPOUT))
        model.add(MaxPooling2D(pool_size=POOL_SIZE))
        model.add(BatchNormalization())

    model.add(GlobalAveragePooling2D())

    # Classifier head: (layer width, dropout rate) for each Dense stage.
    dense_stages = (
        (UNITS, DROPOUT),
        (UNITS * 2, DROPOUT),
        (UNITS * 2, DROPOUT),
        (UNITS, 0.5),
    )
    for width, drop_rate in dense_stages:
        model.add(
            Dense(
                units=width,
                activation=ACTIVATION_FUNCTION,
                kernel_regularizer=KERNEL_REGULARIZER,
            )
        )
        model.add(Dropout(drop_rate))
        model.add(BatchNormalization())

    # Output layer is pinned to float32 (presumably to keep the sigmoid output
    # in full precision under the mixed_float16 policy — TODO confirm).
    model.add(Dense(units=1, activation="sigmoid", dtype="float32"))

    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model


# Square target size shared by the directory iterator below and preprocess_image().
IMAGE_SIZE: tuple[int, int] = (IMAGE_DIMENSION, IMAGE_DIMENSION)
TRAIN_DIR: str = "imagensCancerMama/treino_dataset"
# Directory iterator over TRAIN_DIR with binary labels. In the code visible here it
# is only used to read the list of file paths and their class labels;
# shuffle=False disables shuffling of that listing.
dataframe_generator = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1.0 / 255)
dataframe_data = dataframe_generator.flow_from_directory(
    TRAIN_DIR,
    shuffle=False,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="binary",
)

# One row per image: its file path and its class label stored as a string.
# NOTE(review): the labels are converted back to numbers inside preprocess_image();
# confirm the string cast is still required.
dataframe = DataFrame(
    {"filepaths": dataframe_data.filepaths, "labels": dataframe_data.labels.astype(str)}
)


# Augmenting generator (shear, zoom, horizontal flip) and a plain rescaling generator.
# NOTE(review): neither generator is referenced by the code visible in this notebook
# excerpt — confirm whether they are still needed.
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1.0 / 255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True
)
val_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1.0 / 255)

Found 529 images belonging to 2 classes.


In [14]:
def preprocess_dataset(df: DataFrame):
    """Turn a frame of file paths and labels into a batched tf.data pipeline.

    Args:
        df: Frame with a ``"filepaths"`` column and a ``"labels"`` column.

    Returns:
        A ``tf.data.Dataset`` whose elements are produced by ``tf_preprocess``,
        grouped into batches of ``BATCH_SIZE`` and prefetched.
    """
    path_label_pairs = tf.data.Dataset.from_tensor_slices(
        (df["filepaths"].values, df["labels"].values)
    )
    preprocessed = path_label_pairs.map(
        tf_preprocess, num_parallel_calls=tf.data.AUTOTUNE
    )
    return preprocessed.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)


# For every fold: (best validation accuracy, training accuracy at that same epoch).
models_accuracies: list[tuple[float, float]] = []
sk_fold = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
for fold, (train, val) in enumerate(
    sk_fold.split(dataframe["filepaths"], dataframe["labels"]), start=1
):
    # The file list was built with shuffle=False and the fold indices select rows
    # in their original order, so without this shuffle consecutive training
    # batches would presumably contain a single class each. The tf.data pipeline
    # itself never shuffles. The seed depends on the fold to stay reproducible.
    train_dataframe = (
        dataframe.iloc[train].sample(frac=1, random_state=fold).reset_index(drop=True)
    )
    val_dataframe = dataframe.iloc[val].reset_index(drop=True)

    train_dataset = preprocess_dataset(train_dataframe)
    val_dataset = preprocess_dataset(val_dataframe)

    # Keep only the weights of the epoch with the highest validation accuracy.
    model_path = f"model_fold_{fold}.keras"
    checkpoint = ModelCheckpoint(
        filepath=model_path, monitor="val_accuracy", verbose=1, save_best_only=True
    )
    model = create_model()

    trained_model = model.fit(
        train_dataset,
        epochs=308,
        validation_data=val_dataset,
        callbacks=[checkpoint, GraficoAcompanhamento()],
        verbose=1,
    )

    val_accuracies: list[float] = trained_model.history["val_accuracy"]
    max_val_accuracy: float = max(val_accuracies)
    max_val_index: int = val_accuracies.index(max_val_accuracy)
    accuracy: tuple[float, float] = (
        max_val_accuracy,
        trained_model.history["accuracy"][max_val_index],
    )
    models_accuracies.append(accuracy)

# Tuples compare element by element: the highest validation accuracy wins and
# ties are broken by the training accuracy.
best_accuracy: tuple[float, float] = max(models_accuracies)
# One-based number of the winning fold, matching the checkpoint file names.
best_model: int = models_accuracies.index(best_accuracy) + 1
breast_cancer_classifier = load_model(f"model_fold_{best_model}.keras")

Epoch 1/308
[1m 3/15[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m1:44[0m 9s/step - accuracy: 0.5608 - loss: 0.9747

KeyboardInterrupt: 

In [None]:
import os
import numpy as np
from keras.utils import load_img, img_to_array
from IPython.display import Image

%store breast_cancer_classifier
%store best_accuracy


diretorio_path = 'imagensCancerMama/teste_dataset'
imagens = os.listdir(diretorio_path)
index = np.random.randint(0, len(imagens))
imagem = imagens[index]
imagem_path = os.path.join(diretorio_path, imagem)

print(best_accuracy)

test_image = load_img(imagem_path, target_size=IMAGE_SIZE)
test_image = img_to_array(test_image)
test_image = np.expand_dims(test_image, axis=0)
resultado = breast_cancer_classifier.predict(test_image)
print(f"Resultado da predição: {resultado[0][0]}")
print(imagem)
print("É um câncer maligno" if resultado[0][0] >= 0.5 else "É um câncer benigno")
Image(filename=imagem_path)
