In [1]:
import nibabel as nib
import numpy as np
import torch
import torchio as tio
from skimage.transform import resize
import os
import tensorflow as tf
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from tqdm import tqdm




In [2]:
def calculate_label(image, threshold=0.01):
    """
    Derive a binary label for a mask patch.

    The patch is labelled 1 only when it contains at least one element equal
    to 1 AND the fraction of non-zero elements reaches ``threshold``.

    :param image: NumPy array holding the patch.
    :param threshold: Minimum fraction of non-zero elements required for label 1.
    :return: 1 or 0 (int).
    """
    n_elements = image.size
    if n_elements > 0:
        filled_fraction = np.count_nonzero(image) / n_elements
    else:
        # An empty patch has no content at all
        filled_fraction = 0

    contains_lesion = np.any(image == 1)
    return 1 if (contains_lesion and filled_fraction >= threshold) else 0

def load_patient_data(folder, patient_id):
    """
    Load every image patch, lesion mask and label of a single patient.

    The patient directory must contain the sub-directories ``left``,
    ``right``, ``lesion_left`` and ``lesion_right``. Each of them holds one
    level of sub-directories which in turn hold the files read with nibabel.

    Directory listings are sorted before being zipped together because
    ``os.listdir`` returns entries in arbitrary order; without sorting an
    image could be paired with the wrong mask or the wrong side.
    NOTE(review): this assumes matching entries sort to the same position in
    all four directories — confirm against the dataset naming scheme.

    Args:
        folder (str): Directory that contains one sub-directory per patient.
        patient_id (str): Name of the patient sub-directory to load.

    Returns:
        tuple | None: ``(patient_data, labels_pair)`` where ``patient_data``
        is a dict with the keys ``images_left``, ``images_right``,
        ``mask_left``, ``mask_right``, ``labels_left``, ``labels_right`` and
        ``labels_pair``, and ``labels_pair`` is the list of per-sample pair
        labels (0 when both sides are labelled 0, otherwise 1).
        ``None`` is returned when the patient directory or one of its four
        sub-directories is missing.
    """
    patient_path = os.path.join(folder, patient_id)
    if not os.path.exists(patient_path):
        print(f"Paciente {patient_id} não encontrado na pasta {folder}.")
        return None

    patient_data = {
        "images_left": [],
        "images_right": [],
        "mask_left": [],
        "mask_right": [],
        "labels_left": [],
        "labels_right": [],
    }

    path_left = os.path.join(patient_path, "left")
    path_right = os.path.join(patient_path, "right")
    lesion_path_left = os.path.join(patient_path, "lesion_left")
    lesion_path_right = os.path.join(patient_path, "lesion_right")

    required_dirs = (path_left, path_right, lesion_path_left, lesion_path_right)
    if not all(os.path.isdir(path) for path in required_dirs):
        print(f"Estrutura de diretórios inválida para o paciente {patient_id}.")
        return None

    # Walk the four directory trees in lock-step, in a deterministic order
    for patch_id_left, mask_id_left, patch_id_right, mask_id_right in zip(
        sorted(os.listdir(path_left)), sorted(os.listdir(lesion_path_left)),
        sorted(os.listdir(path_right)), sorted(os.listdir(lesion_path_right)),
    ):
        img_path_left = os.path.join(path_left, patch_id_left)
        mask_path_left = os.path.join(lesion_path_left, mask_id_left)
        img_path_right = os.path.join(path_right, patch_id_right)
        mask_path_right = os.path.join(lesion_path_right, mask_id_right)

        for img_left, msk_left, img_right, msk_right in zip(
            sorted(os.listdir(img_path_left)), sorted(os.listdir(mask_path_left)),
            sorted(os.listdir(img_path_right)), sorted(os.listdir(mask_path_right)),
        ):
            data_left = nib.load(os.path.join(img_path_left, img_left)).get_fdata()
            data_msk_left = nib.load(os.path.join(mask_path_left, msk_left)).get_fdata()
            data_right = nib.load(os.path.join(img_path_right, img_right)).get_fdata()
            data_msk_right = nib.load(os.path.join(mask_path_right, msk_right)).get_fdata()

            # Keep a sample only when all four arrays hold data. Appending the
            # two sides independently would shift the left and right lists
            # against each other and corrupt every later index-based pairing.
            arrays = (data_left, data_msk_left, data_right, data_msk_right)
            if any(array.size == 0 for array in arrays):
                continue

            patient_data["images_left"].append(data_left)
            patient_data["mask_left"].append(data_msk_left)
            patient_data["labels_left"].append(calculate_label(data_msk_left))

            patient_data["images_right"].append(data_right)
            patient_data["mask_right"].append(data_msk_right)
            patient_data["labels_right"].append(calculate_label(data_msk_right))

    # A pair is negative only when both sides are labelled 0
    labels_pair = [
        0 if (label_left == 0 and label_right == 0) else 1
        for label_left, label_right in zip(
            patient_data["labels_left"], patient_data["labels_right"]
        )
    ]
    patient_data["labels_pair"] = labels_pair

    print(f"Paciente {patient_id} carregado com sucesso.")
    print(f"Total de recortes: {len(labels_pair)}")
    return patient_data, labels_pair

In [3]:
# Função para normalizar entre 0 e 1
def normalize_minmax(image_data):
    """
    Rescale an array linearly so its minimum maps to 0 and its maximum to 1.

    The minimum and maximum are taken over the whole array, so a stack of
    images is scaled with one common range.

    :param image_data: Array-like of numbers (must not be empty; ``np.min``
        raises ``ValueError`` on empty input).
    :return: Array of the same shape with values in [0, 1]. When every
        element is equal the result is all zeros instead of NaN.
    """
    values = np.asarray(image_data)
    min_val = np.min(values)
    max_val = np.max(values)
    shifted = values - min_val
    value_range = max_val - min_val

    if value_range == 0:
        # Constant input: dividing would give 0/0 = NaN, so return zeros
        out_dtype = shifted.dtype if np.issubdtype(shifted.dtype, np.floating) else np.float64
        return np.zeros(shifted.shape, dtype=out_dtype)

    return shifted / value_range

# Função para filtrar as imagens por paciente
def select_by_patients(patients, all_images_original, all_images_opposite, all_labels):
    """
    Pick the images and labels that belong to the requested patients.

    :param patients: Iterable of patient IDs to select.
    :param all_images_original: Dict mapping patient ID to its images.
    :param all_images_opposite: Dict mapping patient ID to the opposite-side images.
    :param all_labels: Dict mapping patient ID to its labels.
    :return: Tuple ``(images_original, images_opposite, labels)``: two dicts
        keyed by patient ID (an empty list for a patient missing from either
        image dict) and one flat list of labels in the order of ``patients``.
    """
    picked_original, picked_opposite, picked_labels = {}, {}, []

    for pid in patients:
        available = pid in all_images_original and pid in all_images_opposite
        if not available:
            picked_original[pid] = []
            picked_opposite[pid] = []
            print(f"Paciente {pid} não encontrado em uma das listas de imagens.")
            continue

        picked_original[pid] = list(all_images_original[pid])
        picked_opposite[pid] = list(all_images_opposite[pid])
        picked_labels += list(all_labels[pid])

    return picked_original, picked_opposite, picked_labels

In [None]:
# Função para preparar os dados para treino e validação
def _augment_with_mask(image, mask, transform):
    """Apply one random draw of ``transform`` to a 2D patch and its mask together."""
    # TorchIO expects (C, W, H, D). A trailing singleton depth makes it treat
    # the patch as a 2D image; putting the singleton axis first instead turns
    # it into a one-voxel-thick volume that 3D rotations/translations push out
    # of the field of view.
    subject = tio.Subject(
        image=tio.ScalarImage(tensor=image[None, ..., None]),
        # LabelMap: receives the same spatial transform as the image, uses
        # nearest-neighbour interpolation and is skipped by intensity transforms
        mask=tio.LabelMap(tensor=mask[None, ..., None]),
    )
    augmented = transform(subject)
    # Drop the singleton depth again -> tensors of shape (1, H, W)
    return augmented["image"].tensor.squeeze(-1), augmented["mask"].tensor.squeeze(-1)


def prepare_data_for_training(images_left, images_right, labels_pair, mask_left, mask_right, train_size=0.7, validation_size=0.2, test_size=0.1, augment_factor=2):
    """
    Balance the two classes of every patient by oversampling class 1.

    For each patient the samples are split by label. When class 1 is the
    minority (and not empty), randomly chosen class-1 samples are augmented
    with TorchIO until both classes have the same size. The samples of each
    patient are then shuffled, keeping left image, right image and label
    aligned.

    Args:
        images_left (dict): patient ID -> list of 2D left-side patches.
        images_right (dict): patient ID -> list of 2D right-side patches.
        labels_pair (dict): patient ID -> list of pair labels.
        mask_left (dict): patient ID -> list of left-side masks.
        mask_right (dict): patient ID -> list of right-side masks.
        train_size, validation_size, test_size, augment_factor: accepted for
            interface compatibility; not used by this function.

    Returns:
        tuple: ``(balanced_images_left, balanced_images_right,
        balanced_labels)``, three dicts keyed by patient ID. Original samples
        are returned unchanged; augmented samples are torch tensors of shape
        (1, H, W). Patients with an empty label list are omitted.
        NOTE(review): masks are augmented together with their image so the
        two stay aligned, but they are not part of the return value.
    """
    balanced_images_left = {}
    balanced_images_right = {}
    balanced_labels = {}

    for patient_id in images_left:
        patient_labels = labels_pair[patient_id]
        if len(patient_labels) == 0:
            continue

        # Each sample is (left image, right image, left mask, right mask, label)
        class_1_samples = []
        class_0_samples = []
        for index, label in enumerate(patient_labels):
            sample = (
                images_left[patient_id][index],
                images_right[patient_id][index],
                mask_left[patient_id][index],
                mask_right[patient_id][index],
                label,
            )
            if label == 1:
                class_1_samples.append(sample)
            else:
                class_0_samples.append(sample)

        class_1_count = len(class_1_samples)
        class_0_count = len(class_0_samples)
        print(f"Paciente {patient_id}: Total de patches classe 0: {class_0_count}")
        print(f"Paciente {patient_id}: Total de patches classe 1 antes do uppersampling: {class_1_count}")

        if class_1_count == 0:
            # Nothing to draw from: np.random.randint(0) would raise
            print(f"Paciente {patient_id}: nenhum patch classe 1; oversampling ignorado.")
        elif class_1_count < class_0_count:
            needed_samples = class_0_count - class_1_count

            # Ranges are given per axis so the affine stays inside the image
            # plane: no scaling, rotation or shift along the singleton depth.
            transform = tio.Compose([
                tio.RandomAffine(
                    scales=(0.9, 1.1, 0.9, 1.1, 1, 1),
                    degrees=(0, 0, 10),
                    translation=(5, 5, 0),
                ),
                tio.RandomNoise(std=0.02),
                tio.RandomFlip(axes=(0, 1)),
            ])

            for _ in range(needed_samples):
                # Sample only among the original class-1 patches
                idx = np.random.randint(class_1_count)
                src_left, src_right, src_mask_left, src_mask_right, _label = class_1_samples[idx]
                aug_left, aug_mask_left = _augment_with_mask(src_left, src_mask_left, transform)
                aug_right, aug_mask_right = _augment_with_mask(src_right, src_mask_right, transform)
                class_1_samples.append((aug_left, aug_right, aug_mask_left, aug_mask_right, 1))

        class_1_count = len(class_1_samples)
        print(f"Paciente {patient_id}: Total de patches classe 1 depois do uppersampling: {class_1_count}")

        # Shuffle all samples of the patient with one shared permutation
        samples = class_1_samples + class_0_samples
        order = np.random.permutation(len(samples))
        balanced_images_left[patient_id] = [samples[i][0] for i in order]
        balanced_images_right[patient_id] = [samples[i][1] for i in order]
        balanced_labels[patient_id] = [samples[i][4] for i in order]

    return balanced_images_left, balanced_images_right, balanced_labels

In [5]:

# Directory that holds one sub-directory per patient
folder = "Novo_Contralateral"

# Patient IDs are the entry names of that directory; sorted because
# os.listdir returns them in arbitrary order
patient_ids = sorted(os.listdir(folder))

X_left, X_right, y, mask_left, mask_right = {}, {}, {}, {}, {}

# Load one patient at a time
for patient_id in patient_ids:
    # NOTE(review): only this single patient is processed; every other entry is skipped
    if patient_id != "sub-60K04":
        continue

    # load_patient_data returns None when the patient folder is missing or
    # malformed, so check the result before unpacking it
    loaded = load_patient_data(folder, patient_id)
    if loaded is None:
        continue
    patient_data, labels_pair = loaded

    if patient_data is not None:
        X_left[patient_id] = patient_data["images_left"]
        X_right[patient_id] = patient_data["images_right"]
        mask_left[patient_id] = patient_data["mask_left"]
        mask_right[patient_id] = patient_data["mask_right"]
        y[patient_id] = labels_pair

Paciente sub-60K04 carregado com sucesso.
Total de recortes: 1060


In [6]:
# Balance the classes of every loaded patient before training/validation
balanced_images_left, balanced_images_right, balanced_labels = prepare_data_for_training(
    X_left,
    X_right,
    y,
    mask_left,
    mask_right,
    train_size=0.7,
    validation_size=0.2,
    test_size=0.1,
)

Paciente sub-60K04: Total de patches classe 0: 966
Paciente sub-60K04: Total de patches classe 1 antes do uppersampling: 94
Paciente sub-60K04: Total de patches classe 1 depois do uppersampling: 966


In [15]:
def fix_shape(image):
    """
    Return ``image`` as a NumPy array without singleton dimensions.

    PyTorch tensors are detached from the autograd graph and moved to the CPU
    before conversion, because ``Tensor.numpy()`` raises for tensors that
    require grad or live on another device.

    :param image: ``torch.Tensor`` or array-like.
    :return: ``numpy.ndarray`` with every axis of length 1 removed.
    """
    if isinstance(image, torch.Tensor):
        image = image.detach().cpu().numpy()
    return np.squeeze(image)

# Flatten the per-patient dicts into one stacked array per side, rescale it to
# [0, 1] and append a channel axis for the neural network.
# The dicts are keyed by patient ID, so the image lists must be read through
# .values(); iterating the dict itself yields the ID strings, not the images.
# NOTE: this rebinds balanced_images_left/right from dict to ndarray, so the
# cell can only be run once after the balancing cell.
balanced_images_left = normalize_minmax(np.array([fix_shape(img) for lista in balanced_images_left.values() for img in lista]))
balanced_images_left = np.expand_dims(balanced_images_left, axis=-1)

balanced_images_right = normalize_minmax(np.array([fix_shape(img) for lista in balanced_images_right.values() for img in lista]))
balanced_images_right = np.expand_dims(balanced_images_right, axis=-1)

In [None]:
def plot_patient_slices(pdf_filename, images_left, images_right, patient_slices):
    """
    Write left/right image pairs side by side into a multi-page PDF.

    ``patient_slices`` lists how many consecutive images belong to each
    patient. The images are consumed in order: the first entry covers the
    first ``patient_slices[0]`` images, the next entry continues right after
    them, and so on. Counts that exceed the remaining images are clamped.

    :param pdf_filename: Path of the PDF to create; its directory is created
        when it does not exist.
    :param images_left: Sequence of left-side images, shaped (H, W) or (H, W, 1).
    :param images_right: Sequence of right-side images, same layout.
    :param patient_slices: Iterable with the number of images per patient.
    """
    def _as_2d(img):
        # imshow wants (H, W) for colormapped data; drop a trailing channel axis
        arr = np.asarray(img)
        return arr[..., 0] if arr.ndim == 3 and arr.shape[-1] == 1 else arr

    out_dir = os.path.dirname(pdf_filename)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    available = min(len(images_left), len(images_right))
    offset = 0  # index of the first image of the current patient

    with PdfPages(pdf_filename) as pdf:
        for slices in patient_slices:
            num_slices = min(slices, available - offset)
            if num_slices <= 0:
                continue

            for i in range(num_slices):
                fig, axes = plt.subplots(1, 2, figsize=(10, 5))

                axes[0].imshow(_as_2d(images_left[offset + i]), cmap='gray')
                axes[0].set_title(f'Esquerdo - Slice {i+1}')
                axes[0].axis('off')

                axes[1].imshow(_as_2d(images_right[offset + i]), cmap='gray')
                axes[1].set_title(f'Direito - Slice {i+1}')
                axes[1].axis('off')

                pdf.savefig(fig)
                # Close each figure so long runs do not accumulate memory
                plt.close(fig)

            # Advance so the next patient starts where this one ended
            offset += num_slices

    print(f"Arquivo PDF gerado: {pdf_filename}")

# Export the balanced left/right images to a PDF; each number is the image count of one patient
plot_patient_slices(
    "Pdf/Teste_Undersampling_Todos3.pdf",
    balanced_images_left,
    balanced_images_right,
    patient_slices=[
        120, 220, 154, 54, 40, 158, 228, 60, 466, 94,
        132, 236, 244, 148, 120, 282, 128, 56, 248, 150,
        208, 284, 292, 304, 168, 258, 146, 204, 488, 276,
        188, 186, 124, 300, 172, 166, 166, 112, 288, 158,
        308,
    ],
)