In [None]:
!pip install opendatasets
!pip install pandas
!pip install imagehash
!pip install glob
!pip install kagglehub
!pip install tensorflow
!pip install imbalanced-learn

In [None]:
import pandas as pd
import opendatasets as od
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import cv2
import os
import kagglehub
import glob
import imagehash
%matplotlib inline
import random
import shutil
from PIL import Image
from collections import defaultdict
from imgaug import augmenters as iaa
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, UpSampling2D, concatenate
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from sklearn.model_selection import train_test_split

In [None]:
# Download the breast ultrasound images dataset from Kaggle with opendatasets.
od.download("https://www.kaggle.com/datasets/aryashah2k/breast-ultrasound-images-dataset/data")

In [None]:
# Root folder of the downloaded dataset; the scan below uses each sub-folder name as the label.
root_dir = '/content/breast-ultrasound-images-dataset/Dataset_BUSI_with_GT'

In [None]:
# Number of images that could not be read, and the paths of those images.
count_corrupted, corrupted = 0, []

In [None]:
# Scan every class folder and collect one metadata row per file:
# [path, corrupted flag, perceptual hash, format, width, height, channels, label].
# The collectors are reset here so re-running this cell does not accumulate
# rows or corrupted-image counts from a previous run.
dataframe_list = []
count_corrupted = 0
corrupted = []

for folder in os.listdir(root_dir):
  child_dir = os.path.join(root_dir, folder)
  if not os.path.isdir(child_dir):
    # Only folders are scanned; stray files at the dataset root are ignored.
    continue

  for image in os.listdir(child_dir):
    img_dir = os.path.join(child_dir, image)
    img = cv2.imread(img_dir)

    if img is not None:
      # splitext copes with file names that contain more than one dot.
      image_format = os.path.splitext(image)[1].lstrip('.')

      # Convert only after the None check: cvtColor raises on an unreadable
      # image, which previously made the "corrupted" branch unreachable.
      img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

      # Convert the OpenCV array into a Pillow image
      img_pil = Image.fromarray(img_rgb)

      # Compute the perceptual hash (phash) of the image
      img_hash = imagehash.phash(img_pil)

      # OpenCV arrays are indexed (rows, columns, channels) = (height, width, channels).
      height, width, channels = img.shape
      img_corrupted = False
    else:
      count_corrupted += 1
      corrupted.append(img_dir)
      img_corrupted = True
      img_hash, image_format, width, height, channels = None, None, None, None, None

    dataframe_list.append([img_dir, img_corrupted, img_hash, image_format, width, height, channels, folder])

In [None]:
# Metadata table: one row per scanned file.
df = pd.DataFrame(
    dataframe_list,
    columns=[
        'image_path', 'corrupted', 'image_hash', 'image_format',
        'width', 'height', 'channels', 'label',
    ],
)

In [None]:
# Show only the mean, standard deviation, minimum and maximum rows of the summary.
df.describe().loc[['mean', 'std', 'min', 'max']]

In [None]:
# Bar chart with the number of files per image format.
sns.set(style="darkgrid")

image_format_counts = df['image_format'].value_counts()

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    x=image_format_counts.index,
    y=image_format_counts.values,
    palette="Greens",
    width=0.40,
    ax=ax,
)

ax.set_title('Número de Imagens por formato', fontsize=16)
ax.set_xlabel('Formato da Imagem', fontsize=12)
ax.set_ylabel('Número de Imagens', fontsize=12)

plt.show()

In [None]:
def count_png_files(directory):
    """Count the `.png` files under `directory`, including nested sub-folders.

    Args:
        directory: Path of the folder to walk.

    Returns:
        Number of files whose name ends with `.png`. A folder that does not
        exist yields 0, because `os.walk` produces no entries for it.
    """
    return sum(
        1
        for _, _, files in os.walk(directory)
        for file in files
        if file.endswith('.png')
    )


# Class folders (benign, normal, malignant)
diretorio_benign = "/content/breast-ultrasound-images-dataset/Dataset_BUSI_with_GT/benign"
diretorio_normal = "/content/breast-ultrasound-images-dataset/Dataset_BUSI_with_GT/normal"
diretorio_malignant = "/content/breast-ultrasound-images-dataset/Dataset_BUSI_with_GT/malignant"

# NOTE(review): every `.png` is counted, so `_mask.png` files stored in the
# same folders are included in these totals — confirm this is intended.
# (label used in the message, folder) pairs, in the order they are reported.
for descricao, diretorio in (
    ("de benigno", diretorio_benign),
    ("normal", diretorio_normal),
    ("maligno", diretorio_malignant),
):
    image_count = count_png_files(diretorio)
    print(f"Total de imagens no diretório {descricao}: {image_count}")

In [None]:
# Number of corrupted images
count_corrupted

In [None]:
# Same scan rows under capitalised column names, followed by a 15-row preview.
image_columns = ['Path', 'Corrupted', 'Hash', 'Format', 'Width', 'Height', 'Channels', 'Folder']
df_images = pd.DataFrame(data=dataframe_list, columns=image_columns)

print(f"Imagens corrompidas: {count_corrupted}")
print(df_images.head(15))

In [None]:
# Count missing values per column and report only the columns that have any.
valores_ausentes = df.isna().sum()
tem_ausentes = valores_ausentes > 0

print("Valores ausentes em cada coluna:")
print(valores_ausentes[tem_ausentes])

In [None]:
# Accepted ranges for the width and height columns
largura_minima, largura_maxima = 150, 800
altura_minima, altura_maxima = 150, 1200

# Rows whose width or height falls outside the accepted range
largura_fora = (df['width'] < largura_minima) | (df['width'] > largura_maxima)
altura_fora = (df['height'] < altura_minima) | (df['height'] > altura_maxima)
dimensoes_inconsistentes = df[largura_fora | altura_fora]

# Report the result
if dimensoes_inconsistentes.empty:
    print("Todas as dimensões das imagens estão dentro dos limites esperados.")
else:
    print("Dimensões inconsistentes encontradas:")
    print(dimensoes_inconsistentes[['image_path', 'width', 'height']])

In [None]:
# Show the dtype pandas assigned to each metadata column.
print(df.dtypes)

In [None]:
# Rows whose image path appears more than once (every occurrence is kept).
caminho_repetido = df.duplicated(subset='image_path', keep=False)
duplicatas = df[caminho_repetido]

if duplicatas.empty:
    print("Não foram encontradas imagens duplicadas.")
else:
    print("Imagens duplicadas encontradas:")
    print(duplicatas[['image_path']])

In [None]:
# Function that computes the hash of an image
def calculate_image_hash(image_path):
    """Return the average hash of the image stored at `image_path`.

    The file is opened in a `with` block so its handle is closed as soon as
    the hash has been computed.
    """
    with Image.open(image_path) as image:
        return imagehash.average_hash(image)

# Maps each hash to the paths of all files that produced it
hash_dict = defaultdict(list)

# Walk the dataset root and its sub-folders
for subdir, _, files in os.walk(root_dir):
    for file in files:
        if not file.endswith(('.png', '.jpg', '.jpeg')):
            continue
        if '_mask' in file:
            # `_mask` files are the ground truth of another image, not samples.
            # Masks of different images can be near-identical (e.g. empty
            # masks) and would otherwise be reported as duplicates.
            continue
        file_path = os.path.join(subdir, file)
        hash_dict[calculate_image_hash(file_path)].append(file_path)

# Keep only the hashes shared by more than one file
duplicates = {k: v for k, v in hash_dict.items() if len(v) > 1}

# Report the total number of files involved in a duplicate group
if duplicates:
    count = sum(len(paths) for paths in duplicates.values())
    print("Imagens duplicadas encontradas:")
    print(f" - {count}")
else:
    print("Nenhuma imagem duplicada encontrada.")


In [None]:
import os
import cv2
import imagehash
from PIL import Image
from collections import defaultdict

# Dataset root
root_dir = '/content/breast-ultrasound-images-dataset/Dataset_BUSI_with_GT'

# Function that computes the hash of an image
def calculate_image_hash(image_path):
    """Return the average hash of the image stored at `image_path`.

    The file is opened in a `with` block so its handle is closed as soon as
    the hash has been computed.
    """
    with Image.open(image_path) as image:
        return imagehash.average_hash(image)

# Group the sample images by hash. `_mask` files are skipped: they are the
# ground truth of another image, and masks of different images can be
# near-identical (e.g. empty masks), so hashing them would delete valid labels.
hash_dict = defaultdict(list)
for subdir, _, files in os.walk(root_dir):
    for file in files:
        if not file.endswith(('.png', '.jpg', '.jpeg')) or '_mask' in file:
            continue
        file_path = os.path.join(subdir, file)
        hash_dict[calculate_image_hash(file_path)].append(file_path)

# Remove duplicates, keeping the first file of each group
for img_hash, paths in hash_dict.items():
    for path in paths[1:]:
        folder = os.path.dirname(path)
        mask_prefix = os.path.splitext(os.path.basename(path))[0] + '_mask'
        os.remove(path)
        # Also remove the mask file(s) of the deleted image so that no
        # orphaned ground truth is left behind.
        for candidate in os.listdir(folder):
            if candidate.startswith(mask_prefix):
                os.remove(os.path.join(folder, candidate))

# Count the remaining files in each class folder and re-save each one.
# NOTE(review): this count includes `_mask` files — confirm whether the
# per-class totals are meant to cover samples only.
counts = {'benign': 0, 'malignant': 0, 'normal': 0}
for category in counts.keys():
    category_dir = os.path.join(root_dir, category)
    for subdir, _, files in os.walk(category_dir):
        for file in files:
            if file.endswith('.png') or file.endswith('.jpg') or file.endswith('.jpeg'):
                counts[category] += 1
                # Build the path from the folder being walked, so files in
                # nested sub-folders resolve correctly.
                file_path = os.path.join(subdir, file)
                # Re-open and save the image to confirm it is stored
                with Image.open(file_path) as img:
                    img.save(file_path)

# Report the number of remaining files per class
for category, count in counts.items():
    print(f'Pasta {category}: {count} imagens restantes.')

In [None]:
import os

# Dataset root
root_dir = '/content/breast-ultrasound-images-dataset/Dataset_BUSI_with_GT'

# Function that counts the image files in each category folder
def count_images(directory):
    """Count `.png`/`.jpg`/`.jpeg` files directly inside each class folder.

    Args:
        directory: Folder expected to contain `benign`, `malignant` and
            `normal` sub-folders.

    Returns:
        Dict mapping each of the three class names to its file count; a class
        whose folder does not exist is reported as 0.
    """
    extensions = ('.png', '.jpg', '.jpeg')
    counts = {}

    for category in ('benign', 'malignant', 'normal'):
        category_dir = os.path.join(directory, category)
        if os.path.isdir(category_dir):
            counts[category] = sum(name.endswith(extensions) for name in os.listdir(category_dir))
        else:
            counts[category] = 0

    return counts

# Call the function and print the result
image_counts = count_images(root_dir)

for category, count in image_counts.items():
    print(f'Pasta {category}: {count} imagens')


In [None]:
def plot_distribution(counts):
    """Draw a bar chart with one bar per class.

    Args:
        counts: Mapping of class name to number of images.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.bar(list(counts.keys()), list(counts.values()), color=['blue', 'red', 'green'])
    ax.set_title("Distribuição de Imagens por Classe Após Remoção de Duplicatas")
    ax.set_xlabel("Classes")
    ax.set_ylabel("Número de Imagens")
    plt.show()

# Draw the chart for the counts computed after duplicate removal
plot_distribution(counts)

In [None]:
import os
import cv2
import matplotlib.pyplot as plt
from PIL import Image

# Target size of the resized images
new_width = 224
new_height = 224

# Function that resizes, saves and shows images
def process_and_save_images(directory, num_images=4):
    """Resize the first `num_images` images under `directory` in place and plot them.

    Each processed file is overwritten with its resized version, and the
    original and resized images are drawn side by side.

    Args:
        directory: Folder walked recursively for `.png`/`.jpg`/`.jpeg` files.
        num_images: Maximum number of images to resize and display.
    """
    # Each image uses two cells (original + resized) of a 4-column grid; the
    # number of rows grows with `num_images` (2 rows for the default of 4).
    n_cols = 4
    n_rows = max(1, -(-num_images * 2 // n_cols))  # ceiling division

    images_displayed = 0
    plt.figure(figsize=(10, 10))

    for subdir, _, files in os.walk(directory):
        for file in files:
            if images_displayed >= num_images:
                break
            if not file.endswith(('.png', '.jpg', '.jpeg')):
                continue

            file_path = os.path.join(subdir, file)
            image = cv2.imread(file_path)
            if image is None:
                # cv2.imread returns None for unreadable files; skip them
                # instead of failing inside cv2.resize.
                print(f"Erro ao abrir a imagem: {file_path}")
                continue

            # Resize the image
            # NOTE(review): `_mask` files are resized with the same default
            # interpolation as the images — confirm that is acceptable.
            resized_image = cv2.resize(image, (new_width, new_height))

            # Save the resized image over the original file
            cv2.imwrite(file_path, resized_image)

            # Show the original and the resized image
            plt.subplot(n_rows, n_cols, images_displayed * 2 + 1)
            plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            plt.title('Imagem Original')
            plt.axis('off')

            plt.subplot(n_rows, n_cols, images_displayed * 2 + 2)
            plt.imshow(cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB))
            plt.title('Imagem Redimensionada')
            plt.axis('off')

            images_displayed += 1

        if images_displayed >= num_images:
            break

    plt.tight_layout()
    plt.show()
    

In [None]:
import os
import cv2
import matplotlib.pyplot as plt
from PIL import Image

# Dataset root directory
root_dir = '/content/breast-ultrasound-images-dataset/Dataset_BUSI_with_GT'

In [None]:
# Target size of the resized images
new_width = 224
new_height = 224

# Function that resizes and saves images
def resize_and_save_images(directory):
    """Resize every image under `directory` to `new_width` x `new_height`, in place.

    Files whose name contains `_mask` are resized with nearest-neighbour
    sampling so that no intermediate values are introduced into the
    ground-truth masks; all other images use Pillow's default filter.

    Args:
        directory: Folder walked recursively for `.png`/`.jpg`/`.jpeg` files.
    """
    target_size = (new_width, new_height)

    for subdir, _, files in os.walk(directory):
        for file in files:
            if not file.endswith(('.png', '.jpg', '.jpeg')):
                continue

            file_path = os.path.join(subdir, file)

            # Open the image
            with Image.open(file_path) as img:
                # Resize the image
                if '_mask' in file:
                    resized_image = img.resize(target_size, Image.NEAREST)
                else:
                    resized_image = img.resize(target_size)
                # Save the resized image over the original file
                resized_image.save(file_path)


In [None]:
# Function that shows resized images
def display_images(directory, num_images=4):
    """Plot the first `num_images` images found under `directory` in one row.

    Args:
        directory: Folder walked recursively for `.png`/`.jpg`/`.jpeg` files.
        num_images: Maximum number of images to display.
    """
    images_displayed = 0
    plt.figure(figsize=(10, 10))

    for subdir, _, files in os.walk(directory):
        for file in files:
            if images_displayed >= num_images:
                break
            if not file.endswith(('.png', '.jpg', '.jpeg')):
                continue

            file_path = os.path.join(subdir, file)

            # Open the resized image
            image = cv2.imread(file_path)
            if image is None:
                # cv2.imread returns None for unreadable files; skip them
                # instead of failing inside cv2.cvtColor.
                print(f"Erro ao abrir a imagem: {file_path}")
                continue

            # Show the resized image
            plt.subplot(1, num_images, images_displayed + 1)
            plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            plt.title('Imagem Redimensionada')
            plt.axis('off')

            images_displayed += 1

        # Stop walking the remaining folders once enough images were shown.
        if images_displayed >= num_images:
            break

    plt.tight_layout()
    plt.show()


In [None]:
# Function that prints the dimensions of the images
def get_image_dimensions(directory):
    """Print the dimensions of every file located one level below `directory`.

    Only entries of `directory` that are folders are inspected. A file that
    cannot be opened is reported with an error line instead of raising.

    Args:
        directory: Folder whose sub-folders contain the images.
    """
    entries = (os.path.join(directory, entry) for entry in os.listdir(directory))
    for folder_path in filter(os.path.isdir, entries):
        for image_name in os.listdir(folder_path):
            try:
                with Image.open(os.path.join(folder_path, image_name)) as img:
                    width, height = img.size
                    print(f'Imagem: {image_name} | Dimensões: {width}x{height}')
            except Exception as e:
                print(f"Erro ao abrir {image_name}: {e}")


In [None]:
# Resize the dataset images in place, preview a few of them, then print the resulting dimensions.
resize_and_save_images(root_dir)
display_images(root_dir)
get_image_dimensions(root_dir)


In [None]:
# Process and show sample images from each class folder.
# The helper defined earlier in this notebook is `process_and_save_images`;
# the previously called `process_and_display_images` is not defined and
# raised NameError.
for category in counts.keys():
    print(f'\nMostrando imagens da pasta: {category}')
    category_dir = os.path.join(root_dir, category)
    process_and_save_images(category_dir)

In [None]:
import os
import cv2
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np

# Dataset root
root_dir = '/content/breast-ultrasound-images-dataset/Dataset_BUSI_with_GT'

# Function that normalizes, saves and shows images
def normalize_save_and_display_images(directory, num_images=4):
    """Rescale every image under `directory` to [0, 1], write it back as uint8 and plot samples.

    All matching files are processed; only the first `num_images` are plotted
    (original next to the re-saved version).

    NOTE(review): the image is scaled to [0, 1] and immediately scaled back
    to 0-255 before saving, so the stored pixels are expected to be unchanged
    — confirm whether normalisation should instead happen at load time.

    Args:
        directory: Folder walked recursively for `.png`/`.jpg`/`.jpeg` files.
        num_images: Maximum number of original/normalized pairs to display.
    """
    # Each image uses two cells (original + normalized) of a 4-column grid; the
    # number of rows grows with `num_images` (2 rows for the default of 4).
    n_cols = 4
    n_rows = max(1, -(-num_images * 2 // n_cols))  # ceiling division

    images_displayed = 0
    plt.figure(figsize=(10, 10))

    for subdir, _, files in os.walk(directory):
        for file in files:
            if not file.endswith(('.png', '.jpg', '.jpeg')):
                continue

            file_path = os.path.join(subdir, file)

            # Open the image
            image = cv2.imread(file_path)
            if image is None:
                print(f"Erro ao abrir a imagem: {file_path}")
                continue

            # Normalize the image (values between 0 and 1)
            normalized_image = image / 255.0

            # Convert back to uint8 for saving. Round first: astype truncates,
            # so a float result just below an integer would lose one
            # intensity level.
            normalized_image_uint8 = np.rint(normalized_image * 255).astype(np.uint8)

            # Save the image over the original file
            cv2.imwrite(file_path, normalized_image_uint8)

            # Show the original and the normalized image
            if images_displayed < num_images:
                plt.subplot(n_rows, n_cols, images_displayed * 2 + 1)
                plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
                plt.title('Imagem Original')
                plt.axis('off')

                plt.subplot(n_rows, n_cols, images_displayed * 2 + 2)
                plt.imshow(cv2.cvtColor(normalized_image_uint8, cv2.COLOR_BGR2RGB))
                plt.title('Imagem Normalizada')
                plt.axis('off')

                images_displayed += 1

        # No early exit here: files in the remaining folders still have to be
        # processed after the display quota is reached.

    plt.tight_layout()
    plt.show()

# Normalize, save and show the images of each class folder
for category in ['benign', 'malignant', 'normal']:
    print(f'\nMostrando imagens da pasta: {category}')
    category_dir = os.path.join(root_dir, category)
    normalize_save_and_display_images(category_dir)



In [None]:
# Function that adjusts contrast using CLAHE
def adjust_contrast_clahe(directory):
    """Apply CLAHE contrast enhancement, in place, to the images under `directory`.

    Each image is read as grayscale, enhanced and written back to the same
    path. Files whose name contains `_mask` are skipped: they are ground-truth
    masks and must keep their label values. Unreadable files are reported and
    skipped.

    Args:
        directory: Folder walked recursively for `.png`/`.jpg`/`.jpeg` files.
    """
    # The CLAHE settings do not depend on the file, so build the object once.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

    for subdir, _, files in os.walk(directory):
        for file in files:
            if not file.endswith(('.png', '.jpg', '.jpeg')) or '_mask' in file:
                continue

            file_path = os.path.join(subdir, file)
            image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
            if image is None:
                print(f"Erro ao abrir a imagem: {file_path}")
                continue

            # Apply CLAHE for adaptive contrast enhancement
            clahe_image = clahe.apply(image)

            # Save the enhanced image over the original file
            cv2.imwrite(file_path, clahe_image)

# Apply CLAHE to the images of each class folder
for category in counts.keys():
    category_dir = os.path.join(root_dir, category)
    adjust_contrast_clahe(category_dir)

In [None]:
# Function that reduces noise
def denoise_images(directory):
    """Apply a bilateral filter, in place, to the images under `directory`.

    Files whose name contains `_mask` are skipped: they are ground-truth masks,
    not ultrasound images. Unreadable files are reported and skipped.

    Args:
        directory: Folder walked recursively for `.png`/`.jpg`/`.jpeg` files.
    """
    for subdir, _, files in os.walk(directory):
        for file in files:
            if not file.endswith(('.png', '.jpg', '.jpeg')) or '_mask' in file:
                continue

            file_path = os.path.join(subdir, file)
            image = cv2.imread(file_path)
            if image is None:
                print(f"Erro ao abrir a imagem: {file_path}")
                continue

            # Noise reduction with a bilateral filter
            denoised_image = cv2.bilateralFilter(image, d=9, sigmaColor=75, sigmaSpace=75)

            # Save the denoised image over the original file
            cv2.imwrite(file_path, denoised_image)

# Denoise the images of each class folder
for category in counts.keys():
    category_dir = os.path.join(root_dir, category)
    denoise_images(category_dir)

In [None]:
# Function that shows a few images
def display_images(directory, num_images=4):
    """Plot the first `num_images` images found under `directory` in one row.

    Args:
        directory: Folder walked recursively for `.png`/`.jpg`/`.jpeg` files.
        num_images: Maximum number of images to display.
    """
    plt.figure(figsize=(10, 10))

    shown = 0
    for subdir, _, files in os.walk(directory):
        for file in files:
            if shown >= num_images:
                break
            if not file.endswith(('.png', '.jpg', '.jpeg')):
                continue

            image = cv2.imread(os.path.join(subdir, file))
            shown += 1

            # Show the image in the next free slot of the row
            plt.subplot(1, num_images, shown)
            plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            plt.title(f'Imagem {shown}')
            plt.axis('off')

        if shown >= num_images:
            break

    plt.tight_layout()
    plt.show()

# Show sample images from each class folder
for category in counts:
    print(f'\nMostrando imagens da pasta: {category}')
    category_dir = os.path.join(root_dir, category)
    display_images(category_dir)


In [None]:
import os
import numpy as np
from PIL import Image
import imgaug.augmenters as iaa
import matplotlib.pyplot as plt

IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')


def _list_sample_images(directory):
    """Return the sorted names of the image files in `directory`, excluding `_mask` files."""
    return sorted(
        name for name in os.listdir(directory)
        if name.endswith(IMAGE_EXTENSIONS) and '_mask' not in name
    )


# Function that performs data augmentation and saves the new images
def augment_images(input_dir, output_dir, target_count):
    """Create augmented copies of the images in `input_dir` until `target_count` is reached.

    Only sample images are counted and used as augmentation sources; files
    whose name contains `_mask` are ground truth and are neither counted nor
    augmented. Each new image is a randomly chosen source transformed by a
    random rotation, horizontal flip and brightness change, saved in
    `output_dir` as `<source name>_aug_<i>.png`.

    NOTE(review): no mask is generated for the augmented images, so a loader
    that requires `<name>_mask.png` will skip them — confirm this is acceptable
    for the segmentation stage.

    Args:
        input_dir: Folder holding the source images.
        output_dir: Folder where the augmented images are written (created if missing).
        target_count: Desired number of sample images.

    Returns:
        Number of sample images available after the call (sources plus newly
        created images); 0 if `input_dir` holds no sample image.
    """
    images = [os.path.join(input_dir, name) for name in _list_sample_images(input_dir)]
    current_count = len(images)

    if current_count >= target_count:
        print(f"A pasta {input_dir} já está balanceada.")
        return current_count

    if not images:
        # np.random.choice raises on an empty sequence; report and stop instead.
        print(f"Nenhuma imagem encontrada em {input_dir}; nada a aumentar.")
        return 0

    os.makedirs(output_dir, exist_ok=True)

    num_to_create = target_count - current_count
    aug = iaa.Sequential([
        iaa.Affine(rotate=(-25, 25)),
        iaa.Fliplr(0.5),
        iaa.Multiply((0.8, 1.2)),
    ])

    print(f"Criando {num_to_create} novas imagens para {input_dir}...")

    for i in range(num_to_create):
        img_path = np.random.choice(images)
        # Close the source file as soon as its pixels have been copied.
        with Image.open(img_path) as image:
            image_array = np.array(image)

        augmented_image = aug(image=image_array)
        augmented_pil = Image.fromarray(augmented_image)

        save_path = os.path.join(output_dir, f"{os.path.splitext(os.path.basename(img_path))[0]}_aug_{i}.png")
        augmented_pil.save(save_path)

    return current_count + num_to_create

# Function that counts images
def count_images(directory, categories):
    """Count the sample images (mask files excluded) of each category folder.

    Args:
        directory: Dataset root containing one folder per category.
        categories: Iterable of category folder names.

    Returns:
        Dict mapping each category to its number of sample images.
    """
    return {
        category: len(_list_sample_images(os.path.join(directory, category)))
        for category in categories
    }

# Path of the dataset root
root_dir = '/content/breast-ultrasound-images-dataset/Dataset_BUSI_with_GT'
categories = ['benign', 'malignant', 'normal']

# Counts before balancing
counts_before = count_images(root_dir, categories)
print("Contagem antes do balanceamento:", counts_before)

# Balance the classes: bring 'malignant' and 'normal' up to the 'benign' count
target_count = counts_before['benign']
for minority in ('malignant', 'normal'):
    minority_dir = os.path.join(root_dir, minority)
    augment_images(minority_dir, minority_dir, target_count)

# Counts after balancing
counts_after = count_images(root_dir, categories)
print("Contagem após o balanceamento:", counts_after)

# Plot the class distribution
def plot_distribution(counts_before, counts_after):
    """Draw grouped bars comparing per-class counts before and after balancing.

    Args:
        counts_before: Mapping of class name to count before balancing.
        counts_after: Mapping of class name to count after balancing; it is
            looked up by the keys of `counts_before`.
    """
    categories = list(counts_before.keys())
    values_before = [counts_before[category] for category in categories]
    # Look the values up by class so both bar groups stay aligned even if the
    # two dicts were built in a different key order.
    values_after = [counts_after[category] for category in categories]

    x = np.arange(len(categories))
    width = 0.35

    plt.figure(figsize=(10, 6))
    plt.bar(x - width/2, values_before, width, label='Antes do Balanceamento', color='skyblue')
    plt.bar(x + width/2, values_after, width, label='Depois do Balanceamento', color='orange')

    plt.xlabel('Classes')
    plt.ylabel('Número de Imagens')
    plt.title('Distribuição de Imagens Antes e Depois do Balanceamento')
    plt.xticks(x, categories)
    plt.legend()

    plt.show()

# Show the chart
plot_distribution(counts_before, counts_after)


In [None]:
import os
from PIL import Image

# Folder where the images are stored
root_dir = '/content/breast-ultrasound-images-dataset/Dataset_BUSI_with_GT'

# Function that prints the dimensions of the images
def get_image_dimensions(root_dir):
    """Print the dimensions of every file located one level below `root_dir`.

    Entries of `root_dir` that are not folders are ignored. A file that cannot
    be opened is reported with an error line instead of raising.

    Args:
        root_dir: Folder whose sub-folders contain the images.
    """
    for folder in os.listdir(root_dir):
        folder_path = os.path.join(root_dir, folder)
        if not os.path.isdir(folder_path):
            continue

        for image_name in os.listdir(folder_path):
            try:
                # Open the image and read its size
                with Image.open(os.path.join(folder_path, image_name)) as img:
                    width, height = img.size
                    print(f'Imagem: {image_name} | Dimensões: {width}x{height}')
            except Exception as e:
                print(f"Erro ao abrir {image_name}: {e}")

# Call the function
get_image_dimensions(root_dir)

In [None]:
import os

# Dataset root
root_dir = '/content/breast-ultrasound-images-dataset/Dataset_BUSI_with_GT'

# Function that counts the image files in each category folder
def count_images(directory):
    """Count `.png`/`.jpg`/`.jpeg` files directly inside each class folder.

    Args:
        directory: Folder expected to contain `benign`, `malignant` and
            `normal` sub-folders.

    Returns:
        Dict mapping each of the three class names to its file count; a class
        whose folder does not exist is reported as 0.
    """
    def files_in(category):
        # Number of image files in one class folder (0 when the folder is missing).
        category_dir = os.path.join(directory, category)
        if not os.path.isdir(category_dir):
            return 0
        return len([f for f in os.listdir(category_dir) if f.endswith(('.png', '.jpg', '.jpeg'))])

    return {category: files_in(category) for category in ('benign', 'malignant', 'normal')}

# Call the function and print the result
image_counts = count_images(root_dir)

for category, count in image_counts.items():
    print(f'Pasta {category}: {count} imagens')


In [None]:
# Dataset root
root_dir = '/content/breast-ultrasound-images-dataset/Dataset_BUSI_with_GT'

# Function that loads images and masks
def load_images_and_masks(directory):
    """Load every image that has a `<name>_mask.png` file next to it.

    For each of the `benign`, `malignant` and `normal` folders, the sample
    images are read with OpenCV, resized to 224x224 and scaled to [0, 1].
    Files whose name contains `_mask` are never treated as samples; they are
    only read as the ground truth of their image. Images without a mask, and
    files OpenCV cannot read, are reported and skipped.

    NOTE(review): only `<name>_mask.png` is used; if an image has additional
    mask files they are ignored — confirm against the dataset layout.

    Args:
        directory: Dataset root containing the three class folders.

    Returns:
        Tuple `(images, masks)` of NumPy arrays holding the loaded images and
        their grayscale masks, as float32 values divided by 255. Two empty
        arrays are returned when nothing could be loaded.
    """
    images = []
    masks = []

    for label in ['benign', 'malignant', 'normal']:
        class_dir = os.path.join(directory, label)

        if not os.path.exists(class_dir):
            print(f"Warning: Directory not found: {class_dir}")
            continue

        # Sorted listing: os.listdir order is arbitrary, and a stable order is
        # needed for the seeded train/validation split to be reproducible.
        for file in sorted(os.listdir(class_dir)):
            if not file.endswith(('.png', '.jpg', '.jpeg')):
                continue

            # splitext keeps the full stem even if the name contains extra dots.
            stem = os.path.splitext(file)[0]
            if '_mask' in stem:
                # Masks are loaded through their image, never as samples.
                continue

            image_path = os.path.join(class_dir, file)
            mask_path = os.path.join(class_dir, stem + '_mask.png')

            if not os.path.exists(mask_path):
                print(f"Warning: Mask file not found for image: {image_path}")
                continue

            image = cv2.imread(image_path)
            if image is None:
                print(f"Warning: Failed to load image: {image_path}")
                continue

            mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            if mask is None:
                print(f"Warning: Failed to load mask: {mask_path}")
                continue

            # Scale to [0, 1] as float32
            image = cv2.resize(image, (224, 224)).astype('float32') / 255.0
            # Nearest-neighbour resizing does not blend label values at the
            # mask borders.
            mask = cv2.resize(mask, (224, 224), interpolation=cv2.INTER_NEAREST).astype('float32') / 255.0

            images.append(image)
            masks.append(mask)

    if len(images) == 0 or len(masks) == 0:
        print("Erro: Nenhuma imagem ou máscara foi carregada.")
        return np.array([]), np.array([])

    return np.array(images), np.array(masks)

# Load images and masks
X, y = load_images_and_masks(root_dir)

# Check that images and masks were loaded
if X.size == 0 or y.size == 0:
    print("Erro ao carregar as imagens ou máscaras.")
else:
    print(f"Total de imagens carregadas: {X.shape[0]}")
    print(f"Total de máscaras carregadas: {y.shape[0]}")

    # Split the data into training and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    print(f"Dados de treino: {X_train.shape}, {y_train.shape}")
    print(f"Dados de validação: {X_val.shape}, {y_val.shape}")


In [None]:
def build_resnet50_unet(input_shape):
    """Build a U-Net-style segmentation model on top of an ImageNet ResNet50 encoder.

    The decoder starts from `conv5_block3_out` and runs four stages of
    2x upsampling, 3x3 convolution and concatenation with an encoder skip
    feature map, followed by a last upsampling/convolution stage and a 1x1
    sigmoid convolution with one output channel.

    Args:
        input_shape: Shape of one input image, passed to `Input`.

    Returns:
        The assembled Keras `Model` (not compiled).
    """
    inputs = Input(shape=input_shape)
    base_model = ResNet50(weights='imagenet', include_top=False, input_tensor=inputs)

    # Encoder: skip feature maps, listed from the shallowest to the deepest
    skip_layers = ("conv1_relu", "conv2_block3_out", "conv3_block4_out", "conv4_block6_out")
    skips = [base_model.get_layer(layer_name).output for layer_name in skip_layers]
    x = base_model.get_layer("conv5_block3_out").output

    # Decoder: the deepest skip connection is merged first
    for filters, skip in zip((512, 256, 128, 64), reversed(skips)):
        x = UpSampling2D(size=(2, 2))(x)
        x = Conv2D(filters, (3, 3), activation="relu", padding="same")(x)
        x = concatenate([x, skip])

    # Last stage has no skip connection
    x = UpSampling2D(size=(2, 2))(x)
    x = Conv2D(32, (3, 3), activation="relu", padding="same")(x)

    outputs = Conv2D(1, (1, 1), activation="sigmoid")(x)

    return Model(inputs, outputs)

model = build_resnet50_unet((224, 224, 3))
model.summary()


In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.optimizers import Adam

# Compile with binary cross-entropy and Adam (learning rate 1e-4).
model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=1e-4),
    metrics=['accuracy'],
)

# Save the model only when the validation loss improves.
checkpoint = ModelCheckpoint(
    filepath='best_model.keras',
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)

history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=16,
    epochs=5,
    callbacks=[checkpoint],
)

# Training and validation loss curves
for history_key, curve_label in (
    ('loss', 'Perda de Treinamento'),
    ('val_loss', 'Perda de Validação'),
):
    plt.plot(history.history[history_key], label=curve_label)
plt.title('Curva de Perda')
plt.xlabel('Épocas')
plt.ylabel('Perda')
plt.legend()
plt.show()

# Evaluate the model on the validation set
loss, accuracy = model.evaluate(X_val, y_val)
print(f'Perda: {loss}')
print(f'Acurácia: {accuracy}')


In [None]:
# Combined loss
from tensorflow.keras import backend as K

def dice_loss(y_true, y_pred):
    """Return the smoothed Dice loss of each sample in the batch.

    The sums run over every axis except the batch axis, so the function works
    for predictions with or without a trailing channel axis.

    Args:
        y_true: Ground-truth masks; assumed to have the same rank as `y_pred`
            when the loss is called — TODO confirm for the Keras version in use.
        y_pred: Predicted masks.

    Returns:
        Tensor with one loss value per sample.
    """
    y_true = K.cast(y_true, y_pred.dtype)
    axes = list(range(1, K.ndim(y_pred)))
    intersection = K.sum(K.abs(y_true * y_pred), axis=axes)
    sum_ = K.sum(K.abs(y_true) + K.abs(y_pred), axis=axes)
    return 1 - (2. * intersection + 1.) / (sum_ + 1.)

def bce_dice_loss(y_true, y_pred):
    """Return binary cross-entropy plus Dice loss, one value per sample.

    The element-wise cross-entropy is averaged over every non-batch axis
    before the addition, so both terms have one value per sample. Adding the
    element-wise tensor directly to the per-sample Dice vector would broadcast
    the two against each other.
    """
    y_true = K.cast(y_true, y_pred.dtype)
    axes = list(range(1, K.ndim(y_pred)))
    bce_loss = K.mean(K.binary_crossentropy(y_true, y_pred), axis=axes)
    dice_loss_val = dice_loss(y_true, y_pred)
    return bce_loss + dice_loss_val


In [None]:
# Aumento de dados com Keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Random transformations applied by the generator.
augmentation_options = {
    'rotation_range': 30,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
}
datagen = ImageDataGenerator(**augmentation_options)


In [None]:
from tensorflow.keras import layers, models

def unet(input_size=(256, 256, 1)):
    """Build a small U-Net-style model with one downsampling step and one skip connection.

    Two convolution blocks form the encoder (the second runs on the pooled
    output of the first). The decoder upsamples the second block, concatenates
    it with the first block and applies two more convolutions before a 1x1
    sigmoid convolution with one output channel.

    Args:
        input_size: Shape of one input image, passed to `layers.Input`.

    Returns:
        The assembled Keras model (not compiled).
    """
    inputs = layers.Input(input_size)

    # Encoder
    conv1 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(inputs)
    conv1 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(conv1)
    pool1 = layers.MaxPooling2D(pool_size=(2, 2))(conv1)

    conv2 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(pool1)
    conv2 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(conv2)
    # The second pooling layer that used to follow was never connected to the
    # output, so it was not part of the model and has been removed.
    # NOTE(review): if a deeper bottleneck was intended, it still has to be added.

    # Decoder
    up3 = layers.UpSampling2D(size=(2, 2))(conv2)
    concat3 = layers.concatenate([up3, conv1], axis=3)
    conv3 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(concat3)
    conv3 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(conv3)

    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(conv3)

    model = models.Model(inputs=[inputs], outputs=[outputs])
    return model

# Rebinds `model`, replacing any model previously stored under that name.
model = unet()
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(rescale=1./255)  # Rescale pixel values to [0, 1]


In [None]:
# from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Normalize the images
datagen = ImageDataGenerator(rescale=1./255)  # Rescales to [0, 1]

# Training data
# NOTE(review): the generator reads the dataset root directly; presumably every
# sub-folder found there becomes a class and `_mask` files are read as samples — confirm.
train_generator = datagen.flow_from_directory(
    '/content/breast-ultrasound-images-dataset/Dataset_BUSI_with_GT',  # Directory path
    target_size=(256, 256),  # Size of the input images
    batch_size=32,  # Batch size
    class_mode='categorical',  # One-hot encoded labels (multiple classes)
    shuffle=True,
)


In [None]:
import os
import shutil
import random

# Directory paths
val_dir = '/content/breast-ultrasound-images-dataset/Dataset_BUSI_with_GT/val'
test_dir = '/content/breast-ultrasound-images-dataset/Dataset_BUSI_with_GT/test'

# Classes: benign, malignant, normal
classes = ['benign', 'malignant', 'normal']

# Move a random half of each validation class folder into the test folder.
if os.path.exists(val_dir):
    # Create the test folder if it does not exist
    os.makedirs(test_dir, exist_ok=True)

    for class_name in classes:
        val_class_dir = os.path.join(val_dir, class_name)
        test_class_dir = os.path.join(test_dir, class_name)

        # A class without a validation folder is reported and skipped
        if not os.path.exists(val_class_dir):
            print(f"Erro: O diretório {val_class_dir} não existe.")
            continue

        # Create the class folder inside test if it does not exist
        os.makedirs(test_class_dir, exist_ok=True)

        # Image files of the class
        images = [img for img in os.listdir(val_class_dir) if img.endswith(('jpg', 'jpeg', 'png'))]

        if not images:
            print(f"A classe {class_name} não contém imagens.")
            continue

        # Randomly pick 50% of the images (rounded down) and move them
        num_test_images = len(images) // 2
        for image in random.sample(images, num_test_images):
            shutil.move(
                os.path.join(val_class_dir, image),
                os.path.join(test_class_dir, image),
            )

    print("Imagens de validação separadas para teste com sucesso!")
else:
    print(f"O diretório {val_dir} não foi encontrado!")


In [None]:
import os

# Sanity check: show which entries now sit directly under the dataset root.
dataset_dir = '/content/breast-ultrasound-images-dataset/Dataset_BUSI_with_GT'
_root_entries = os.listdir(dataset_dir)
print("Estrutura do diretório:", _root_entries)


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Example convolutional classifier, assembled one layer at a time.
model = Sequential()

# Feature extractor: two conv + max-pool stages; the first conv declares the
# 256x256 three-channel (RGB) input.
model.add(Conv2D(32, (3, 3), input_shape=(256, 256, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Classifier head: flatten, one hidden dense layer, 3-way softmax output.
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(3, activation='softmax'))

# Compile for one-hot multi-class targets.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer-by-layer summary.
model.summary()


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Training generator: pixel values rescaled to [0, 1]; shuffling stays at the
# flow_from_directory default.
train_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    '/content/breast-ultrasound-images-dataset/Dataset_BUSI_with_GT/train',  # training directory
    target_size=(256, 256),
    batch_size=32,
    class_mode='categorical',
)

# Validation generator
validation_datagen = ImageDataGenerator(rescale=1./255)

validation_generator = validation_datagen.flow_from_directory(
    '/content/breast-ultrasound-images-dataset/Dataset_BUSI_with_GT/val',  # validation directory
    target_size=(256, 256),
    batch_size=32,
    class_mode='categorical',
)

# Test generator. shuffle=False keeps batches in the same order as
# `test_generator.classes` / `test_generator.filenames`, so predictions made
# from this generator can be compared against those labels.
test_datagen = ImageDataGenerator(rescale=1./255)

test_generator = test_datagen.flow_from_directory(
    '/content/breast-ultrasound-images-dataset/Dataset_BUSI_with_GT/test',
    target_size=(256, 256),
    batch_size=32,
    class_mode='categorical',
    shuffle=False,
)

# Train the model. No explicit step counts: the generators report their own
# length (including the final partial batch), whereas `samples // batch_size`
# silently dropped that batch and became 0 for splits smaller than one batch.
# The returned History object is kept so the real curves can be plotted later.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=10,  # number of epochs
)

# Evaluate on the complete test set (every batch, including the last one).
test_loss, test_acc = model.evaluate(test_generator)
print(f"Test accuracy: {test_acc}")


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def to_numpy(value):
    """Return `value` in a form that can be handed straight to matplotlib.

    - NumPy arrays, NumPy scalars and plain Python numbers (int/float) are
      returned unchanged.
    - Objects exposing both ``cpu`` and ``detach`` (the PyTorch tensor
      interface) are converted with ``value.cpu().detach().numpy()``.
    - Objects exposing only ``numpy`` (as TensorFlow eager tensors do) are
      converted with ``value.numpy()``.
    - Anything else is passed to ``np.asarray``.
    """
    # Already plottable: nothing to convert.
    if isinstance(value, (np.ndarray, np.generic, int, float)):
        return value
    # PyTorch-style tensor: bring to host memory and drop the autograd graph.
    if hasattr(value, 'cpu') and hasattr(value, 'detach'):
        return value.cpu().detach().numpy()
    # Tensor-like object with a direct NumPy accessor.
    if hasattr(value, 'numpy'):
        return value.numpy()
    # Last resort (lists, tuples, ...).
    return np.asarray(value)

def plot_metrics(metrics):
    """Plot loss and Dice curves for training and validation, side by side.

    `metrics` is indexed with the keys 'train_losses', 'val_losses',
    'train_dices' and 'val_dices'. The x axis runs from 1 to
    len(metrics['train_losses']). The loss series are plotted as given; each
    Dice value is passed through `to_numpy` first.
    """
    epoch_axis = np.arange(1, len(metrics['train_losses']) + 1)

    # (legend label, series) pairs for each panel.
    loss_curves = (
        ('Train Loss', metrics['train_losses']),
        ('Val Loss', metrics['val_losses']),
    )
    dice_curves = (
        ('Train Dice', [to_numpy(score) for score in metrics['train_dices']]),
        ('Val Dice', [to_numpy(score) for score in metrics['val_dices']]),
    )

    fig, (loss_ax, dice_ax) = plt.subplots(1, 2, figsize=(12, 5))

    # Left panel: losses
    for curve_label, series in loss_curves:
        loss_ax.plot(epoch_axis, series, label=curve_label)
    loss_ax.set_title('Training and Validation Losses')
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Loss')
    loss_ax.legend()

    # Right panel: Dice coefficients
    for curve_label, series in dice_curves:
        dice_ax.plot(epoch_axis, series, label=curve_label)
    dice_ax.set_title('Training and Validation Dice Coefficients')
    dice_ax.set_xlabel('Epoch')
    dice_ax.set_ylabel('Dice Coefficient')
    dice_ax.legend()

    fig.tight_layout()
    plt.show()

# Placeholder metrics (random numbers, not training results) used only to
# exercise plot_metrics with the expected dictionary layout.
_demo_epochs = 10
metrics = dict(
    train_losses=np.random.rand(_demo_epochs),  # example training losses
    val_losses=np.random.rand(_demo_epochs),  # example validation losses
    train_dices=[np.random.rand() for _ in range(_demo_epochs)],  # example training Dice
    val_dices=[np.random.rand() for _ in range(_demo_epochs)],  # example validation Dice
)

# Draw the curves
plot_metrics(metrics)

In [None]:
# Imported here so the cell does not depend on an import made in another cell.
from sklearn.metrics import confusion_matrix

# Class names ordered by their integer label, for the matrix axes.
class_names = sorted(test_generator.class_indices, key=test_generator.class_indices.get)

# Walk the test generator batch by batch and keep each batch's labels next to
# its predictions. Pairing them this way stays correct even if the generator
# shuffles, and it includes the final partial batch; slicing
# `test_generator.classes` (directory order) against predictions from a
# shuffled generator does not line up.
steps_per_epoch = len(test_generator)
batch_probabilities = []
batch_labels = []
for batch_index in range(steps_per_epoch):
    images_batch, labels_batch = test_generator[batch_index]
    batch_probabilities.append(model.predict_on_batch(images_batch))
    batch_labels.append(labels_batch)

predictions = np.concatenate(batch_probabilities, axis=0)

# Convert class probabilities to predicted class labels
predicted_classes = np.argmax(predictions, axis=1)

# True labels: the generator yields one-hot rows (class_mode='categorical')
true_classes = np.argmax(np.concatenate(batch_labels, axis=0), axis=1)

# Build the confusion matrix; `labels` fixes its shape to one row/column per
# class even when a class never appears in the test batches.
cm = confusion_matrix(true_classes, predicted_classes, labels=list(range(len(class_names))))

# Show the confusion matrix
plt.figure(figsize=(6, 5))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.title('Matriz de Confusão')
plt.xlabel('Previsões')
plt.ylabel('Valores Reais')
plt.show()
