In [2]:
import os
import numpy as np
from tqdm import tqdm
from PIL import Image
import random
import shutil
try:
    from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, save_img
except ImportError as exc:
    # The guarded import comes from TensorFlow/Keras, so the hint must name
    # TensorFlow; Pillow is still mentioned because the message originally asked for it.
    # Chaining with `from exc` keeps the original failure reason in the traceback.
    raise ImportError(
        "Certifique-se de que o TensorFlow e o Pillow estão instalados. "
        "Execute 'pip install tensorflow pillow'."
    ) from exc

from ultralytics import YOLO

In [6]:
def apply_data_augmentation_inplace(input_path, augmentations_per_image=5):
    """
    Augment every image in a folder and save the generated variants in that same folder.

    Each source image yields `augmentations_per_image` new JPEG files named
    `<original stem>_aug_<i>.jpg`. Files whose stem already contains `_aug_` are
    treated as output of a previous run and are skipped, so calling the function
    again does not augment its own results.

    Parameters:
    - input_path: Path to the folder containing the original images
      (.png, .jpg, .jpeg, matched case-insensitively).
    - augmentations_per_image: Number of augmented images to generate per original image.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    augmented_marker = '_aug_'

    # Image generator with the desired random transformations
    datagen = ImageDataGenerator(
        rotation_range=40,         # Random rotation between -40 and +40 degrees
        width_shift_range=0.2,     # Horizontal shift
        height_shift_range=0.2,    # Vertical shift
        shear_range=0.2,           # Shear transformation
        zoom_range=0.2,            # Random zoom
        horizontal_flip=True,      # Horizontal mirroring
        fill_mode='nearest'        # How pixels exposed by a transform are filled
    )

    # Take the listing once, before any file is written, and keep only real
    # source images: outputs of an earlier run carry the `_aug_` marker.
    source_files = [
        name for name in sorted(os.listdir(input_path))
        if name.lower().endswith(image_extensions)
        and augmented_marker not in os.path.splitext(name)[0]
    ]

    for filename in tqdm(source_files):
        # Load the image and shape it as a batch of one
        img_path = os.path.join(input_path, filename)
        img = load_img(img_path)                       # PIL image
        img_array = img_to_array(img)                  # NumPy array
        img_array = np.expand_dims(img_array, axis=0)  # add the batch dimension

        image_gen = datagen.flow(img_array, batch_size=1)
        stem = os.path.splitext(filename)[0]

        for i in range(augmentations_per_image):
            # Clip before the cast so out-of-range values saturate instead of wrapping
            augmented_image = np.clip(next(image_gen)[0], 0, 255).astype('uint8')

            # New name so the original file is never overwritten
            augmented_filename = f"{stem}{augmented_marker}{i}.jpg"
            augmented_path = os.path.join(input_path, augmented_filename)

            # scale=False: the array is already 0-255 uint8; the default scale=True
            # would min/max-stretch each image and change its contrast.
            save_img(augmented_path, augmented_image, scale=False)

    print(f"Data Augmentation concluído! Imagens aumentadas salvas na pasta: {input_path}")

In [7]:
# Example usage: folder with the images to augment.
# Built with os.path.join so the path is valid on any OS, not only on Windows.
input_path = os.path.join('normal-cancer', 'cancer')

In [8]:
# Writes the augmented images into `input_path` itself (default: 5 per image).
# The recorded run below took about 50 minutes for 1802 directory entries.
apply_data_augmentation_inplace(input_path)

100%|██████████| 1802/1802 [50:30<00:00,  1.68s/it]  

Data Augmentation concluído! Imagens aumentadas salvas na pasta: normal-cancer\cancer





In [13]:
def downsample_images(input_path, output_path, target_count=10812, seed=None):
    """
    Copy a random subset of the images in one folder into another folder.

    The source folder is left untouched: files are copied, not moved.

    Parameters:
    - input_path: Folder with the original images (.png, .jpg, .jpeg, matched
      case-insensitively).
    - output_path: Folder that receives the selected images; created if missing.
    - target_count: Number of images to select at random. If it exceeds the number
      of images available, every image is copied instead of raising.
    - seed: Optional seed for a private random generator, so the same subset can be
      selected again. With the default None the module-level `random` state is used.

    Raises:
    - ValueError: if target_count is negative (raised by random.sample).
    """
    # Sorted so the selection does not depend on the OS directory-listing order
    all_images = sorted(
        f for f in os.listdir(input_path)
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    )

    # random.sample raises when asked for more items than exist; cap the request
    sample_size = min(target_count, len(all_images))
    rng = random if seed is None else random.Random(seed)
    selected_images = rng.sample(all_images, sample_size)

    # Create the output folder if it does not exist
    os.makedirs(output_path, exist_ok=True)

    # Copy the selected images to the output folder
    for image in tqdm(selected_images):
        src = os.path.join(input_path, image)
        dst = os.path.join(output_path, image)
        shutil.copy(src, dst)

    # Report what actually happened: the real count, and "copied" rather than "moved"
    print(f"{len(selected_images)} imagens copiadas para {output_path}")

In [14]:
# Source and destination folders for the balancing step.
# Built with os.path.join so the paths are valid on any OS, not only on Windows.
input_path = os.path.join('normal-cancer', 'normal')
output_path = os.path.join('normal-cancer', 'normais_balanceadas')

In [15]:
# Randomly selects images from `input_path` and places them in `output_path`,
# using the function's default target_count of 10812.
# NOTE(review): 10812 = 1802 x 6, presumably the size of the cancer folder after
# augmentation (each original plus 5 variants) — confirm.
downsample_images(input_path, output_path)

100%|██████████| 10812/10812 [00:29<00:00, 370.60it/s]

10812 imagens movidas para normal-cancer\normais_balanceadas





In [17]:
# Load a model
# The log below shows "yolov8n-cls.pt" being downloaded from the Ultralytics
# assets release; the training log reports task=classify and pretrained=True.
model = YOLO("yolov8n-cls.pt") 

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n-cls.pt to 'yolov8n-cls.pt'...


100%|██████████| 5.31M/5.31M [00:00<00:00, 9.76MB/s]


In [18]:
# Train the model
# data="data" is a dataset root: the log below shows Ultralytics scanning
# data\train (21580 images) and data\test (44 images), and reporting "val: None".
# NOTE(review): the per-epoch top1_acc is therefore computed on those 44 test
# images only, so it moves in coarse steps (0.955 -> 0.977 is one image).
# NOTE(review): the recorded run used the CPU (about 12 minutes per epoch,
# roughly 2 hours in total).
results = model.train(data="data", epochs=10, imgsz=256)

Ultralytics 8.3.18  Python-3.11.7 torch-2.5.0+cpu CPU (AMD Ryzen 5 5600X 6-Core Processor)
[34m[1mengine\trainer: [0mtask=classify, mode=train, model=yolov8n-cls.pt, data=data, epochs=10, time=None, patience=100, batch=16, imgsz=256, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show

[34m[1mtrain: [0mScanning C:\Users\diogo\OneDrive\Área de Trabalho\DEV\breast-cancer-prediction\data\train... 21580 images, 0 corrupt: 100%|██████████| 21580/21580 [00:11<00:00, 1888.89it/s]


[34m[1mtrain: [0mNew cache created: C:\Users\diogo\OneDrive\rea de Trabalho\DEV\breast-cancer-prediction\data\train.cache


[34m[1mval: [0mScanning C:\Users\diogo\OneDrive\Área de Trabalho\DEV\breast-cancer-prediction\data\test... 44 images, 0 corrupt: 100%|██████████| 44/44 [00:00<00:00, 2173.52it/s]

[34m[1mval: [0mNew cache created: C:\Users\diogo\OneDrive\rea de Trabalho\DEV\breast-cancer-prediction\data\test.cache
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000714, momentum=0.9) with parameter groups 26 weight(decay=0.0), 27 weight(decay=0.0005), 27 bias(decay=0.0)
Image sizes 256 train, 256 val
Using 0 dataloader workers
Logging results to [1mc:\Users\diogo\OneDrive\rea de Trabalho\DEV\breast-cancer-prediction\runs\classify\train[0m
Starting training for 10 epochs...

      Epoch    GPU_mem       loss  Instances       Size



       1/10         0G     0.4899         12        256: 100%|██████████| 1349/1349 [12:27<00:00,  1.80it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 2/2 [00:00<00:00,  3.06it/s]

                   all      0.955          1






      Epoch    GPU_mem       loss  Instances       Size


       2/10         0G     0.3883         12        256: 100%|██████████| 1349/1349 [12:33<00:00,  1.79it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 2/2 [00:00<00:00,  3.26it/s]

                   all      0.955          1






      Epoch    GPU_mem       loss  Instances       Size


       3/10         0G     0.3695         12        256: 100%|██████████| 1349/1349 [11:48<00:00,  1.91it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 2/2 [00:00<00:00,  3.19it/s]

                   all      0.955          1






      Epoch    GPU_mem       loss  Instances       Size


       4/10         0G     0.3448         12        256: 100%|██████████| 1349/1349 [11:46<00:00,  1.91it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 2/2 [00:00<00:00,  3.27it/s]

                   all      0.955          1






      Epoch    GPU_mem       loss  Instances       Size


       5/10         0G     0.3346         12        256: 100%|██████████| 1349/1349 [11:49<00:00,  1.90it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 2/2 [00:00<00:00,  3.22it/s]

                   all      0.977          1






      Epoch    GPU_mem       loss  Instances       Size


       6/10         0G     0.3201         12        256: 100%|██████████| 1349/1349 [11:47<00:00,  1.91it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 2/2 [00:00<00:00,  3.29it/s]

                   all      0.955          1






      Epoch    GPU_mem       loss  Instances       Size


       7/10         0G     0.3096         12        256: 100%|██████████| 1349/1349 [12:04<00:00,  1.86it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 2/2 [00:00<00:00,  3.18it/s]

                   all      0.955          1






      Epoch    GPU_mem       loss  Instances       Size


       8/10         0G     0.2984         12        256: 100%|██████████| 1349/1349 [11:49<00:00,  1.90it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 2/2 [00:00<00:00,  3.19it/s]

                   all      0.955          1






      Epoch    GPU_mem       loss  Instances       Size


       9/10         0G     0.2934         12        256: 100%|██████████| 1349/1349 [11:51<00:00,  1.89it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 2/2 [00:00<00:00,  3.27it/s]

                   all      0.955          1






      Epoch    GPU_mem       loss  Instances       Size


      10/10         0G     0.2841         12        256: 100%|██████████| 1349/1349 [11:48<00:00,  1.90it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 2/2 [00:00<00:00,  3.20it/s]

                   all      0.955          1






10 epochs completed in 1.999 hours.
Optimizer stripped from c:\Users\diogo\OneDrive\rea de Trabalho\DEV\breast-cancer-prediction\runs\classify\train\weights\last.pt, 3.0MB
Optimizer stripped from c:\Users\diogo\OneDrive\rea de Trabalho\DEV\breast-cancer-prediction\runs\classify\train\weights\best.pt, 3.0MB

Validating c:\Users\diogo\OneDrive\rea de Trabalho\DEV\breast-cancer-prediction\runs\classify\train\weights\best.pt...
Ultralytics 8.3.18  Python-3.11.7 torch-2.5.0+cpu CPU (AMD Ryzen 5 5600X 6-Core Processor)
YOLOv8n-cls summary (fused): 73 layers, 1,437,442 parameters, 0 gradients, 3.3 GFLOPs
[34m[1mtrain:[0m C:\Users\diogo\OneDrive\rea de Trabalho\DEV\breast-cancer-prediction\data\train... found 21580 images in 2 classes  
[34m[1mval:[0m None...
[34m[1mtest:[0m C:\Users\diogo\OneDrive\rea de Trabalho\DEV\breast-cancer-prediction\data\test... found 44 images in 2 classes  


               classes   top1_acc   top5_acc: 100%|██████████| 2/2 [00:00<00:00,  3.22it/s]


                   all      0.977          1
Speed: 0.0ms preprocess, 3.1ms inference, 0.0ms loss, 0.0ms postprocess per image
Results saved to [1mc:\Users\diogo\OneDrive\rea de Trabalho\DEV\breast-cancer-prediction\runs\classify\train[0m


In [4]:
# Reload the best checkpoint written by the training run (see "Optimizer stripped
# from ...\runs\classify\train\weights\best.pt" in the training log).
# os.path.join keeps the path valid on any OS, not only on Windows.
# NOTE(review): the training log shows exist_ok=False, so a repeated training run
# presumably writes to a different run folder — confirm this path still matches.
model_trained = YOLO(os.path.join("runs", "classify", "train", "weights", "best.pt"))

In [12]:
# Classify a single image from the test split with the reloaded model.
# os.path.join keeps the path valid on any OS, not only on Windows.
# NOTE(review): the file name ends in `_aug_0`, i.e. it is an augmented image; if
# the train/test split was made after augmentation, other variants of the same
# source image may sit in data/train and inflate the test accuracy — verify.
results = model_trained(os.path.join("data", "test", "cancer", "55148_926538150_aug_0.jpg"))


image 1/1 c:\Users\diogo\OneDrive\rea de Trabalho\DEV\breast-cancer-prediction\data\test\cancer\55148_926538150_aug_0.jpg: 256x256 cancer 1.00, normal 0.00, 8.4ms
Speed: 6.8ms preprocess, 8.4ms inference, 0.0ms postprocess per image at shape (1, 3, 256, 256)


In [13]:
# Save the first entry of `results` to output.png in the working directory;
# the cell output shows save() returning the file name.
results[0].save("output.png")

'output.png'