<h1 style="font-size: 40px; color: Gold">Breast Cancer Detection Using Ultrasound _BUSI_</h1>

<h1 style="font-size: 30px; color: green;"> Imports </h1>

In [16]:
import os
import shutil
import random
import cv2 
import numpy as np
from matplotlib import pyplot as plt

<h1 style="font-size: 30px; color: lightseagreen;">Annotate your masked data</h1>

<h1 style="font-size:20px">Function to annotate the data</h1>

In [18]:
def convert_mask_to_yolo(mask_path, class_id, img_width, img_height, output_txt):
    """Write YOLO bounding-box labels for every region found in a mask image.

    The mask is read as grayscale, its external contours are extracted, and the
    bounding rectangle of each contour is written to ``output_txt`` as one line
    of the form ``class_id x_center y_center width height``. The four box values
    are divided by ``img_width`` / ``img_height``.

    Args:
        mask_path: Path of the mask image.
        class_id: Class index written at the start of every label line.
        img_width: Width used to normalise x values. Pass ``None`` to use the
            width of the mask itself, which is what the contour coordinates
            are measured in.
        img_height: Height used to normalise y values. Pass ``None`` to use the
            height of the mask itself.
        output_txt: Destination label file. It is overwritten if it exists; its
            parent directory is created when the path has one.

    Returns:
        The grayscale mask array returned by ``cv2.imread``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot load ``mask_path``.
        ValueError: If the width or height used for normalisation is not positive.
    """
    # Read the mask first so a bad path fails before anything is created on disk.
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    if mask is None:
        raise FileNotFoundError(f"Mask image not found: {mask_path}")

    # Contours are expressed in mask pixels, so the mask's own size is the
    # natural fallback when the caller does not supply dimensions.
    mask_height, mask_width = mask.shape[:2]
    if img_width is None:
        img_width = mask_width
    if img_height is None:
        img_height = mask_height
    if img_width <= 0 or img_height <= 0:
        raise ValueError(
            f"Image dimensions must be positive, got {img_width}x{img_height}"
        )

    # os.makedirs('') raises, so only create the folder when the path has one.
    output_dir = os.path.dirname(output_txt)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Indexing with [-2] picks the contour list whether findContours returns
    # two values or three (the return shape differs between OpenCV versions).
    contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    with open(output_txt, 'w') as file:
        for contour in contours:
            # Pixel-space box: top-left corner plus width and height.
            x, y, w, h = cv2.boundingRect(contour)

            # Convert to centre/size form, normalised by the image dimensions.
            x_center = (x + w / 2) / img_width
            y_center = (y + h / 2) / img_height
            width = w / img_width
            height = h / img_height

            file.write(
                f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n"
            )

    return mask

<h1 style="font-size:20px">Create Labels for each directory </h1>

In [34]:
# Define paths and classes
base_path = "dataset"
classes = ["benign", "malignant", "normal"]
class_ids = {"benign": 0, "malignant": 1, "normal": 2}
mask_extensions = (".png", ".jpg")

# Build one YOLO label file per image, for every class folder.
for class_name in classes:
    mask_dir = os.path.join(base_path, class_name, "masked")
    output_label_dir = os.path.join(base_path, class_name, "labels")

    if not os.path.isdir(mask_dir):
        print(f"Mask directory not found, skipping: {mask_dir}")
        continue
    os.makedirs(output_label_dir, exist_ok=True)

    # Group masks by the image they belong to. Everything from "_mask" onwards
    # is dropped, so "x_mask.png" and "x_mask_1.png" both map to image "x"
    # (assumes extra masks of one image carry a "_mask_<n>" suffix — confirm
    # against your dataset).
    masks_by_image = {}
    for mask_file in sorted(os.listdir(mask_dir)):
        mask_stem, mask_ext = os.path.splitext(mask_file)
        if mask_ext.lower() not in mask_extensions:
            continue
        image_stem = mask_stem.partition("_mask")[0]
        masks_by_image.setdefault(image_stem, []).append(mask_file)

    for image_stem, mask_files in masks_by_image.items():
        output_label_path = os.path.join(output_label_dir, f"{image_stem}.txt")
        label_lines = []

        for mask_file in mask_files:
            mask_image_path = os.path.join(mask_dir, mask_file)

            # Normalise with the real size of this mask instead of one fixed
            # resolution: the images do not all share the same dimensions.
            mask_probe = cv2.imread(mask_image_path, cv2.IMREAD_GRAYSCALE)
            if mask_probe is None:
                print(f"Unreadable mask, skipping: {mask_image_path}")
                continue
            mask_height, mask_width = mask_probe.shape[:2]

            convert_mask_to_yolo(
                mask_image_path,
                class_ids[class_name],
                mask_width,
                mask_height,
                output_label_path,
            )

            # convert_mask_to_yolo overwrites its output file, so collect the
            # lines after each call and write the merged result once below.
            with open(output_label_path) as label_file:
                label_lines.extend(label_file.readlines())

        with open(output_label_path, "w") as label_file:
            label_file.writelines(label_lines)

           


<h1 style="color: limegreen; font-size:30px;">Pre-Processing on dataset - <span style="color: lightgreen;">Split the dataset into train, test, and val</span></h1>

In [41]:
import os
import shutil
import random

def create_dataset_split(base_path, train_ratio=0.7, test_ratio=0.2, seed=None):
    """Copy each class's images and labels into train/test/val folders.

    For every category, images are read from ``<base_path>/<category>/<category>``
    and labels from ``<base_path>/<category>/labels``. An image is paired with
    the label file that has the same base name; images without a label file and
    label files without an image are left out. The pairs are shuffled and copied
    (the originals are kept) into ``<base_path>/{train,test,val}/{images,labels}``.

    Args:
        base_path: Dataset root containing the category folders.
        train_ratio: Fraction of each category copied to ``train``.
        test_ratio: Fraction of each category copied to ``test``; whatever is
            left after train and test goes to ``val``.
        seed: Optional seed for a reproducible shuffle. With ``None`` the
            module-level ``random`` state is used.

    Returns:
        A dict mapping ``'train'``, ``'test'`` and ``'val'`` to the number of
        image/label pairs copied into that split.

    Raises:
        ValueError: If the ratios are negative or sum to more than 1.
    """
    if train_ratio < 0 or test_ratio < 0 or train_ratio + test_ratio > 1:
        raise ValueError(
            f"Invalid split ratios: train={train_ratio}, test={test_ratio}"
        )

    categories = ['benign', 'malignant', 'normal']
    image_extensions = ('.png', '.jpg', '.jpeg')

    dataset_split = {
        split_name: {
            'images': os.path.join(base_path, split_name, 'images'),
            'labels': os.path.join(base_path, split_name, 'labels'),
        }
        for split_name in ('train', 'test', 'val')
    }

    # Create directories for train, test, and val splits
    for split in dataset_split.values():
        os.makedirs(split['images'], exist_ok=True)
        os.makedirs(split['labels'], exist_ok=True)

    # A private generator keeps a seeded run reproducible; without a seed the
    # shared module state is used.
    rng = random if seed is None else random.Random(seed)
    copied = {split_name: 0 for split_name in dataset_split}

    for category in categories:
        category_path = os.path.join(base_path, category)
        image_dir = os.path.join(category_path, category)
        label_dir = os.path.join(category_path, 'labels')

        if not (os.path.isdir(image_dir) and os.path.isdir(label_dir)):
            print(f"Skipping '{category}': missing {image_dir} or {label_dir}")
            continue

        # Index labels by base name so pairing never depends on listing order.
        labels_by_stem = {
            os.path.splitext(name)[0]: os.path.join(label_dir, name)
            for name in os.listdir(label_dir)
            if name.endswith('.txt')
        }

        pairs = []
        unlabeled = 0
        # Sorted so that a seeded shuffle starts from the same order every run.
        for name in sorted(os.listdir(image_dir)):
            if not name.lower().endswith(image_extensions):
                continue
            label_path = labels_by_stem.get(os.path.splitext(name)[0])
            if label_path is None:
                unlabeled += 1
                continue
            pairs.append((os.path.join(image_dir, name), label_path))

        if unlabeled:
            print(f"'{category}': {unlabeled} image(s) without a label file were skipped")
        if not pairs:
            continue

        rng.shuffle(pairs)

        # Calculate split indices
        total = len(pairs)
        train_end = int(total * train_ratio)
        test_end = train_end + int(total * test_ratio)

        # Copy each pair into its split; image and label always travel together.
        for index, (image_path, label_path) in enumerate(pairs):
            if index < train_end:
                split_name = 'train'
            elif index < test_end:
                split_name = 'test'
            else:
                split_name = 'val'
            shutil.copy(image_path, dataset_split[split_name]['images'])
            shutil.copy(label_path, dataset_split[split_name]['labels'])
            copied[split_name] += 1

    return copied

# Dataset root: holds the per-class folders and receives train/test/val.
# Point this at your own dataset location.
base_path = './dataset'

# Run the split, then confirm on stdout.
create_dataset_split(base_path)
print("Dataset split completed!")


Dataset split completed!


<h1 style="color: lightblue; font-size:24px;">Delete the Extra folders</h1>

In [44]:
def delete_classes(base_path):
    """Remove the 'benign', 'malignant' and 'normal' folders under ``base_path``.

    Each folder that exists is deleted together with its contents; a line is
    printed for every folder saying whether it was deleted or not found.

    Args:
        base_path: Directory that contains the class folders.
    """
    for label in ('benign', 'malignant', 'normal'):
        target = os.path.join(base_path, label)

        # Nothing to remove: report it and move on to the next class.
        if not os.path.exists(target):
            print(f"Directory not found: {target}")
            continue

        shutil.rmtree(target)  # deletes the folder and everything below it
        print(f"Deleted directory: {target}")

# Define your base path
base_path = './dataset'  # Update this to your path

# Deleting the class folders is irreversible, so only do it once the split
# output actually contains images; otherwise the raw data would be lost.
split_image_dirs = [
    os.path.join(base_path, split_name, 'images')
    for split_name in ('train', 'test', 'val')
]
split_has_images = any(
    os.path.isdir(path) and os.listdir(path) for path in split_image_dirs
)

if split_has_images:
    # Call the function to delete the class directories
    delete_classes(base_path)
    print("Specified directories have been deleted.")
else:
    print("Split folders are missing or empty; class directories were kept.")


Deleted directory: ./dataset\benign
Deleted directory: ./dataset\malignant
Deleted directory: ./dataset\normal
Specified directories have been deleted.


<h1 style="color: PaleGoldenRod; font-size:24px;">Normalizing Dataset</h1>

In [48]:
import os
import cv2
import numpy as np

def normalize_images(dataset_path):
    """Rescale every split image to [0, 1] and write it back as 8-bit.

    Walks ``<dataset_path>/{train,val,test}/images``, loads each .png/.jpg/.jpeg
    file with ``cv2.imread``, divides by 255, then scales back to 0-255 and
    overwrites the file in place. Because the result is stored as uint8 again,
    the saved pixel values equal the loaded ones; the values are rounded (not
    truncated) so the float round trip cannot shift a pixel down by one.

    NOTE(review): files are re-encoded on save, which for JPEG is presumably
    lossy — confirm whether this in-place rewrite is wanted at all, since the
    training framework is understood to scale pixels itself when loading.

    Args:
        dataset_path: Dataset root that contains the split folders.

    Returns:
        The number of images that were rewritten.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    processed = 0

    for split in ['train', 'val', 'test']:
        split_path = os.path.join(dataset_path, split, 'images')  # Path to images

        # isdir (not exists) so that a stray file with this name is reported
        # instead of crashing os.listdir.
        if not os.path.isdir(split_path):
            print(f"Directory does not exist: {split_path}")
            continue

        for filename in os.listdir(split_path):
            # Case-insensitive so files such as "scan.PNG" are not skipped.
            if not filename.lower().endswith(image_extensions):
                continue

            img_path = os.path.join(split_path, filename)
            img = cv2.imread(img_path)
            if img is None:
                print(f"Error loading image: {img_path}")
                continue

            # Normalize the image to [0, 1]
            normalized_img = img / 255.0

            # Back to 8-bit: round to the nearest integer and clip before the
            # cast, because astype(np.uint8) alone truncates toward zero.
            restored = np.clip(np.rint(normalized_img * 255), 0, 255).astype(np.uint8)

            cv2.imwrite(img_path, restored)
            processed += 1

    return processed

# Dataset root containing the train/val/test folders; change to your location.
dataset_path = './dataset'

# Rewrite the split images in place, then confirm on stdout.
normalize_images(dataset_path)
print("Normalized and saved")


Normalized and saved


<h1 style="color: red; font-size:45px;">Implementation of YOLO V8 model</h1>

<h1 style="color: PaleGoldenRod; font-size:24px;">IMPORT YOLO MODEL</h1>

In [20]:
from ultralytics import YOLO
from IPython.display import display

# Start from the 'yolov8n.pt' weights.
model = YOLO('yolov8n.pt')

# Training settings gathered in one place so they are easy to tune.
train_settings = {
    'data': 'dataset.yaml',           # dataset description file
    'epochs': 1,                      # a single pass over the training set
    'imgsz': 640,                     # training image size
    'batch': 16,                      # images per batch
    'name': 'yolov8_breast_cancer',   # run name used for the output folder
}

results = model.train(**train_settings)

New https://pypi.org/project/ultralytics/8.3.4 available  Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.2.99  Python-3.12.1 torch-2.4.1+cpu CPU (12th Gen Intel Core(TM) i5-12500H)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=dataset.yaml, epochs=1, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=yolov8_breast_cancer4, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None,

[34m[1mtrain: [0mScanning C:\Users\Humaira Sadia\Desktop\College Lab\DS_ML_DL\Breast_Cancer\model\dataset\train\labels.cache... 2 [0m
[34m[1mval: [0mScanning C:\Users\Humaira Sadia\Desktop\College Lab\DS_ML_DL\Breast_Cancer\model\dataset\val\labels.cache... 0 imag[0m

Plotting labels to runs\detect\yolov8_breast_cancer4\labels.jpg... 





[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001429, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns\detect\yolov8_breast_cancer4[0m
Starting training for 1 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


  0%|          | 0/40 [00:04<?, ?it/s]


KeyboardInterrupt: 

<h1 style="color: PaleGoldenRod; font-size:40px;">VALIDATION OF MODEL</h1>

In [10]:
# Evaluate the current `model` with its default validation settings.
# Note: this rebinds `results`, replacing the value assigned by the training cell.
results = model.val()

Ultralytics YOLOv8.2.99  Python-3.12.1 torch-2.4.1+cpu CPU (12th Gen Intel Core(TM) i5-12500H)
Model summary (fused): 168 layers, 3,006,233 parameters, 0 gradients, 8.1 GFLOPs


[34m[1mval: [0mScanning C:\Users\Humaira Sadia\Desktop\College Lab\DS_ML_DL\Breast_Cancer\model\dataset\val\labels.cache... 0 imag[0m




                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:03<00

                   all         20          0          0          0          0          0





Speed: 3.6ms preprocess, 125.4ms inference, 0.0ms loss, 11.5ms postprocess per image
Results saved to [1mruns\detect\yolov8_breast_cancer2[0m


<h1 style="color: PaleGoldenRod; font-size:40px;">PERFORM OBJECT DETECTION</h1>

In [14]:
# Run the model on a single image from the test split.
# Note: this rebinds `results` again, so earlier results are no longer reachable by that name.
results = model("dataset/test/images/benign (1).png") 


image 1/1 C:\Users\Humaira Sadia\Desktop\BUSI\model\dataset\test\images\benign (1).png: 544x640 (no detections), 237.6ms
Speed: 8.7ms preprocess, 237.6ms inference, 1.0ms postprocess per image at shape (1, 3, 544, 640)
