In [1]:
import os
import cv2
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tqdm import tqdm
import shutil

# Paths and settings
INPUT_DIR = '/kaggle/input/breast-ultrasound-images-dataset/Dataset_BUSI_with_GT'
OUTPUT_DIR = '/kaggle/working/preprocessed_balanced_dataset'
TARGET_SIZE = (224, 224)
TARGET_COUNT = 891
CLASSES = ['benign', 'malignant', 'normal']

# Clean previous outputs
if os.path.exists(OUTPUT_DIR):
    shutil.rmtree(OUTPUT_DIR)
os.makedirs(OUTPUT_DIR, exist_ok=True)

# SEGMENTATION FUNCTION 
def apply_kmeans_segmentation(image, k=2):
    Z = image.reshape((-1, 1)).astype(np.float32)
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
    _, labels, centers = cv2.kmeans(Z, k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)
    segmented_img = centers[labels.flatten()].reshape(image.shape)
    segmented_img = np.uint8(segmented_img)
    return segmented_img

#PREPROCESSING FUNCTION 
def preprocess_image(image_path):
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    image = cv2.resize(image, TARGET_SIZE)
    image = cv2.equalizeHist(image)
    image = cv2.GaussianBlur(image, (5, 5), 0)
    image = cv2.bilateralFilter(image, 9, 75, 75)
    image = apply_kmeans_segmentation(image) 
    return image

#SAVE IMAGES FUNCTION  
def save_preprocessed_images(class_name):
    input_path = os.path.join(INPUT_DIR, class_name)
    output_path = os.path.join(OUTPUT_DIR, class_name)
    os.makedirs(output_path, exist_ok=True)

    image_files = [f for f in os.listdir(input_path) if f.endswith('.png') and 'mask' not in f]
    count = 0

    for i, filename in enumerate(tqdm(image_files, desc=f"Preprocessing {class_name}")):
        if count >= TARGET_COUNT:
            break
        img_path = os.path.join(input_path, filename)
        processed = preprocess_image(img_path)
        cv2.imwrite(os.path.join(output_path, f"{class_name}_{i}.png"), processed)
        count += 1

    return output_path, count

#AUGMENTATION FUNCTION 
def augment_class_images(class_name, current_count):
    if current_count >= TARGET_COUNT:
        return

    datagen = ImageDataGenerator(
        rotation_range=15,
        zoom_range=0.1,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True,
        fill_mode='nearest'
    )

    class_dir = os.path.join(OUTPUT_DIR, class_name)
    images = [f for f in os.listdir(class_dir) if f.endswith('.png')]
    img_idx = 0

    while current_count < TARGET_COUNT:
        img_path = os.path.join(class_dir, images[img_idx % len(images)])
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        img = img.reshape((1, img.shape[0], img.shape[1], 1))
        for batch in datagen.flow(img, batch_size=1, save_to_dir=class_dir,
                                  save_prefix='aug', save_format='png'):
            current_count += 1
            if current_count >= TARGET_COUNT:
                break
        img_idx += 1

#MAIN PREPROCESS + AUGMENT
for cls in CLASSES:
    out_dir, count = save_preprocessed_images(cls)
    print(f"{cls} class before augmentation: {count} images")
    augment_class_images(cls, count)
    final_count = len([f for f in os.listdir(out_dir) if f.endswith('.png')])
    print(f"{cls} class after augmentation: {final_count} images")

print("\n Processing and balancing completed.")

#MODEL DEFINITION 
IMG_SIZE = 224
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 1)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(3, activation='softmax')  
])
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

print("\n CNN model defined and compiled.")


2025-05-17 16:30:50.651224: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747499450.939146      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747499451.023972      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Preprocessing benign: 100%|██████████| 437/437 [00:37<00:00, 11.73it/s]


benign class before augmentation: 437 images
benign class after augmentation: 884 images


Preprocessing malignant: 100%|██████████| 210/210 [00:17<00:00, 11.75it/s]


malignant class before augmentation: 210 images
malignant class after augmentation: 864 images


Preprocessing normal: 100%|██████████| 133/133 [00:11<00:00, 11.73it/s]


normal class before augmentation: 133 images
normal class after augmentation: 864 images

✅ Processing and balancing completed.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-05-17 16:32:23.620101: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)



 CNN model defined and compiled.


In [2]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator


datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

train_generator = datagen.flow_from_directory(
    OUTPUT_DIR,
    target_size=(IMG_SIZE, IMG_SIZE),
    color_mode='grayscale',
    batch_size=32,
    class_mode='categorical',
    subset='training'
)

val_generator = datagen.flow_from_directory(
    OUTPUT_DIR,
    target_size=(IMG_SIZE, IMG_SIZE),
    color_mode='grayscale',
    batch_size=32,
    class_mode='categorical',
    subset='validation'
)

history = model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator
)

loss, acc = model.evaluate(val_generator)
print(f"\nFinal Accuracy: {acc:.4f}, Loss: {loss:.4f}")


Found 2092 images belonging to 3 classes.
Found 520 images belonging to 3 classes.
Epoch 1/10


  self._warn_if_super_not_called()


[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 2s/step - accuracy: 0.6268 - loss: 2.4947 - val_accuracy: 0.9865 - val_loss: 0.0585
Epoch 2/10
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 2s/step - accuracy: 0.8844 - loss: 0.2795 - val_accuracy: 0.9942 - val_loss: 0.0268
Epoch 3/10
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 2s/step - accuracy: 0.9585 - loss: 0.1203 - val_accuracy: 0.9942 - val_loss: 0.0246
Epoch 4/10
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 2s/step - accuracy: 0.9955 - loss: 0.0233 - val_accuracy: 0.9942 - val_loss: 0.0411
Epoch 5/10
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 2s/step - accuracy: 0.9971 - loss: 0.0121 - val_accuracy: 0.9942 - val_loss: 0.0291
Epoch 7/10
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 2s/step - accuracy: 0.9964 - loss: 0.0147 - val_accuracy: 0.9962 - val_loss: 0.0192
Epoch 8/10
[1m66/66[0m [32m━━━━━━━━━━━━━━━