In [1]:
import requests
import zipfile
import os

# Step 1: Download the Dataset
url = "https://www.cancerimagingarchive.net/wp-content/uploads/BrEaST-Lesions_USG-images_and_masks-Dec-15-2023.zip"
local_filename = "BrEaST-Lesions_USG-images_and_masks-Dec-15-2023.zip"

# Download the file from the URL
with requests.get(url, stream=True) as r:
    r.raise_for_status()
    with open(local_filename, 'wb') as f:
        for chunk in r.iter_content(chunk_size=8192):
            f.write(chunk)

print(f"Downloaded {local_filename}")

Downloaded BrEaST-Lesions_USG-images_and_masks-Dec-15-2023.zip


In [2]:
# Step 2: Extract the ZIP File
with zipfile.ZipFile(local_filename, 'r') as zip_ref:
    zip_ref.extractall("Breast_Lesions_USG_Dataset")

In [16]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

def dice_coefficient(y_true, y_pred):
    y_true_f = tf.keras.backend.flatten(y_true)
    y_pred_f = tf.keras.backend.flatten(y_pred)
    intersection = tf.reduce_sum(y_true_f * y_pred_f)
    return (2. * intersection + tf.keras.backend.epsilon()) / (tf.reduce_sum(y_true_f) + tf.reduce_sum(y_pred_f) + tf.keras.backend.epsilon())

def iou(y_true, y_pred):
    y_true_f = tf.keras.backend.flatten(y_true)
    y_pred_f = tf.keras.backend.flatten(y_pred)
    intersection = tf.reduce_sum(y_true_f * y_pred_f)
    union = tf.reduce_sum(y_true_f) + tf.reduce_sum(y_pred_f) - intersection
    return (intersection + tf.keras.backend.epsilon()) / (union + tf.keras.backend.epsilon())

def precision(y_true, y_pred):
    y_true_f = tf.keras.backend.flatten(y_true)
    y_pred_f = tf.keras.backend.flatten(y_pred)
    true_positives = tf.reduce_sum(y_true_f * y_pred_f)
    predicted_positives = tf.reduce_sum(y_pred_f)
    return true_positives / (predicted_positives + tf.keras.backend.epsilon())

def recall(y_true, y_pred):
    y_true_f = tf.keras.backend.flatten(y_true)
    y_pred_f = tf.keras.backend.flatten(y_pred)
    true_positives = tf.reduce_sum(y_true_f * y_pred_f)
    possible_positives = tf.reduce_sum(y_true_f)
    return true_positives / (possible_positives + tf.keras.backend.epsilon())

def f1_score(y_true, y_pred):
    prec = precision(y_true, y_pred)
    rec = recall(y_true, y_pred)
    return 2 * ((prec * rec) / (prec + rec + tf.keras.backend.epsilon()))

def specificity(y_true, y_pred):
    y_true_f = tf.keras.backend.flatten(y_true)
    y_pred_f = tf.keras.backend.flatten(y_pred)
    true_negatives = tf.reduce_sum((1 - y_true_f) * (1 - y_pred_f))
    possible_negatives = tf.reduce_sum(1 - y_true_f)
    return true_negatives / (possible_negatives + tf.keras.backend.epsilon())

def sensitivity(y_true, y_pred):
    return recall(y_true, y_pred)

def unet_model(input_size=(128, 128, 3)):
    inputs = Input(input_size)

    # Encoder
    c1 = Conv2D(64, 3, activation='relu', padding='same')(inputs)
    c1 = Conv2D(64, 3, activation='relu', padding='same')(c1)
    p1 = MaxPooling2D(pool_size=(2, 2))(c1)

    c2 = Conv2D(128, 3, activation='relu', padding='same')(p1)
    c2 = Conv2D(128, 3, activation='relu', padding='same')(c2)
    p2 = MaxPooling2D(pool_size=(2, 2))(c2)

    c3 = Conv2D(256, 3, activation='relu', padding='same')(p2)
    c3 = Conv2D(256, 3, activation='relu', padding='same')(c3)
    p3 = MaxPooling2D(pool_size=(2, 2))(c3)

    c4 = Conv2D(512, 3, activation='relu', padding='same')(p3)
    c4 = Conv2D(512, 3, activation='relu', padding='same')(c4)
    p4 = MaxPooling2D(pool_size=(2, 2))(c4)

    # Bottleneck
    c5 = Conv2D(1024, 3, activation='relu', padding='same')(p4)
    c5 = Conv2D(1024, 3, activation='relu', padding='same')(c5)

    # Decoder
    u6 = UpSampling2D(size=(2, 2))(c5)
    u6 = concatenate([u6, c4])
    c6 = Conv2D(512, 3, activation='relu', padding='same')(u6)
    c6 = Conv2D(512, 3, activation='relu', padding='same')(c6)

    u7 = UpSampling2D(size=(2, 2))(c6)
    u7 = concatenate([u7, c3])
    c7 = Conv2D(256, 3, activation='relu', padding='same')(u7)
    c7 = Conv2D(256, 3, activation='relu', padding='same')(c7)

    u8 = UpSampling2D(size=(2, 2))(c7)
    u8 = concatenate([u8, c2])
    c8 = Conv2D(128, 3, activation='relu', padding='same')(u8)
    c8 = Conv2D(128, 3, activation='relu', padding='same')(c8)

    u9 = UpSampling2D(size=(2, 2))(c8)
    u9 = concatenate([u9, c1])
    c9 = Conv2D(64, 3, activation='relu', padding='same')(u9)
    c9 = Conv2D(64, 3, activation='relu', padding='same')(c9)

    outputs = Conv2D(1, 1, activation='sigmoid')(c9)

    model = Model(inputs=[inputs], outputs=[outputs])

    model.compile(optimizer=Adam(learning_rate=1e-4), loss='binary_crossentropy', metrics=[
        'accuracy', dice_coefficient, iou, precision, recall, f1_score, specificity, sensitivity])

    return model

# Build the model
model = unet_model()

# Display the model summary
model.summary()


Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_3 (InputLayer)        [(None, 128, 128, 3)]        0         []                            
                                                                                                  
 conv2d_43 (Conv2D)          (None, 128, 128, 64)         1792      ['input_3[0][0]']             
                                                                                                  
 conv2d_44 (Conv2D)          (None, 128, 128, 64)         36928     ['conv2d_43[0][0]']           
                                                                                                  
 max_pooling2d_8 (MaxPoolin  (None, 64, 64, 64)           0         ['conv2d_44[0][0]']           
 g2D)                                                                                       

In [23]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, concatenate
from tensorflow.keras.optimizers import Adam

# Define metrics
def dice_coefficient(y_true, y_pred):
    y_true_f = tf.keras.backend.flatten(y_true)
    y_pred_f = tf.keras.backend.flatten(y_pred)
    intersection = tf.reduce_sum(y_true_f * y_pred_f)
    return (2. * intersection + tf.keras.backend.epsilon()) / (tf.reduce_sum(y_true_f) + tf.reduce_sum(y_pred_f) + tf.keras.backend.epsilon())

def iou(y_true, y_pred):
    y_true_f = tf.keras.backend.flatten(y_true)
    y_pred_f = tf.keras.backend.flatten(y_pred)
    intersection = tf.reduce_sum(y_true_f * y_pred_f)
    union = tf.reduce_sum(y_true_f) + tf.reduce_sum(y_pred_f) - intersection
    return (intersection + tf.keras.backend.epsilon()) / (union + tf.keras.backend.epsilon())

def precision(y_true, y_pred):
    y_true_f = tf.keras.backend.flatten(y_true)
    y_pred_f = tf.keras.backend.flatten(y_pred)
    true_positives = tf.reduce_sum(y_true_f * y_pred_f)
    predicted_positives = tf.reduce_sum(y_pred_f)
    return true_positives / (predicted_positives + tf.keras.backend.epsilon())

def recall(y_true, y_pred):
    y_true_f = tf.keras.backend.flatten(y_true)
    y_pred_f = tf.keras.backend.flatten(y_pred)
    true_positives = tf.reduce_sum(y_true_f * y_pred_f)
    possible_positives = tf.reduce_sum(y_true_f)
    return true_positives / (possible_positives + tf.keras.backend.epsilon())

def f1_score(y_true, y_pred):
    prec = precision(y_true, y_pred)
    rec = recall(y_true, y_pred)
    return 2 * ((prec * rec) / (prec + rec + tf.keras.backend.epsilon()))

def specificity(y_true, y_pred):
    y_true_f = tf.keras.backend.flatten(y_true)
    y_pred_f = tf.keras.backend.flatten(y_pred)
    true_negatives = tf.reduce_sum((1 - y_true_f) * (1 - y_pred_f))
    possible_negatives = tf.reduce_sum(1 - y_true_f)
    return true_negatives / (possible_negatives + tf.keras.backend.epsilon())

def sensitivity(y_true, y_pred):
    return recall(y_true, y_pred)

# Define U-Net model
def unet_model(input_size=(256, 256, 3)):
    inputs = Input(input_size)

    # Encoder
    c1 = Conv2D(64, 3, activation='relu', padding='same')(inputs)
    c1 = Conv2D(64, 3, activation='relu', padding='same')(c1)
    p1 = MaxPooling2D(pool_size=(2, 2))(c1)

    c2 = Conv2D(128, 3, activation='relu', padding='same')(p1)
    c2 = Conv2D(128, 3, activation='relu', padding='same')(c2)
    p2 = MaxPooling2D(pool_size=(2, 2))(c2)

    c3 = Conv2D(256, 3, activation='relu', padding='same')(p2)
    c3 = Conv2D(256, 3, activation='relu', padding='same')(c3)
    p3 = MaxPooling2D(pool_size=(2, 2))(c3)

    c4 = Conv2D(512, 3, activation='relu', padding='same')(p3)
    c4 = Conv2D(512, 3, activation='relu', padding='same')(c4)
    p4 = MaxPooling2D(pool_size=(2, 2))(c4)

    # Bottleneck
    c5 = Conv2D(1024, 3, activation='relu', padding='same')(p4)
    c5 = Conv2D(1024, 3, activation='relu', padding='same')(c5)

    # Decoder
    u6 = UpSampling2D(size=(2, 2))(c5)
    u6 = concatenate([u6, c4])
    c6 = Conv2D(512, 3, activation='relu', padding='same')(u6)
    c6 = Conv2D(512, 3, activation='relu', padding='same')(c6)

    u7 = UpSampling2D(size=(2, 2))(c6)
    u7 = concatenate([u7, c3])
    c7 = Conv2D(256, 3, activation='relu', padding='same')(u7)
    c7 = Conv2D(256, 3, activation='relu', padding='same')(c7)

    u8 = UpSampling2D(size=(2, 2))(c7)
    u8 = concatenate([u8, c2])
    c8 = Conv2D(128, 3, activation='relu', padding='same')(u8)
    c8 = Conv2D(128, 3, activation='relu', padding='same')(c8)

    u9 = UpSampling2D(size=(2, 2))(c8)
    u9 = concatenate([u9, c1])
    c9 = Conv2D(64, 3, activation='relu', padding='same')(u9)
    c9 = Conv2D(64, 3, activation='relu', padding='same')(c9)

    outputs = Conv2D(1, 1, activation='sigmoid')(c9)

    model = Model(inputs=[inputs], outputs=[outputs])

    model.compile(optimizer=Adam(learning_rate=1e-4), loss='binary_crossentropy', metrics=[
        'accuracy', dice_coefficient, iou, precision, recall, f1_score, specificity, sensitivity])

    return model

# Function to find the mask file corresponding to an image
def find_mask_file(image_path, possible_suffixes):
    base_name = os.path.splitext(image_path)[0]
    for suffix in possible_suffixes:
        mask_path = f"{base_name}{suffix}.png"
        if os.path.exists(mask_path):
            return mask_path
    return None

# Function to load images and masks
def load_images_and_masks(base_data_dirs, img_size=(256, 256)):
    images = []
    masks = []
    
    possible_suffixes = ['_mask', '_mask_mask', '_tumor']

    for base_data_dir in base_data_dirs:
        for sub_dir in os.listdir(base_data_dir):
            data_dir = os.path.join(base_data_dir, sub_dir)
            if os.path.isdir(data_dir):
                files = sorted(os.listdir(data_dir))
                for file in files:
                    if file.startswith('.'):
                        continue  # Skip hidden files/directories
                    if any(suffix in file for suffix in possible_suffixes):
                        continue  # Skip mask files in the same directory
                    img_path = os.path.join(data_dir, file)
                    mask_path = find_mask_file(img_path, possible_suffixes)
                    if not mask_path:
                        print(f"Failed to load mask for image: {img_path}")
                        continue
                    image = cv2.imread(img_path)
                    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
                    if image is None or mask is None:
                        print(f"Failed to load image or mask: {img_path}, {mask_path}")
                        continue
                    image = cv2.resize(image, img_size)
                    mask = cv2.resize(mask, img_size)
                    images.append(image)
                    masks.append(mask)
            else:
                if not any(suffix in sub_dir for suffix in possible_suffixes) and not sub_dir.startswith('.'):
                    img_path = os.path.join(base_data_dir, sub_dir)
                    mask_path = find_mask_file(img_path, possible_suffixes)
                    if not mask_path:
                        print(f"Failed to load mask for image: {img_path}")
                        continue
                    image = cv2.imread(img_path)
                    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
                    if image is None or mask is None:
                        print(f"Failed to load image or mask: {img_path}, {mask_path}")
                        continue
                    image = cv2.resize(image, img_size)
                    mask = cv2.resize(mask, img_size)
                    images.append(image)
                    masks.append(mask)
    
    return np.array(images), np.array(masks)

# Load dataset
dataset_dirs = [
    'breast_ultrasound_images/Dataset_BUSI_with_GT',
    'Breast_Lesions_USG_Dataset/BrEaST-Lesions_USG-images_and_masks'
]

images, masks = load_images_and_masks(dataset_dirs)

# Expand dimensions to include channel
masks = np.expand_dims(masks, axis=-1)

# Normalize images and masks
images = images / 255.0
masks = masks / 255.0

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(images, masks, test_size=0.2, random_state=42)
data_gen_args = dict(rotation_range=90,
                     width_shift_range=0.1,
                     height_shift_range=0.1,
                     shear_range=0.2,
                     zoom_range=0.2,
                     horizontal_flip=True,
                     vertical_flip=True,
                     fill_mode='nearest')

image_datagen = ImageDataGenerator(**data_gen_args)
mask_datagen = ImageDataGenerator(**data_gen_args)

# Fit the data generators
seed = 1
image_datagen.fit(X_train, augment=True, seed=seed)
mask_datagen.fit(y_train, augment=True, seed=seed)

# Create generators
image_generator = image_datagen.flow(X_train, batch_size=32, seed=seed)
mask_generator = mask_datagen.flow(y_train, batch_size=32, seed=seed)

# Combine generators into one that yields image and masks
train_generator = zip(image_generator, mask_generator)

# Instantiate and compile the model
model = unet_model()

# Display the model summary
model.summary()

# Define the callbacks
checkpoint_dir = './checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, 'ckpt_{epoch:02d}')
if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True,
    save_best_only=True,
    monitor='val_loss',
    mode='min'
)

early_stopping_callback = tf.keras.callbacks.EarlyStopping(
    patience=12,
    monitor='val_loss',
    mode='min',
    restore_best_weights=True
)

# Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=len(X_train) // 32,
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=[checkpoint_callback, early_stopping_callback]
)

# Save the model
model.save('breast_cancer_segmentation_model.h5')

Failed to load mask for image: Breast_Lesions_USG_Dataset/BrEaST-Lesions_USG-images_and_masks/case085_other2.png
Failed to load mask for image: Breast_Lesions_USG_Dataset/BrEaST-Lesions_USG-images_and_masks/case092_other1.png
Failed to load mask for image: Breast_Lesions_USG_Dataset/BrEaST-Lesions_USG-images_and_masks/case022_other1.png
Failed to load mask for image: Breast_Lesions_USG_Dataset/BrEaST-Lesions_USG-images_and_masks/case022_other2.png
Failed to load mask for image: Breast_Lesions_USG_Dataset/BrEaST-Lesions_USG-images_and_masks/case140_other2.png
Failed to load mask for image: Breast_Lesions_USG_Dataset/BrEaST-Lesions_USG-images_and_masks/case213.png
Failed to load mask for image: Breast_Lesions_USG_Dataset/BrEaST-Lesions_USG-images_and_masks/case036_other1.png
Failed to load mask for image: Breast_Lesions_USG_Dataset/BrEaST-Lesions_USG-images_and_masks/case036_other2.png
Failed to load mask for image: Breast_Lesions_USG_Dataset/BrEaST-Lesions_USG-images_and_masks/case061.p

2024-07-20 14:57:18.478415: W external/local_tsl/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 4.52GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2024-07-20 14:57:19.271222: W external/local_tsl/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 2.02GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2024-07-20 14:57:19.502229: W external/local_tsl/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 4.52GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.


 1/25 [>.............................] - ETA: 18:10 - loss: 0.7167 - accuracy: 0.0444 - dice_coefficient: 0.0786 - iou: 0.0409 - precision: 0.0426 - recall: 0.5112 - f1_score: 0.0786 - specificity: 0.4874 - sensitivity: 0.5112

2024-07-20 14:57:53.627933: W external/local_tsl/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 4.52GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2024-07-20 14:57:54.524995: W external/local_tsl/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 2.02GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2024-07-20 14:57:54.746865: W external/local_tsl/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 4.52GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.


Epoch 2/100


2024-07-20 14:59:04.989651: W external/local_tsl/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 4.52GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100


  saving_api.save_model(
