In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import json
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import (
    Activation,
    Conv2D,
    Conv2DTranspose,
    MaxPooling2D,
    UpSampling2D,
    concatenate
)
from tensorflow.keras.optimizers import Adam


2025-08-03 23:27:12.185320: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754263632.360260      20 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754263632.422311      20 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
DATA_DIR = '/kaggle/input/lgg-mri-segmentation/kaggle_3m'


In [3]:
import os
import shutil

DATA_DIR = '/kaggle/input/lgg-mri-segmentation/kaggle_3m'

images_dir = '/kaggle/working/images/'
masks_dir = '/kaggle/working/masks/'

os.makedirs(images_dir, exist_ok=True)
os.makedirs(masks_dir, exist_ok=True)

temp_images = []
temp_masks = []

for root, dirs, files in os.walk(DATA_DIR):
    for file in files:
        if file.endswith('.tif'):
            if 'mask' in file.lower():
                shutil.copy(os.path.join(root, file), os.path.join(masks_dir, file))
                temp_masks.append(file)
            else:
                shutil.copy(os.path.join(root, file), os.path.join(images_dir, file))
                temp_images.append(file)

final_images = []
final_masks = []

for img_file in sorted(temp_images):
    expected_mask = img_file.replace('.tif', '_mask.tif')
    if expected_mask in temp_masks:
        final_images.append(img_file)
        final_masks.append(expected_mask)

print(f"Total matched image-mask pairs: {len(final_images)}")
print(f"Total images copied: {len(temp_images)}")
print(f"Total masks copied: {len(temp_masks)}")



Total matched image-mask pairs: 3929
Total images copied: 3929
Total masks copied: 3929


In [4]:


def load_data(images_dir, masks_dir, img_size=256):
    images = []
    masks = []

    image_files = sorted(os.listdir(images_dir))
    mask_files = sorted(os.listdir(masks_dir))

    if len(image_files) != len(mask_files):
        print("⚠️ Warning: Number of images and masks do not match!")

    for img_file, mask_file in zip(image_files, mask_files):
        img_path = os.path.join(images_dir, img_file)
        mask_path = os.path.join(masks_dir, mask_file)

        # Load image
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img is None:
            print(f"[!] Image not loaded: {img_path}")
            continue
        img = cv2.resize(img, (img_size, img_size))
        img = img / 255.0  # Normalize

        # Load mask
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            print(f"[!] Mask not loaded: {mask_path}")
            continue
        mask = cv2.resize(mask, (img_size, img_size))
        mask = mask / 255.0

        # Skip empty masks (no tumor)
        if np.max(mask) == 0:
            print(f"[!] Skipping empty mask: {mask_file}")
            continue

        mask = np.expand_dims(mask, axis=-1)

        images.append(img)
        masks.append(mask)

    images = np.array(images, dtype=np.float32)
    masks = np.array(masks, dtype=np.float32)

    print(f"Loaded {len(images)} image-mask pairs with tumor.")
    return images, masks


❗ We exclude empty masks (that contain no tumor) 
because training the model on them can lead to poor learning. 
The model may simply learn to always predict background (0), 
resulting in falsely high accuracy and low Dice score.


In [5]:
images_dir = '/kaggle/working/images'
masks_dir = '/kaggle/working/masks'

X, Y = load_data(images_dir, masks_dir)

print("Data loaded successfully.")
print(f"Images shape: {X.shape}")     # (num_samples, 256, 256, 3)
print(f"Masks shape:  {Y.shape}")     # (num_samples, 256, 256, 1)

if X.shape[0] != Y.shape[0]:
    print("⚠️ Warning: Number of images and masks do not match!")
else:
    print(f"🔢 Total image-mask pairs: {X.shape[0]}")



[!] Skipping empty mask: TCGA_CS_4941_19960909_10_mask.tif
[!] Skipping empty mask: TCGA_CS_4941_19960909_19_mask.tif
[!] Skipping empty mask: TCGA_CS_4941_19960909_1_mask.tif
[!] Skipping empty mask: TCGA_CS_4941_19960909_20_mask.tif
[!] Skipping empty mask: TCGA_CS_4941_19960909_21_mask.tif
[!] Skipping empty mask: TCGA_CS_4941_19960909_22_mask.tif
[!] Skipping empty mask: TCGA_CS_4941_19960909_23_mask.tif
[!] Skipping empty mask: TCGA_CS_4941_19960909_2_mask.tif
[!] Skipping empty mask: TCGA_CS_4941_19960909_3_mask.tif
[!] Skipping empty mask: TCGA_CS_4941_19960909_4_mask.tif
[!] Skipping empty mask: TCGA_CS_4941_19960909_5_mask.tif
[!] Skipping empty mask: TCGA_CS_4941_19960909_6_mask.tif
[!] Skipping empty mask: TCGA_CS_4941_19960909_7_mask.tif
[!] Skipping empty mask: TCGA_CS_4941_19960909_8_mask.tif
[!] Skipping empty mask: TCGA_CS_4941_19960909_9_mask.tif
[!] Skipping empty mask: TCGA_CS_4942_19970222_15_mask.tif
[!] Skipping empty mask: TCGA_CS_4942_19970222_16_mask.tif
[!] Sk

In [6]:


index = 100
sample_image = X[index]
sample_mask = Y[index]

pred_mask = model.predict(np.expand_dims(sample_image, axis=0), verbose=0)[0]

plt.figure(figsize=(15, 5))

plt.subplot(1, 3, 1)
plt.imshow(sample_image)
plt.title("Original MRI")
plt.axis('off')

plt.subplot(1, 3, 2)
plt.imshow(sample_mask.squeeze(), cmap='gray')
plt.title("Ground Truth Mask")
plt.axis('off')

plt.subplot(1, 3, 3)
plt.imshow(pred_mask.squeeze(), cmap='gray')
plt.title("Predicted Mask (Raw Output)")
plt.axis('off')

plt.tight_layout()
plt.show()


NameError: name 'model' is not defined

In [None]:


idx = random.randint(0, len(X)-1)

plt.figure(figsize=(10,4))

plt.subplot(1,2,1)
plt.imshow(X[idx])
plt.title("MRI Image")

plt.subplot(1,2,2)
plt.imshow(Y[idx].squeeze(), cmap='gray')
plt.title("Mask")

plt.show()


### 🧠 Visualize Overlay of Image and Segmentation Mask (Ground Truth)

In [None]:
def visualize_full_comparison(model, image, true_mask, index=None):
    import matplotlib.pyplot as plt
    import numpy as np
    import cv2

    pred_mask = model.predict(np.expand_dims(image, axis=0), verbose=0)[0]
    pred_mask_bin = (pred_mask > 0.5).astype(np.float32)

    image_uint8 = (image * 255).astype(np.uint8)
    
    def create_overlay(img, mask, color=(255, 0, 0)):
        mask = mask.squeeze()
        mask_rgb = np.zeros_like(img)
        mask_rgb[:, :, 0] = (mask * 255).astype(np.uint8)  # Red channel
        return cv2.addWeighted(img, 0.7, mask_rgb, 0.3, 0)

    overlay_gt = create_overlay(image_uint8, true_mask)
    overlay_pred = create_overlay(image_uint8, pred_mask_bin)

    plt.figure(figsize=(20, 8))

    plt.subplot(1, 5, 1)
    plt.imshow(image)
    plt.title("🧠 Original MRI")
    plt.axis('off')

    plt.subplot(1, 5, 2)
    plt.imshow(true_mask.squeeze(), cmap='gray')
    plt.title("🎯 Ground Truth Mask")
    plt.axis('off')

    plt.subplot(1, 5, 3)
    plt.imshow(pred_mask.squeeze(), cmap='gray')
    plt.title("🔮 Predicted Mask (Raw)")
    plt.axis('off')

    plt.subplot(1, 5, 4)
    plt.imshow(overlay_gt)
    plt.title("📌 Overlay with GT")
    plt.axis('off')

    plt.subplot(1, 5, 5)
    plt.imshow(overlay_pred)
    plt.title("📌 Overlay with Prediction")
    plt.axis('off')

    plt.tight_layout()
    if index is not None:
        plt.suptitle(f"Sample Index: {index}", fontsize=16, y=1.05)
    plt.show()

idx = 100
visualize_full_comparison(model, X[idx], Y[idx], index=idx)


In [None]:
def visualize_full_comparison(model, image, true_mask, index=None):
    import matplotlib.pyplot as plt
    import numpy as np
    import cv2

    pred_mask = model.predict(np.expand_dims(image, axis=0), verbose=0)[0]
    pred_mask_bin = (pred_mask > 0.5).astype(np.float32)

    image_uint8 = (image * 255).astype(np.uint8)
    
    def create_overlay(img, mask, color=(255, 0, 0)):
        mask = mask.squeeze()
        mask_rgb = np.zeros_like(img)
        mask_rgb[:, :, 0] = (mask * 255).astype(np.uint8)  # Red channel
        return cv2.addWeighted(img, 0.7, mask_rgb, 0.3, 0)

    overlay_gt = create_overlay(image_uint8, true_mask)
    overlay_pred = create_overlay(image_uint8, pred_mask_bin)

    plt.figure(figsize=(20, 8))

    plt.subplot(1, 5, 1)
    plt.imshow(image)
    plt.title("Original MRI")
    plt.axis('off')

    plt.subplot(1, 5, 2)
    plt.imshow(true_mask.squeeze(), cmap='gray')
    plt.title("Ground Truth Mask")
    plt.axis('off')

    plt.subplot(1, 5, 3)
    plt.imshow(pred_mask.squeeze(), cmap='gray')
    plt.title("Predicted Mask (Raw)")
    plt.axis('off')

    plt.subplot(1, 5, 4)
    plt.imshow(overlay_gt)
    plt.title("Overlay with GT")
    plt.axis('off')

    plt.subplot(1, 5, 5)
    plt.imshow(overlay_pred)
    plt.title(" Overlay with Prediction")
    plt.axis('off')

    plt.tight_layout()
    if index is not None:
        plt.suptitle(f"Sample Index: {index}", fontsize=16, y=1.05)
    plt.show()

idx = 100
visualize_full_comparison(model, X[idx], Y[idx], index=idx)


In [None]:
fig, ax = plt.subplots(3, 6, figsize=(16, 9))
fig.suptitle('MRI vs Ground Truth vs Prediction', fontsize=18)

for i in range(6):
    n = np.random.randint(len(X))  # Random sample index

    image = X[n]
    gt_mask = Y[n]
    pred_mask = model.predict(np.expand_dims(image, axis=0), verbose=0)[0]
    pred_mask_bin = (pred_mask > 0.5).astype(np.float32)

    # Row 0 - Original Image
    ax[0][i].imshow(image)
    ax[0][i].set_xticks([])
    ax[0][i].set_yticks([])
    if i == 0:
        ax[0][i].set_ylabel('MRI', fontsize=14)

    # Row 1 - Ground Truth
    ax[1][i].imshow(gt_mask.squeeze(), cmap='gray')
    ax[1][i].set_xticks([])
    ax[1][i].set_yticks([])
    if i == 0:
        ax[1][i].set_ylabel('Ground Truth', fontsize=14)

    # Row 2 - Prediction
    ax[2][i].imshow(pred_mask_bin.squeeze(), cmap='gray')
    ax[2][i].set_xticks([])
    ax[2][i].set_yticks([])
    if i == 0:
        ax[2][i].set_ylabel('Prediction', fontsize=14)

plt.tight_layout()
plt.subplots_adjust(top=0.92)
plt.show()


In [None]:
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Conv2DTranspose, concatenate, Dropout, BatchNormalization

# Input
inputs = Input((256, 256, 3))

# Encoder
c1 = Conv2D(64, (3, 3), activation='relu', padding='same')(inputs)
c1 = BatchNormalization()(c1)
c1 = Conv2D(64, (3, 3), activation='relu', padding='same')(c1)
c1 = BatchNormalization()(c1)
c1 = Dropout(0.1)(c1)
p1 = MaxPooling2D((2, 2))(c1)

c2 = Conv2D(128, (3, 3), activation='relu', padding='same')(p1)
c2 = BatchNormalization()(c2)
c2 = Conv2D(128, (3, 3), activation='relu', padding='same')(c2)
c2 = BatchNormalization()(c2)
c2 = Dropout(0.1)(c2)
p2 = MaxPooling2D((2, 2))(c2)

c3 = Conv2D(256, (3, 3), activation='relu', padding='same')(p2)
c3 = BatchNormalization()(c3)
c3 = Conv2D(256, (3, 3), activation='relu', padding='same')(c3)
c3 = BatchNormalization()(c3)
c3 = Dropout(0.2)(c3)
p3 = MaxPooling2D((2, 2))(c3)

c4 = Conv2D(512, (3, 3), activation='relu', padding='same')(p3)
c4 = BatchNormalization()(c4)
c4 = Conv2D(512, (3, 3), activation='relu', padding='same')(c4)
c4 = BatchNormalization()(c4)
c4 = Dropout(0.3)(c4)
p4 = MaxPooling2D((2, 2))(c4)

# Bottleneck
c5 = Conv2D(1024, (3, 3), activation='relu', padding='same')(p4)
c5 = BatchNormalization()(c5)
c5 = Conv2D(1024, (3, 3), activation='relu', padding='same')(c5)
c5 = BatchNormalization()(c5)
c5 = Dropout(0.4)(c5)

# Decoder
u6 = Conv2DTranspose(512, (2, 2), strides=(2, 2), padding='same')(c5)
u6 = concatenate([u6, c4])
c6 = Conv2D(512, (3, 3), activation='relu', padding='same')(u6)
c6 = BatchNormalization()(c6)
c6 = Conv2D(512, (3, 3), activation='relu', padding='same')(c6)
c6 = BatchNormalization()(c6)
c6 = Dropout(0.3)(c6)

u7 = Conv2DTranspose(256, (2, 2), strides=(2, 2), padding='same')(c6)
u7 = concatenate([u7, c3])
c7 = Conv2D(256, (3, 3), activation='relu', padding='same')(u7)
c7 = BatchNormalization()(c7)
c7 = Conv2D(256, (3, 3), activation='relu', padding='same')(c7)
c7 = BatchNormalization()(c7)
c7 = Dropout(0.2)(c7)

u8 = Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(c7)
u8 = concatenate([u8, c2])
c8 = Conv2D(128, (3, 3), activation='relu', padding='same')(u8)
c8 = BatchNormalization()(c8)
c8 = Conv2D(128, (3, 3), activation='relu', padding='same')(c8)
c8 = BatchNormalization()(c8)
c8 = Dropout(0.1)(c8)

u9 = Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(c8)
u9 = concatenate([u9, c1])
c9 = Conv2D(64, (3, 3), activation='relu', padding='same')(u9)
c9 = BatchNormalization()(c9)
c9 = Conv2D(64, (3, 3), activation='relu', padding='same')(c9)
c9 = BatchNormalization()(c9)
c9 = Dropout(0.1)(c9)

# Output
outputs = Conv2D(1, (1, 1), activation='sigmoid')(c9)




## 🔷 Define The Model




In [None]:
from tensorflow.keras.models import Model

model = Model(inputs=[inputs], outputs=[outputs])
model.summary()


## 🔷 Compile the U-Net Model


In [None]:
from tensorflow.keras.optimizers import Adam

# Compile Model

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


## 🔷 Prepare Data Generator / Splitting Data (Train & Validation)


In [None]:
from sklearn.model_selection import train_test_split

X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=0.2, random_state=42)

print(f"Training Data: {X_train.shape}, {Y_train.shape}")
print(f"Validation Data: {X_val.shape}, {Y_val.shape}")


## 🔷 Train the Model


In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

# Callbacks
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6, verbose=1)
checkpoint = ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True, verbose=1)

# Train
history = model.fit(
    X_train, Y_train,
    validation_data=(X_val, Y_val),
    epochs=40,  
    batch_size=8,
    callbacks=[early_stop, reduce_lr, checkpoint]
)



## 🔷 Plot Training & Validation Accuracy/Loss Curves


In [None]:

# Accuracy
plt.figure(figsize=(8, 5))
plt.plot(history.history['accuracy'], label='Train Accuracy', marker='o')
plt.plot(history.history['val_accuracy'], label='Val Accuracy', marker='o')
plt.title('📈 Accuracy Over Epochs', fontsize=14)
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()

# Loss
plt.figure(figsize=(8, 5))
plt.plot(history.history['loss'], label='Train Loss', marker='o')
plt.plot(history.history['val_loss'], label='Val Loss', marker='o')
plt.title('📉 Loss Over Epochs', fontsize=14)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()


## 🔷 Test Prediction on a Sample Image



In [None]:
# Predict on validation sample
sample_img = X_val[0]
sample_mask = Y_val[0]

pred_mask = model.predict(sample_img[np.newaxis, ...])[0]

plt.figure(figsize=(12, 6))

plt.subplot(1, 3, 1)
plt.imshow(sample_img)
plt.title('Original Image')

plt.subplot(1, 3, 2)
plt.imshow(sample_mask.squeeze(), cmap='gray')
plt.title('Ground Truth Mask')

plt.subplot(1, 3, 3)
plt.imshow(pred_mask.squeeze(), cmap='viridis')
plt.title('Predicted Mask')
plt.colorbar()  # Optional

plt.show()



## 🔷 Define Dice Coefficient Metric



In [None]:
import tensorflow.keras.backend as K
import tensorflow as tf

def dice_coefficient(y_true, y_pred, smooth=1e-6):
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)

def dice_loss(y_true, y_pred, smooth=1e-6):
    return 1 - dice_coefficient(y_true, y_pred, smooth)

# 🔹 3. Combined BCE + Dice loss
def combined_bce_dice_loss(y_true, y_pred):
    bce = tf.keras.losses.binary_crossentropy(y_true, y_pred)
    dl = dice_loss(y_true, y_pred)
    return bce + dl




## 🔷 Compile the Model Again with Dice Metric


In [None]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss=combined_bce_dice_loss,
    metrics=[dice_coefficient, 'accuracy']
)





## 🔷 Evaluate Model on Validation Data



In [None]:
results = model.evaluate(X_val, Y_val)
print(f"Validation Loss: {results[0]}")
print(f"Validation Dice Coefficient: {results[2]}")



⚠️ Note: We rely on Dice Coefficient rather than accuracy because it's a much better metric for medical image segmentation.

 In such tasks, the lesion/tumor regions are small compared to the background, so pixel-wise accuracy can be misleading.
 
 A high Dice Score (e.g. 0.98) indicates excellent overlap between predicted and true masks, even if accuracy appears lower.


## 🔷 Predict & Visualize Result


In [None]:
# Predict a sample image
sample_img = X_val[0]
sample_mask = Y_val[0]

# Predict mask (without thresholding)
pred_mask = model.predict(sample_img[np.newaxis, ...])[0]

# Plot Original Image, Ground Truth, Predicted Mask (as probabilities)
plt.figure(figsize=(12,6))

plt.subplot(1,3,1)
plt.imshow(sample_img)
plt.title("Original Image")

plt.subplot(1,3,2)
plt.imshow(sample_mask.squeeze(), cmap='gray')
plt.title("Ground Truth Mask")

plt.subplot(1,3,3)
plt.imshow(pred_mask.squeeze(), cmap='jet')  # Using 'jet' to visualize probabilities
plt.title("Predicted Mask (Probabilities)")

plt.show()


## 🔷 Save Model 



In [None]:
model.save('best_model.h5')


In [None]:
import matplotlib.pyplot as plt
import random
import numpy as np

num_samples = 5
random_indices = random.sample(range(len(X)), num_samples)

plt.figure(figsize=(15, num_samples * 3))

for i, idx in enumerate(random_indices):
    image = X[idx]
    mask = Y[idx]

    pred_mask = model.predict(np.expand_dims(image, axis=0))[0]
    pred_mask_bin = (pred_mask > 0.5).astype(np.float32)

    # Row for each sample
    plt.subplot(num_samples, 3, i*3 + 1)
    plt.imshow(image)
    plt.title(f"Original Image #{idx}")
    plt.axis('off')

    plt.subplot(num_samples, 3, i*3 + 2)
    plt.imshow(mask.squeeze(), cmap='gray')
    plt.title("Ground Truth Mask")
    plt.axis('off')

    plt.subplot(num_samples, 3, i*3 + 3)
    plt.imshow(pred_mask_bin.squeeze(), cmap='gray')
    plt.title("Predicted Mask")
    plt.axis('off')

plt.tight_layout()
plt.show()


In [None]:
from tensorflow.keras.models import load_model

# Load custom model with all required custom loss/metrics
model = load_model(
    'best_model.h5',
    custom_objects={
        'dice_coefficient': dice_coefficient,
        'dice_loss': dice_loss,
        'combined_bce_dice_loss': combined_bce_dice_loss
    }
)

print(" Model loaded successfully!")
