<a href="https://colab.research.google.com/github/TharinsaMudalige/Neuron-Brain_Tumor_Detection_Classification_with_XAI/blob/Detection-Classification-VIT/Tumor_Segmentation_using_U_NET_and_Detection_and_Classification_using_ViT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Importing Libraries

In [1]:
import tensorflow as tf
from tensorflow.keras import layers, Model
import numpy as np
import cv2
import os
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt


U-Net Model for Brain Tumor Segmentation

In [2]:
def unet_model(input_shape=(256, 256, 1)):
    """Assemble a two-level U-Net for binary image segmentation.

    The network has two encoder stages (64 and 128 filters), a 256-filter
    bottleneck, and two decoder stages that mirror the encoder and are fed
    by skip connections.

    Args:
        input_shape: Shape of a single input sample, forwarded to
            ``tf.keras.Input``.

    Returns:
        An uncompiled Keras ``Model`` named "U-Net" whose last layer is a
        1x1 convolution with one filter and sigmoid activation.
    """

    def double_conv(tensor, filters):
        # Two stacked 3x3 ReLU convolutions; 'same' padding keeps height/width.
        for _ in range(2):
            tensor = layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(tensor)
        return tensor

    inputs = tf.keras.Input(input_shape)

    # ---- Encoder: convolve, then halve the spatial resolution ----
    enc1 = double_conv(inputs, 64)
    down1 = layers.MaxPooling2D((2, 2))(enc1)

    enc2 = double_conv(down1, 128)
    down2 = layers.MaxPooling2D((2, 2))(enc2)

    # ---- Bottleneck: bridge between encoder and decoder ----
    bridge = double_conv(down2, 256)

    # ---- Decoder: upsample, merge with the matching encoder stage, convolve ----
    up2 = layers.UpSampling2D((2, 2))(bridge)
    merged2 = layers.Concatenate()([up2, enc2])  # skip connection from encoder stage 2
    dec2 = double_conv(merged2, 128)

    up1 = layers.UpSampling2D((2, 2))(dec2)
    merged1 = layers.Concatenate()([up1, enc1])  # skip connection from encoder stage 1
    dec1 = double_conv(merged1, 64)

    # Per-pixel probability from a single sigmoid unit
    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(dec1)

    return Model(inputs, outputs, name="U-Net")

# Build the segmentation network with the default input shape
unet = unet_model()

# Configure training: Adam optimizer, pixel-wise binary cross-entropy loss,
# and accuracy reported as the monitoring metric.
unet.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Show the layer-by-layer architecture
unet.summary()


Vision Transformer (ViT) for Tumor Detection and Classification

In [10]:
import tensorflow as tf
from tensorflow.keras import Model, layers

class VisionTransformer(Model):
    """
    Vision Transformer (ViT) implementation for image classification.
    This model processes images as a sequence of patches, applies a transformer encoder,
    and classifies the image into predefined classes.
    """

    def __init__(self, image_size, patch_size, embed_dim, num_heads, mlp_dim, num_classes, num_layers):
        """
        Initialize the Vision Transformer (ViT) model.

        Args:
            image_size (tuple): Size of input images (height, width).
            patch_size (int): Size of each patch extracted from the input image.
            embed_dim (int): Dimension of the patch embedding.
            num_heads (int): Number of attention heads in the multi-head attention layer.
            mlp_dim (int): Hidden dimension of the encoder feed-forward networks
                and of the classification head.
            num_classes (int): Number of output classes for classification.
            num_layers (int): Number of transformer encoder layers.
        """
        super().__init__()

        # Compute total number of patches (partial patches at the border are dropped)
        self.num_patches = (image_size[0] // patch_size) * (image_size[1] // patch_size)

        # Patch embedding: a convolution whose kernel size and stride both equal
        # patch_size, so each non-overlapping patch maps to one embed_dim vector.
        self.patch_embedding = layers.Conv2D(embed_dim, patch_size, patch_size, name="patch_embedding")

        # Flatten: Reshape the output of patch embedding to (batch_size, num_patches, embed_dim)
        self.flatten = layers.Reshape((self.num_patches, embed_dim), name="flatten")

        # Positional embedding: learned offsets, broadcast over the batch axis
        self.position_embedding = self.add_weight(
            name="positional_embedding",
            shape=(1, self.num_patches, embed_dim),
            initializer="random_normal",
            trainable=True
        )

        # Transformer encoder blocks. Each residual sub-layer gets its OWN
        # LayerNormalization: previously a single instance was applied after both
        # the attention and the feed-forward step, which tied their scale/offset
        # parameters together. The "norm" key is kept for the attention-side norm
        # so existing lookups by that key continue to work.
        self.encoder_layers = [
            {"norm": layers.LayerNormalization(epsilon=1e-6, name=f"layer_norm_{i}"),
             "ffn_norm": layers.LayerNormalization(epsilon=1e-6, name=f"ffn_layer_norm_{i}"),
             "attention": layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim, name=f"mh_attention_{i}"),
             "ffn": tf.keras.Sequential([
                 layers.Dense(mlp_dim, activation="gelu", name=f"ffn_dense1_{i}"),
                 layers.Dense(embed_dim, name=f"ffn_dense2_{i}")
             ], name=f"ffn_{i}")}
            for i in range(num_layers)
        ]

        # MLP Head: normalize, average over the patch axis, then classify
        self.mlp_head = tf.keras.Sequential([
            layers.LayerNormalization(epsilon=1e-6, name="mlp_norm"),
            layers.GlobalAveragePooling1D(name="gap"),
            layers.Dense(mlp_dim, activation="relu", name="mlp_dense1"),
            layers.Dense(num_classes, activation="softmax", name="mlp_output")
        ])

    def call(self, x):
        """
        Forward pass for the Vision Transformer model.

        Args:
            x (tf.Tensor): Input tensor representing the image batch.

        Returns:
            tf.Tensor: Output tensor with class probabilities.
        """
        # Step 1: Convert input image into patch embeddings
        x = self.patch_embedding(x)
        x = self.flatten(x)

        # Step 2: Add positional embeddings to the patch embeddings
        x = x + self.position_embedding

        # Step 3: Pass the embeddings through transformer encoder layers
        for block in self.encoder_layers:
            # Self-attention (query and value are both x) with residual connection
            attention_output = block["attention"](x, x)
            x = block["norm"](x + attention_output)

            # FeedForward block with residual connection and its own normalization
            ffn_output = block["ffn"](x)
            x = block["ffn_norm"](x + ffn_output)

        # Step 4: Pass the output through the classification MLP head
        return self.mlp_head(x)


# Input shape definition
image_size = (128, 128)  # Input image size (Height, Width)
# The ViT is fed the output of extract_tumor_region(), which returns
# single-channel crops of grayscale scans. Declaring 3 channels here would
# build the patch-embedding kernel for RGB input and make it reject that data.
num_channels = 1         # Number of channels (grayscale images)

# Instantiate the Vision Transformer model
vit = VisionTransformer(
    image_size=image_size,  # Input image size
    patch_size=16,          # Size of each patch
    embed_dim=64,           # Embedding dimension
    num_heads=4,            # Number of attention heads
    mlp_dim=128,            # Hidden dimension in MLP head
    num_classes=3,          # Number of classes
    num_layers=6            # Number of transformer encoder layers
)

# Define input tensor and create the model
input_tensor = tf.keras.Input(shape=(image_size[0], image_size[1], num_channels), name="input_layer")
output_tensor = vit(input_tensor)

# Wrap the model using Keras Functional API.
# `model` shares its weights with `vit`; it is the object that gets compiled.
model = tf.keras.Model(inputs=input_tensor, outputs=output_tensor, name="VisionTransformer")

# Compile the model (categorical cross-entropy expects one-hot encoded labels)
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

# Display the model summary
model.summary()


Data Preparation

Load and Preprocess data

In [None]:

def load_data(image_dir, mask_dir, img_size=(224, 224), verbose=True):
    """Load paired grayscale images and masks from mirrored subfolders.

    For every non-hidden subfolder that exists under both ``image_dir`` and
    ``mask_dir``, each file named ``image_*.png`` is paired with the file
    ``mask_<same name>`` (i.e. ``mask_image_*.png``) in the matching mask
    subfolder. Pairs with a missing or unreadable file are reported and skipped.

    Args:
        image_dir: Root directory containing one subfolder of images per class.
        mask_dir: Root directory containing the same subfolders with the masks.
        img_size: Target size passed to ``cv2.resize``.
        verbose: When True (default) print the image/mask paths of every
            candidate pair; set to False to keep only the summary and
            warning messages.

    Returns:
        A tuple ``(images, masks)`` of NumPy arrays. Each sample is scaled by
        1/255 and carries a trailing channel axis. Both arrays are empty when
        nothing could be loaded.
    """
    images = []
    masks = []

    print("Starting data loading...")
    # os.listdir order is arbitrary; sorting makes the sample order (and any
    # seeded train/validation split made from it) reproducible.
    for subfolder in sorted(os.listdir(image_dir)):
        # Skip hidden entries
        if subfolder.startswith("."):
            continue

        img_subfolder_path = os.path.join(image_dir, subfolder)
        mask_subfolder_path = os.path.join(mask_dir, subfolder)

        # The subfolder must exist on both sides
        if not (os.path.isdir(img_subfolder_path) and os.path.isdir(mask_subfolder_path)):
            print(f"Subfolder missing in one of the directories: {subfolder}")
            continue

        print(f"Processing subfolder: {subfolder}")

        for img_file in sorted(os.listdir(img_subfolder_path)):
            # Only files following the image_*.png convention are considered
            # (this also excludes hidden files).
            if not (img_file.startswith("image_") and img_file.endswith(".png")):
                continue

            # Prefix the name rather than using str.replace, which would also
            # rewrite any later "image_" occurrence inside the file name.
            mask_file = "mask_" + img_file

            img_path = os.path.join(img_subfolder_path, img_file)
            mask_path = os.path.join(mask_subfolder_path, mask_file)

            if verbose:
                print(f"Checking paths:\n  Image: {img_path}\n  Mask: {mask_path}")

            # Check if both the image and mask files exist
            if not (os.path.exists(img_path) and os.path.exists(mask_path)):
                print(f"Skipping missing file: {img_path} or {mask_path}")
                continue

            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

            # cv2.imread returns None instead of raising on unreadable files
            if img is None or mask is None:
                print(f"Error loading image or mask: {img_path} or {mask_path}")
                continue

            # Resize and normalize
            img = cv2.resize(img, img_size) / 255.0
            # Nearest-neighbour keeps mask values unchanged; the default bilinear
            # interpolation would blend labels along the mask boundary.
            # NOTE(review): assumes masks are label maps (e.g. 0/255) - confirm.
            mask = cv2.resize(mask, img_size, interpolation=cv2.INTER_NEAREST) / 255.0
            images.append(np.expand_dims(img, axis=-1))  # Add channel dimension
            masks.append(np.expand_dims(mask, axis=-1))  # Add channel dimension

    print(f"Data loading complete. Loaded {len(images)} images and {len(masks)} masks.")
    return np.array(images), np.array(masks)

# Paths to image and mask directories
image_dir = "/content/drive/MyDrive/DSGP_BrainTumorDetection/Preprocessed_Dataset_classes_morepreprocess_techniques"
mask_dir = "/content/drive/MyDrive/DSGP_BrainTumorDetection/Tumor_Mask_Dataset"

# Load data at the resolution the U-Net was built for: unet_model() defaults to
# a (256, 256, 1) input, whereas load_data() defaults to 224x224, which the
# network would reject when fitting.
images, masks = load_data(image_dir, mask_dir, img_size=(256, 256))

# Check loaded data
if len(images) > 0 and len(masks) > 0:
    print(f"Loaded {len(images)} images and {len(masks)} masks successfully.")
else:
    print("No images or masks were loaded. Check your directory structure and files.")


Streaming output truncated to the last 5000 lines.
  Mask: /content/drive/MyDrive/DSGP_BrainTumorDetection/Tumor_Mask_Dataset/no tumour/mask_image_1670.png
Checking paths:
  Image: /content/drive/MyDrive/DSGP_BrainTumorDetection/Preprocessed_Dataset_classes_morepreprocess_techniques/no tumour/image_1671.png
  Mask: /content/drive/MyDrive/DSGP_BrainTumorDetection/Tumor_Mask_Dataset/no tumour/mask_image_1671.png
Checking paths:
  Image: /content/drive/MyDrive/DSGP_BrainTumorDetection/Preprocessed_Dataset_classes_morepreprocess_techniques/no tumour/image_1672.png
  Mask: /content/drive/MyDrive/DSGP_BrainTumorDetection/Tumor_Mask_Dataset/no tumour/mask_image_1672.png
Checking paths:
  Image: /content/drive/MyDrive/DSGP_BrainTumorDetection/Preprocessed_Dataset_classes_morepreprocess_techniques/no tumour/image_1673.png
  Mask: /content/drive/MyDrive/DSGP_BrainTumorDetection/Tumor_Mask_Dataset/no tumour/mask_image_1673.png
Checking paths:
  Image: /content/drive/MyDrive/DSGP_Bra

Resize Tumor Regions for ViT

In [None]:
def extract_tumor_region(image, mask, img_size=(128, 128)):
    """Mask an image and resize the result for the classifier.

    Args:
        image: Image array. NOTE(review): callers in this notebook pass
            (H, W, 1) grayscale arrays - confirm for other uses.
        mask: Array multiplied element-wise with ``image``; zeros in the mask
            blank out the corresponding pixels.
        img_size: Target size passed to ``cv2.resize``.

    Returns:
        The masked, resized image with an explicit channel axis as its last
        dimension.
    """
    extracted_region = image * mask
    extracted_region_resized = cv2.resize(extracted_region, img_size)

    # cv2.resize returns a 2-D array for single-channel input, so the channel
    # axis has to be restored. Multi-channel results already have one; adding
    # another would produce a 4-D sample.
    if extracted_region_resized.ndim == 2:
        extracted_region_resized = np.expand_dims(extracted_region_resized, axis=-1)
    return extracted_region_resized

# Split the loaded image/mask pairs into training and validation sets. These
# names are used by the cells below but were never defined; 20% is held out and
# the seed is fixed so the split is reproducible.
x_train, x_val, y_train, y_val = train_test_split(
    images, masks, test_size=0.2, random_state=42
)

# Tumor crops built from the ground-truth masks
vit_inputs = [extract_tumor_region(image, mask) for image, mask in zip(x_train, y_train)]


Training U-Net and ViT

In [None]:
# Train U-Net for segmentation
print("Training U-Net...")
unet.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=10, batch_size=8)

# Generate tumor regions using U-Net predictions
predicted_masks = unet.predict(x_train)
vit_inputs = [extract_tumor_region(image, mask) for image, mask in zip(x_train, predicted_masks)]

# Prepare labels for classification
# TODO: these labels are random placeholders - the classifier cannot learn
# anything meaningful until they are replaced with the real class labels.
vit_labels = np.random.randint(0, 3, size=(len(vit_inputs),))  # Replace with actual labels
vit_labels = tf.keras.utils.to_categorical(vit_labels, num_classes=3)

# Train Vision Transformer for classification.
# Fit through `model`, the compiled functional wrapper: `vit` itself was never
# compiled, so calling fit() on it fails. Both objects share the same weights,
# so `vit.predict` benefits from this training.
print("Training ViT...")
model.fit(np.array(vit_inputs), vit_labels, epochs=10, batch_size=8)


Testing Pipeline

In [None]:
def pipeline_test(image):
    """Run one image through segmentation and classification.

    The image is segmented by the module-level ``unet``, the thresholded mask
    is applied through ``extract_tumor_region``, and the resulting crop is
    classified by the module-level ``vit``.

    Args:
        image: A single image without batch axis.
            NOTE(review): presumably (H, W, 1), scaled like the training
            data and matching the U-Net input size - confirm with callers.

    Returns:
        The predicted class name (also printed).
    """
    class_names = ["Glioma", "Meningioma", "Pituitary Tumor"]

    # Step 1: Segment tumor (add a batch axis, then binarize the probabilities)
    mask = unet.predict(np.expand_dims(image, axis=0))
    mask = (mask > 0.5).astype(np.float32)

    # Step 2: Extract tumor region using the mask of the only batch element
    tumor_region = extract_tumor_region(image, mask[0])

    # Step 3: Classify tumor
    tumor_class = vit.predict(np.expand_dims(tumor_region, axis=0))
    predicted_name = class_names[int(np.argmax(tumor_class))]
    print("Predicted Tumor Type:", predicted_name)

    # Returned so callers can use the result instead of parsing printed output
    return predicted_name
