<a href="https://colab.research.google.com/github/TharinsaMudalige/Neuron-Brain_Tumor_Detection_Classification_with_XAI/blob/Detection-Classification-VIT/Brain_Tumour_Classification_Using_VIT_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Step 1: Load and preprocess the dataset
def load_dataset(base_dir):
    """Load every image under ``base_dir`` as a normalized grayscale array.

    The dataset root must contain one sub-folder per class; the sorted folder
    names define the integer labels (index into ``class_names``).

    Args:
        base_dir: Path of the dataset root folder.

    Returns:
        A tuple ``(images, labels, class_names)`` where ``images`` is a numpy
        array of 224x224 grayscale images scaled to [0, 1], ``labels`` is a
        numpy array holding the class index of each image, and ``class_names``
        is the sorted list of class folder names.
    """
    # Only visible sub-folders are classes. Stray files (".DS_Store", a README)
    # would crash the per-class listing below, and hidden folders such as
    # ".ipynb_checkpoints" would silently become an extra class.
    class_names = sorted(
        entry
        for entry in os.listdir(base_dir)
        if not entry.startswith(".") and os.path.isdir(os.path.join(base_dir, entry))
    )

    images = []
    labels = []
    for label, class_name in enumerate(class_names):
        class_dir = os.path.join(base_dir, class_name)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and any seeded split done on it) reproducible.
        for file_name in sorted(os.listdir(class_dir)):
            file_path = os.path.join(class_dir, file_name)
            # Skip hidden files and nested folders: they are not images
            if file_name.startswith(".") or not os.path.isfile(file_path):
                continue
            # Load as grayscale, resized to 224x224
            image = tf.keras.preprocessing.image.load_img(
                file_path, color_mode='grayscale', target_size=(224, 224)
            )
            # Convert to a numpy array and scale pixel values to [0, 1]
            images.append(tf.keras.preprocessing.image.img_to_array(image) / 255.0)
            labels.append(label)

    return np.array(images), np.array(labels), class_names


In [None]:
# Root folder of the dataset. load_dataset lists this folder and uses the
# sorted entry names as class names, so it should contain one sub-folder per class.
# NOTE(review): the value below looks like a placeholder - set the real path before running.
base_dir = "path/to/dataset"  # Dataset root folder


In [None]:
# Read the whole dataset into memory.
#   images      - numpy array of 224x224 grayscale images scaled to [0, 1]
#   labels      - numpy array with the class index of each image
#   class_names - sorted class folder names; a label is an index into this list
images, labels, class_names = load_dataset(base_dir)


In [None]:
# Split every class on its own (80% train / 20% test) so that each class keeps
# the same train/test proportion, then pool the per-class parts together.
train_images = []
test_images = []
train_labels = []
test_labels = []
for label, _class_name in enumerate(class_names):
    # Positions of the samples that belong to the current class
    class_indices = np.flatnonzero(labels == label)
    class_images, class_labels = images[class_indices], labels[class_indices]
    # Fixed seed keeps the split reproducible between runs
    train_x, test_x, train_y, test_y = train_test_split(
        class_images,
        class_labels,
        test_size=0.2,
        random_state=42,
    )
    # Append each part of this class to the matching pooled list
    for pooled, part in (
        (train_images, train_x),
        (test_images, test_x),
        (train_labels, train_y),
        (test_labels, test_y),
    ):
        pooled.extend(part)


In [None]:
# Turn the pooled per-class lists into numpy arrays (one array per split)
train_images, test_images, train_labels, test_labels = (
    np.array(split)
    for split in (train_images, test_images, train_labels, test_labels)
)


In [None]:
# Step 2: Create the Vision Transformer (ViT) model
class _AddPositionEmbedding(layers.Layer):
    """Add a learnable position embedding to a sequence of patch embeddings.

    The embedding table is owned by this layer, so a model that uses the layer
    tracks the table as a trainable weight. Loading a saved model that contains
    this layer requires passing the class through ``custom_objects``.
    """

    def __init__(self, num_patches, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.num_patches = num_patches
        self.embed_dim = embed_dim
        self.position_embedding = layers.Embedding(input_dim=num_patches, output_dim=embed_dim)

    def call(self, patch_embeddings):
        positions = tf.range(self.num_patches)
        # (num_patches, embed_dim) broadcasts over the leading batch dimension
        return patch_embeddings + self.position_embedding(positions)

    def get_config(self):
        config = super().get_config()
        config.update({"num_patches": self.num_patches, "embed_dim": self.embed_dim})
        return config


def create_vit_model(input_shape, num_classes, *, patch_size=16, embed_dim=64,
                     num_heads=4, num_layers=8, mlp_dim=128):
    """Create a Vision Transformer classifier.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
        num_classes: Number of output classes.
        patch_size: Side length, in pixels, of the square image patches.
        embed_dim: Size of each patch embedding; also used as the per-head
            key size of the attention layers.
        num_heads: Number of attention heads per encoder layer.
        num_layers: Number of transformer encoder layers.
        mlp_dim: Hidden size of the feed-forward network in each encoder layer.

    Returns:
        An uncompiled Keras model mapping an image batch to softmax class
        probabilities.

    Raises:
        ValueError: If the image is smaller than one patch.
    """
    # Rows/columns that do not fill a whole patch are dropped by the 'valid' convolution
    num_patches = (input_shape[0] // patch_size) * (input_shape[1] // patch_size)
    if num_patches == 0:
        raise ValueError(
            f"input_shape {tuple(input_shape)} is smaller than one {patch_size}x{patch_size} patch"
        )

    inputs = layers.Input(shape=input_shape)

    # Patch embedding: a strided convolution projects every non-overlapping patch to embed_dim
    patches = layers.Conv2D(
        filters=embed_dim, kernel_size=patch_size, strides=patch_size, padding='valid'
    )(inputs)
    patches = layers.Reshape((num_patches, embed_dim))(patches)  # Flatten the patch grid into a sequence

    # Positional embedding. It is applied through a layer that is part of the
    # model graph: calling Embedding directly on tf.range(...) yields a constant
    # tensor, so the embedding weights would never be trained.
    x = _AddPositionEmbedding(num_patches, embed_dim)(patches)

    # Transformer encoder layers
    for _ in range(num_layers):
        # Multi-head self-attention with residual connection and normalization
        attention_output = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)(x, x)
        x = layers.Add()([x, attention_output])
        x = layers.LayerNormalization()(x)

        # Feed-forward network with residual connection and normalization
        ff_output = layers.Dense(mlp_dim, activation='relu')(x)
        ff_output = layers.Dense(embed_dim)(ff_output)  # Project back to embed_dim for the residual add
        x = layers.Add()([x, ff_output])
        x = layers.LayerNormalization()(x)

    # Classification head: average over the patch sequence, then classify
    x = layers.GlobalAveragePooling1D()(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)

    return models.Model(inputs, outputs)
