# Brain Tumor Detection - Complete Pipeline

This notebook contains the complete pipeline for the Brain Tumor Detection system: downloading the MRI dataset, preprocessing the images, defining the model, training it, and plotting the training curves.
You can run this directly in Google Colab.

In [None]:
# 1. Install Dependencies
!pip install kagglehub tensorflow opencv-python matplotlib scikit-learn

In [None]:
# 2. Download Dataset
import kagglehub
import shutil
import os

def download_and_setup_data(dataset="masoudnickparvar/brain-tumor-mri-dataset", target_path=None):
    """
    Downloads a Kaggle dataset with kagglehub and copies it into the project.

    Args:
        dataset: kagglehub dataset handle to download.
        target_path: Destination directory. Defaults to ``<cwd>/data/real``.
            Any directory already present at this path is deleted first.

    Returns:
        The directory the dataset was copied to.
    """
    if target_path is None:
        target_path = os.path.join(os.getcwd(), "data", "real")

    print("Downloading dataset...")
    path = kagglehub.dataset_download(dataset)
    print("Path to dataset files:", path)

    # Remove any previous copy only after the download has succeeded, so a
    # failed download does not leave the project without data.
    if os.path.exists(target_path):
        shutil.rmtree(target_path)

    # Copy to project directory
    print(f"Copying to {target_path}...")
    shutil.copytree(path, target_path)
    print("Dataset setup complete.")

    return target_path

# Fetch the dataset now so that data/real exists before the training cell
# reads images from data/real/Training.
download_and_setup_data()

In [None]:
# 3. Imports
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

In [None]:
# 4. Preprocessing Functions
def load_data(data_dir, img_size=(224, 224)):
    """
    Loads and resizes the images under ``data_dir`` and labels them for
    binary classification.

    Assumes structure:
    data_dir/
        glioma/
        meningioma/
        notumor/
        pituitary/
    Maps to Binary: 0 (No Tumor), 1 (Tumor)

    Args:
        data_dir: Directory containing one sub-directory per category.
            Missing category directories are reported and skipped.
        img_size: Target size handed to ``cv2.resize``.

    Returns:
        Tuple ``(data, labels)`` of NumPy arrays: the resized images and the
        matching 0/1 labels. Both arrays are empty when nothing was loaded.
    """
    # notumor -> 0, every other category -> 1
    categories = ['notumor', 'glioma', 'meningioma', 'pituitary']

    data = []
    labels = []
    skipped = 0

    for category in categories:
        path = os.path.join(data_dir, category)
        binary_label = 0 if category == 'notumor' else 1

        if not os.path.exists(path):
            print(f"Warning: Directory {path} does not exist. Skipping...")
            continue

        print(f"Loading {category}...")
        # os.listdir returns entries in arbitrary order; sort them so the
        # sample order (and therefore any seeded split) is reproducible.
        for img_name in sorted(os.listdir(path)):
            img_path = os.path.join(path, img_name)
            try:
                img_array = cv2.imread(img_path)
                if img_array is None:
                    # imread gave nothing back for this entry; skip it.
                    skipped += 1
                    continue
                new_array = cv2.resize(img_array, img_size)
            except Exception as e:
                # Stay best-effort, but report the failure instead of hiding it.
                print(f"Warning: could not load {img_path}: {e}")
                skipped += 1
                continue
            data.append(new_array)
            labels.append(binary_label)

    if skipped:
        print(f"Skipped {skipped} unreadable file(s).")

    return np.array(data), np.array(labels)

def preprocess_data(data, labels):
    """
    Scales pixel values to [0, 1] and returns the labels as a NumPy array.

    The labels are NOT one-hot encoded: they stay as given (0/1 integers in
    this notebook).

    Args:
        data: Array-like of pixel values in the range 0-255.
        labels: Sequence or array of labels.

    Returns:
        Tuple ``(data, labels)``: ``data`` as a float32 array scaled to
        [0, 1], and ``labels`` as a NumPy array.
    """
    # Cast to float32 before dividing. Dividing a uint8 array by a Python
    # float would produce float64, doubling the memory for no benefit.
    data = np.asarray(data, dtype=np.float32) / np.float32(255.0)

    # Convert labels to numpy array if not already
    labels = np.array(labels)

    return data, labels

In [None]:
# 5. Model Definition
def build_model(input_shape=(224, 224, 3)):
    """
    Builds and compiles the CNN used for binary tumor / no-tumor classification.

    Architecture: three Conv2D + MaxPooling2D stages (32, 64, 128 filters),
    a 128-unit dense layer with 50% dropout, and a single sigmoid output.

    Args:
        input_shape: Shape of one input image.

    Returns:
        The compiled Sequential model (Adam optimizer, binary cross-entropy
        loss, accuracy metric).
    """
    model = Sequential([
        # Declare the input with an explicit Input object; passing
        # input_shape= to the first layer is deprecated in Keras 3.
        tf.keras.Input(shape=input_shape),

        # Convolutional Layer 1
        Conv2D(32, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),

        # Convolutional Layer 2
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),

        # Convolutional Layer 3
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),

        # Flattening
        Flatten(),

        # Fully Connected Layer
        Dense(128, activation='relu'),
        Dropout(0.5),  # Add dropout to prevent overfitting

        # Output Layer (Binary Classification: Tumor vs No Tumor)
        Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    return model

# Build an untrained model here only to inspect the architecture; the
# training cell builds its own model and rebinds `model` to it.
model = build_model()
model.summary()

In [None]:
# 6. Training Pipeline
def train_pipeline(data_dir="data/real/Training", epochs=20, batch_size=32,
                   checkpoint_path="brain_tumor_model.keras"):
    """
    Loads the images, trains the CNN and evaluates it on a held-out test set.

    Args:
        data_dir: Directory with one sub-directory per category. In
            Colab/monolithic use it is relative to where the dataset was
            copied.
        epochs: Maximum number of training epochs (early stopping may end
            training sooner).
        batch_size: Mini-batch size used for training.
        checkpoint_path: File the best model (lowest validation loss) is
            saved to.

    Returns:
        Tuple ``(history, model)``: the object returned by ``model.fit`` and
        the trained model.

    Raises:
        FileNotFoundError: If no images could be loaded from ``data_dir``.
    """
    print("Loading data...")
    X, y = load_data(data_dir)

    # Fail loudly: returning nothing here would make the caller's
    # `history, model = train_pipeline()` die with an unrelated unpacking error.
    if len(X) == 0:
        raise FileNotFoundError(f"No data found. Please place images in {data_dir}")

    print(f"Data loaded: {len(X)} samples")
    X, y = preprocess_data(X, y)

    # Hold out 20% as a test set that training never sees.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # The unsplit arrays are not used again; drop the references so the
    # memory can be reclaimed before training starts.
    del X, y

    # Build model
    model = build_model()

    # Callbacks
    callbacks = [
        tf.keras.callbacks.ModelCheckpoint(checkpoint_path, save_best_only=True, monitor='val_loss'),
        tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    ]

    # Train. Validation data is carved out of the training portion instead of
    # reusing the test set: checkpointing and early stopping select the model
    # on val_loss, so validating on the test set would bias the final score.
    # X_train was already shuffled by train_test_split, so taking the
    # validation fraction from its tail is a random sample.
    print("Starting training...")
    history = model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.1,
        callbacks=callbacks
    )

    print("Training complete.")

    # Evaluate on data that influenced neither the weights nor model selection.
    loss, accuracy = model.evaluate(X_test, y_test)
    print(f"Test Accuracy: {accuracy*100:.2f}%")

    return history, model

# Run the whole pipeline; `history` feeds the accuracy plot in the next cell
# and `model` replaces the untrained model built earlier.
history, model = train_pipeline()

In [None]:
# 7. Visualization
# Draw training and validation accuracy per epoch on one explicit Axes.
fig, ax = plt.subplots()
for metric_name in ('accuracy', 'val_accuracy'):
    ax.plot(history.history[metric_name], label=metric_name)
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_ylim([0.5, 1])
ax.legend(loc='lower right')
plt.show()