<a href="https://colab.research.google.com/github/HarshithaShetty27/CerebralFusion/blob/main/Brain_tumor_hybrid.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

import zipfile
zip_path = '/content/drive/MyDrive/Brain_Tumor_Dataset/brain_tumor.zip'
extract_path = '/content/Brain_Tumor_Dataset'

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

Mounted at /content/drive


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator

base_dir = '/content/Brain_Tumor_Dataset'
categories = ['glioma', 'meningioma', 'notumor', 'pituitary']

# Split dataset into training, validation, and test sets
train_dir = os.path.join(base_dir, 'Training')
val_dir = os.path.join(base_dir, 'Validation')
test_dir = os.path.join(base_dir, 'Testing')

for category in categories:
    os.makedirs(os.path.join(train_dir, category), exist_ok=True)
    os.makedirs(os.path.join(val_dir, category), exist_ok=True)
    os.makedirs(os.path.join(test_dir, category), exist_ok=True)

# Use ImageDataGenerator for loading and augmentation
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.15)

train_generator = datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    subset='training')

val_generator = datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    subset='validation')

test_generator = datagen.flow_from_directory(
    test_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical')

Found 4857 images belonging to 4 classes.
Found 855 images belonging to 4 classes.
Found 1311 images belonging to 4 classes.


In [4]:
from skimage.filters import threshold_otsu
from skimage import img_as_ubyte
import tensorflow as tf

def preprocess_image(image):
    # OTSU thresholding
    thresh = threshold_otsu(image)
    binary = image > thresh
    binary = img_as_ubyte(binary)

    # Resize and normalize
    image = tf.image.resize(image, [224, 224])
    image = image / 255.0

    return image

In [5]:
datagen_aug = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest')

train_generator_aug = datagen_aug.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical')

Found 5712 images belonging to 4 classes.


In [6]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from google.colab import drive
import os

# Mount Google Drive
# drive.mount('/content/drive')

# Define the path to save the model in Google Drive
model_save_path = '/content/drive/MyDrive/models/vgg16_model.h5'

# Check if the model already exists
if os.path.exists(model_save_path):
    # Load the pre-trained model from Google Drive
    from tensorflow.keras.models import load_model
    model_vgg = load_model(model_save_path)
    print("Model loaded from Google Drive.")
else:
    # Build the VGG16 model
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(len(categories), activation='softmax')(x)
    model_vgg = Model(inputs=base_model.input, outputs=predictions)

    # Freeze the base model layers
    for layer in base_model.layers:
        layer.trainable = False

    # Compile the model
    model_vgg.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Train the model
    history_vgg = model_vgg.fit(train_generator_aug, validation_data=val_generator, epochs=10)

    # Save the trained model to Google Drive
    model_vgg.save(model_save_path)
    print("Model trained and saved to Google Drive.")



Model loaded from Google Drive.


In [7]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from google.colab import drive
import os

# Mount Google Drive
drive.mount('/content/drive')

# Define the path to save the model in Google Drive
model_save_path = '/content/drive/MyDrive/models/resnet50_model.h5'

# Check if the model already exists
if os.path.exists(model_save_path):
    # Load the pre-trained model from Google Drive
    from tensorflow.keras.models import load_model
    model_resnet = load_model(model_save_path)
    print("ResNet50 model loaded from Google Drive.")
else:
    # Build the ResNet50 model
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(len(categories), activation='softmax')(x)
    model_resnet = Model(inputs=base_model.input, outputs=predictions)

    # Freeze the base model layers
    for layer in base_model.layers:
        layer.trainable = False

    # Compile the model
    model_resnet.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Train the model
    history_resnet = model_resnet.fit(train_generator_aug, validation_data=val_generator, epochs=10)

    # Save the trained model to Google Drive
    model_resnet.save(model_save_path)
    print("ResNet50 model trained and saved to Google Drive.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).




ResNet50 model loaded from Google Drive.


In [8]:
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from google.colab import drive
import os

# Mount Google Drive
drive.mount('/content/drive')

# Define the path to save the model in Google Drive
model_save_path = '/content/drive/MyDrive/models/densenet121_model.h5'

# Check if the model already exists
if os.path.exists(model_save_path):
    # Load the pre-trained model from Google Drive
    from tensorflow.keras.models import load_model
    model_densenet = load_model(model_save_path)
    print("DenseNet121 model loaded from Google Drive.")
else:
    # Build the DenseNet121 model
    base_model = DenseNet121(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(len(categories),
                        activation='softmax')(x)
    model_densenet = Model(inputs=base_model.input, outputs=predictions)

    # Freeze the base model layers
    for layer in base_model.layers:
        layer.trainable = False

    # Compile the model
    model_densenet.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Train the model
    history_densenet = model_densenet.fit(train_generator_aug, validation_data=val_generator, epochs=10)

    # Save the trained model to Google Drive
    model_densenet.save(model_save_path)
    print("DenseNet121 model trained and saved to Google Drive.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).




DenseNet121 model loaded from Google Drive.


In [9]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from google.colab import drive
import os

# Mount Google Drive
drive.mount('/content/drive')

# Define the path to save the model in Google Drive
model_save_path = '/content/drive/MyDrive/models/custom_cnn_model.h5'

# Check if the model already exists
if os.path.exists(model_save_path):
    # Load the pre-trained model from Google Drive
    from tensorflow.keras.models import load_model
    model_custom = load_model(model_save_path)
    print("Custom CNN model loaded from Google Drive.")
else:
    # Build the custom CNN model
    model_custom = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(len(categories), activation='softmax')
    ])

# Compile the model
    model_custom.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Train the model
    history_custom = model_custom.fit(train_generator_aug, validation_data=val_generator, epochs=10)

    # Save the trained model to Google Drive
    model_custom.save(model_save_path)
    print("Custom CNN model trained and saved to Google Drive.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).




Custom CNN model loaded from Google Drive.


In [None]:
# from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dropout, UpSampling2D, concatenate
# from tensorflow.keras.models import Model

# def unet_model(input_size=(224, 224, 3)):
#     inputs = Input(input_size)

#     # Downsample path
#     conv1 = Conv2D(64, (3, 3), activation='relu', padding='same')(inputs)
#     conv1 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv1)
#     pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

#     conv2 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool1)
#     conv2 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv2)
#     pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

#     conv3 = Conv2D(256, (3, 3), activation='relu', padding='same')(pool2)
#     conv3 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv3)
#     pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

#     # Bottleneck
#     conv4 = Conv2D(512, (3, 3), activation='relu', padding='same')(pool3)
#     conv4 = Conv2D(512, (3, 3), activation='relu', padding='same')(conv4)

#     # Upsample path
#     up5 = UpSampling2D(size=(2, 2))(conv4)
#     up5 = Conv2D(256, (2, 2), activation='relu', padding='same')(up5)
#     merge5 = concatenate([conv3, up5], axis=3)
#     conv5 = Conv2D(256, (3, 3), activation='relu', padding='same')(merge5)
#     conv5 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv5)

#     up6 = UpSampling2D(size=(2, 2))(conv5)
#     up6 = Conv2D(128, (2, 2), activation='relu', padding='same')(up6)
#     merge6 = concatenate([conv2, up6], axis=3)
#     conv6 = Conv2D(128, (3, 3), activation='relu', padding='same')(merge6)
#     conv6 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv6)

#     up7 = UpSampling2D(size=(2, 2))(conv6)
#     up7 = Conv2D(64, (2, 2), activation='relu', padding='same')(up7)
#     merge7 = concatenate([conv1, up7], axis=3)
#     conv7 = Conv2D(64, (3, 3), activation='relu', padding='same')(merge7)
#     conv7 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv7)

#     # Output for segmentation
#     outputs_seg = Conv2D(1, (1, 1), activation='sigmoid')(conv7)

#     # Classification head
#     flatten = Flatten()(conv7)
#     dense1 = Dense(128, activation='relu')(flatten)
#     outputs_cls = Dense(len(categories), activation='softmax')(dense1)

#     model = Model(inputs=inputs, outputs=[outputs_seg, outputs_cls])
#     return model

# # Compile the model
# model_unet = unet_model()
# model_unet.compile(optimizer='adam',loss=['binary_crossentropy', 'categorical_crossentropy'],metrics=['accuracy'])

# # Train the model
# history_unet = model_unet.fit(train_generator_aug,validation_data=val_generator, epochs=10)


In [10]:
import cv2
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from google.colab import drive
import joblib
import os

# Mount Google Drive
drive.mount('/content/drive')

# Define the path to save the model in Google Drive
model_save_path = '/content/drive/MyDrive/models/svm_model.pkl'

# Define the function before checking for model existence
def extract_gabor_features(images):
    features = []
    kernels = []
    for theta in range(4):
        theta = theta / 4. * np.pi
        for sigma in (1, 3):
            for lamda in np.arange(0, np.pi, np.pi / 4):
                for gamma in (0.05, 0.5):
                    kernel = cv2.getGaborKernel((21, 21), sigma, theta, lamda, gamma, 0, ktype=cv2.CV_32F)
                    kernels.append(kernel)

    for image in images:
        feature_vector = []
        for kernel in kernels:
            filtered = cv2.filter2D(image, cv2.CV_8UC3, kernel)
            feature_vector.append(filtered.mean())
        features.append(feature_vector)
    return np.array(features)

# Check if the model already exists
if os.path.exists(model_save_path):
    # Load the pre-trained model from Google Drive
    svm_model = joblib.load(model_save_path)
    print("SVM model loaded from Google Drive.")
else:
    # Load images and labels
    images = []
    labels = []
    for category in categories:
        category_dir = os.path.join(train_dir, category)
        for img_name in os.listdir(category_dir):
            img_path = os.path.join(category_dir, img_name)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            img = cv2.resize(img, (224, 224))
            images.append(img)
            labels.append(categories.index(category))

    # Extract Gabor features
    X = extract_gabor_features(images)
    y = np.array(labels)

    # Split data into training and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train SVM
    svm_model = SVC(kernel='linear', probability=True)
    svm_model.fit(X_train, y_train)

    # Save the trained model to Google Drive
    joblib.dump(svm_model, model_save_path)
    print("SVM model trained and saved to Google Drive.")

# Evaluate SVM
# y_pred = svm_model.predict(X_val)
# print("SVM Accuracy:", accuracy_score(y_val, y_pred))


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
SVM model loaded from Google Drive.


In [11]:
from sklearn.ensemble import RandomForestClassifier
from skimage.feature import hog
from skimage import exposure
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from google.colab import drive
import joblib
import os

# Mount Google Drive
drive.mount('/content/drive')

# Define the path to save the model in Google Drive
model_save_path = '/content/drive/MyDrive/models/random_forest_model.pkl'

# Define the function before checking for model existence
def extract_hog_features(images):
    features = []
    for image in images:
        # Convert RGB image to grayscale if necessary
        if len(image.shape) == 3 and image.shape[2] == 3:  # Check if image is RGB
            image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        # Extract HOG features
        fd, _ = hog(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2),
                    visualize=True, channel_axis=None)
        features.append(fd)
    return np.array(features)

# Check if the model already exists
if os.path.exists(model_save_path):
    # Load the pre-trained model from Google Drive
    rf_model = joblib.load(model_save_path)
    print("Random Forest model loaded from Google Drive.")
else:
    # Load images and labels
    images = []
    labels = []
    categories = ["category1", "category2"]  # Update with actual category names
    train_dir = "/content/drive/MyDrive/dataset/train"  # Update with your dataset path

    for category in categories:
        category_dir = os.path.join(train_dir, category)
        for img_name in os.listdir(category_dir):
            img_path = os.path.join(category_dir, img_name)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  # Load image in grayscale
            img = cv2.resize(img, (224, 224))  # Resize image
            images.append(img)
            labels.append(categories.index(category))

    # Extract HOG features
    X_hog = extract_hog_features(images)
    y = np.array(labels)

    # Split data into training and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X_hog, y, test_size=0.2, random_state=42)

    # Train Random Forest
    rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_model.fit(X_train, y_train)

    # Save the trained model to Google Drive
    joblib.dump(rf_model, model_save_path)
    print("Random Forest model trained and saved to Google Drive.")

# Evaluate Random Forest
# y_pred = rf_model.predict(X_val)
# print("Random Forest Accuracy:", accuracy_score(y_val, y_pred))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Random Forest model loaded from Google Drive.


In [12]:
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

# Function to evaluate a model
def evaluate_model(model, X_val, y_val, is_deep_learning=False):
    if is_deep_learning:
        y_pred_proba = model.predict(X_val)
        y_pred = np.argmax(y_pred_proba, axis=1)
        y_val = np.argmax(y_val, axis=1)  # Convert one-hot to integer labels
    else:
        y_pred = model.predict(X_val)
        y_pred_proba = model.predict_proba(X_val)  # For binary classification

    accuracy = accuracy_score(y_val, y_pred)
    f1 = f1_score(y_val, y_pred, average='weighted')
    # Handle AUC-ROC correctly for multi-class classification
    if len(np.unique(y_val)) > 2:  # Multi-class case
        auc_roc = roc_auc_score(y_val, y_pred_proba, multi_class='ovr')
    else:  # Binary case
        auc_roc = roc_auc_score(y_val, y_pred_proba[:, 1])  # Take probability of class 1
    return accuracy, f1, auc_roc

# Evaluate all models
models = {
    'VGG16': model_vgg,
    'ResNet50': model_resnet,
    'DenseNet': model_densenet,
    'CNN': model_custom,
    'SVM': svm_model,
    'RandomForest': rf_model
}

results = {}
# Get the full validation data
X_val_dl_full = []
y_val_dl_full = []
for i in range(len(val_generator)):
    X_batch, y_batch = val_generator[i]
    X_val_dl_full.append(X_batch)
    y_val_dl_full.append(y_batch)
X_val_dl_full = np.vstack(X_val_dl_full)
y_val_dl_full = np.vstack(y_val_dl_full)

# Convert RGB images to grayscale for HOG feature extraction
X_val_dl_gray = np.array([cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) if len(img.shape) == 3 else img for img in X_val_dl_full])

# Extract features for SVM and Random Forest
X_val_gabor = extract_gabor_features(X_val_dl_full)  # Gabor features for SVM
X_val_hog = extract_hog_features(X_val_dl_gray)  # HOG features for Random Forest

# Evaluate each model
for name, model in models.items():
    if name in ['VGG16', 'ResNet50', 'DenseNet', 'CNN']:
        # For deep learning models, use the full validation data
        accuracy, f1, auc_roc = evaluate_model(model, X_val_dl_full, y_val_dl_full, is_deep_learning=True)
    else:
        # For SVM and Random Forest, use the feature-extracted validation set
        if name == 'SVM':
            X_val_fe = X_val_gabor  # Use Gabor features for SVM
        else:
            X_val_fe = X_val_hog  # Use HOG features for Random Forest
        accuracy, f1, auc_roc = evaluate_model(model, X_val_fe, np.argmax(y_val_dl_full, axis=1), is_deep_learning=False)
    results[name] = {'Accuracy': accuracy, 'F1-Score': f1, 'AUC-ROC': auc_roc}

# Convert results to a DataFrame for easy comparison
import pandas as pd
results_df = pd.DataFrame(results).T
print("Model Performance:")
print(results_df)

# Dynamically select top 3 models based on a combined score (e.g., average of normalized metrics)
results_df['Combined_Score'] = (results_df['Accuracy'] + results_df['F1-Score'] + results_df['AUC-ROC']) / 3
top_3_models = results_df.nlargest(3, 'Combined_Score').index.tolist()
print("\nTop 3 Models:",top_3_models)

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m535s[0m 20s/step
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m163s[0m 6s/step
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m165s[0m 6s/step
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 1s/step
Model Performance:
              Accuracy  F1-Score   AUC-ROC
VGG16         0.231579  0.087090  0.768889
ResNet50      0.242105  0.108129  0.482114
DenseNet      0.402339  0.340346  0.712611
CNN           0.231579  0.087090  0.350435
SVM           0.231579  0.087090  0.663919
RandomForest  0.926316  0.926196  0.992236

Top 3 Models: ['RandomForest', 'DenseNet', 'VGG16']


In [13]:
# Get predictions from top 3 models
y_preds = []
for model_name in top_3_models:
    model = models[model_name]
    if model_name in ['VGG16', 'ResNet50', 'DenseNet', 'CNN']:
        # For deep learning models, use the full validation data
        y_pred_proba = model.predict(X_val_dl_full)
    else:
        # For SVM and Random Forest, use the feature-extracted validation set
        if model_name == 'SVM':
            X_val_fe = X_val_gabor  # Use Gabor features for SVM
        else:
            X_val_fe = X_val_hog  # Use HOG features for Random Forest
        y_pred_proba = model.predict_proba(X_val_fe)
    y_preds.append(y_pred_proba)

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m169s[0m 6s/step
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m539s[0m 20s/step


In [14]:
# Weighted averaging (weights based on model performance)
weights = results_df.loc[top_3_models, 'Combined_Score'].values
weights /= weights.sum()  # Normalize weights
y_weighted_avg = np.average(y_preds, axis=0, weights=weights)
y_weighted_avg_classes = np.argmax(y_weighted_avg, axis=1)

# Evaluate weighted averaging hybrid model
accuracy_weighted = accuracy_score(np.argmax(y_val_dl_full, axis=1), y_weighted_avg_classes)
f1_weighted = f1_score(np.argmax(y_val_dl_full, axis=1), y_weighted_avg_classes, average='weighted')
auc_roc_weighted = roc_auc_score(np.argmax(y_val_dl_full, axis=1), y_weighted_avg, multi_class='ovr')
print("\nWeighted Averaging Hybrid Model Performance:")
print(f"Accuracy: {accuracy_weighted}, F1-Score: {f1_weighted}, AUC-ROC: {auc_roc_weighted}")


Weighted Averaging Hybrid Model Performance:
Accuracy: 0.8023391812865497, F1-Score: 0.8067753864823796, AUC-ROC: 0.9876052372044419


In [15]:
from sklearn.linear_model import LogisticRegression

# Stack predictions from top 3 models
X_stack = np.hstack(y_preds)

# Train a meta-learner (Logistic Regression)
meta_learner = LogisticRegression()
meta_learner.fit(X_stack, np.argmax(y_val_dl_full, axis=1))

# Evaluate stacking hybrid model
y_stack_pred = meta_learner.predict(X_stack)
y_stack_pred_proba = meta_learner.predict_proba(X_stack)

accuracy_stack = accuracy_score(np.argmax(y_val_dl_full, axis=1), y_stack_pred)
f1_stack = f1_score(np.argmax(y_val_dl_full, axis=1), y_stack_pred, average='weighted')
auc_roc_stack = roc_auc_score(np.argmax(y_val_dl_full, axis=1), y_stack_pred_proba, multi_class='ovr')
print("\nStacking Hybrid Model Performance:")
print(f"Accuracy: {accuracy_stack}, F1-Score: {f1_stack}, AUC-ROC: {auc_roc_stack}")


Stacking Hybrid Model Performance:
Accuracy: 0.9298245614035088, F1-Score: 0.9298709018370045, AUC-ROC: 0.9933081621119015


In [16]:
from scipy.stats import mode

# Majority voting
y_pred_classes = np.array([np.argmax(y_pred, axis=1) for y_pred in y_preds])  # Stack predictions into a 2D array
print("Shape of y_pred_classes:", y_pred_classes.shape)  # Verify the shape

y_majority_vote, _ = mode(y_pred_classes, axis=0)  # Apply mode along the correct axis
y_majority_vote = y_majority_vote.flatten()  # Flatten the result to get 1D array

# Compute average probability predictions for AUC-ROC
y_avg_proba = np.mean(y_preds, axis=0)  # Average probabilities across models

# Verify shapes
print("Shape of y_avg_proba:", y_avg_proba.shape)
print("Shape of y_val_dl_full:", y_val_dl_full.shape)

# Evaluate majority voting hybrid model
accuracy_majority = accuracy_score(np.argmax(y_val_dl_full, axis=1), y_majority_vote)
f1_majority = f1_score(np.argmax(y_val_dl_full, axis=1), y_majority_vote, average='weighted')
auc_roc_majority = roc_auc_score(np.argmax(y_val_dl_full, axis=1), y_avg_proba, multi_class='ovr')
print("\nMajority Voting Hybrid Model Performance:")
print(f"Accuracy: {accuracy_majority}, F1-Score: {f1_majority}, AUC-ROC: {auc_roc_majority}")

Shape of y_pred_classes: (3, 855)
Shape of y_avg_proba: (855, 4)
Shape of y_val_dl_full: (855, 4)

Majority Voting Hybrid Model Performance:
Accuracy: 0.408187134502924, F1-Score: 0.3565507762581732, AUC-ROC: 0.9754513880067884


In [17]:
# Compare hybrid model results
hybrid_results = {
    'Weighted Averaging': {'Accuracy': accuracy_weighted, 'F1-Score': f1_weighted, 'AUC-ROC': auc_roc_weighted},
    'Stacking': {'Accuracy': accuracy_stack, 'F1-Score': f1_stack, 'AUC-ROC': auc_roc_stack},
    'Majority Voting': {'Accuracy': accuracy_majority, 'F1-Score': f1_majority, 'AUC-ROC': auc_roc_majority}
}

# Convert to DataFrame for easy comparison
hybrid_results_df = pd.DataFrame(hybrid_results).T
hybrid_results_df['Combined_Score'] = (hybrid_results_df['Accuracy'] + hybrid_results_df['F1-Score'] + hybrid_results_df['AUC-ROC']) / 3
print("Hybrid Model Performance:")
print(hybrid_results_df)

# Select the best hybrid model
best_hybrid_model = hybrid_results_df.idxmax()['Combined_Score']
print("\nBest Hybrid Model:", best_hybrid_model)

Hybrid Model Performance:
                    Accuracy  F1-Score   AUC-ROC  Combined_Score
Weighted Averaging  0.802339  0.806775  0.987605        0.865573
Stacking            0.929825  0.929871  0.993308        0.951001
Majority Voting     0.408187  0.356551  0.975451        0.580063

Best Hybrid Model: Stacking


In [18]:
# Get the full test data
X_test_dl_full = []
y_test_dl_full = []
for i in range(len(test_generator)):
    X_batch, y_batch = test_generator[i]
    X_test_dl_full.append(X_batch)
    y_test_dl_full.append(y_batch)
X_test_dl_full = np.vstack(X_test_dl_full)
y_test_dl_full = np.vstack(y_test_dl_full)

# Convert RGB images to grayscale for HOG feature extraction (if needed)
X_test_dl_gray = np.array([cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) if len(img.shape) == 3 else img for img in X_test_dl_full])

# Extract features for SVM and Random Forest
X_test_gabor = extract_gabor_features(X_test_dl_full)  # Gabor features for SVM
X_test_hog = extract_hog_features(X_test_dl_gray)  # HOG features for Random Forest

In [19]:
if best_hybrid_model == 'Weighted Averaging':
    # Get predictions from top 3 models for the test set
    y_preds_test = []
    for model_name in top_3_models:
        model = models[model_name]
        if model_name in ['VGG16', 'ResNet50', 'DenseNet', 'CNN']:
            y_pred_proba = model.predict(X_test_dl_full)
        else:
            if model_name == 'SVM':
                X_test_fe = X_test_gabor  # Use Gabor features for SVM
            else:
                X_test_fe = X_test_hog  # Use HOG features for Random Forest
            y_pred_proba = model.predict_proba(X_test_fe)
        y_preds_test.append(y_pred_proba)

    # Weighted averaging (weights based on model performance)
    weights = results_df.loc[top_3_models, 'Combined_Score'].values
    weights /= weights.sum()  # Normalize weights
    y_weighted_avg_test = np.average(y_preds_test, axis=0, weights=weights)
    y_weighted_avg_classes_test = np.argmax(y_weighted_avg_test, axis=1)

    # Evaluate weighted averaging hybrid model on the test set
    accuracy_test = accuracy_score(np.argmax(y_test_dl_full, axis=1), y_weighted_avg_classes_test)
    f1_test = f1_score(np.argmax(y_test_dl_full, axis=1), y_weighted_avg_classes_test, average='weighted')
    auc_roc_test = roc_auc_score(np.argmax(y_test_dl_full, axis=1), y_weighted_avg_test, multi_class='ovr')
    print("\nWeighted Averaging Hybrid Model Performance on Test Set:")
    print(f"Accuracy: {accuracy_test}, F1-Score: {f1_test}, AUC-ROC: {auc_roc_test}")

elif best_hybrid_model == 'Stacking':
    # Get predictions from top 3 models for the test set
    y_preds_test = []
    for model_name in top_3_models:
        model = models[model_name]
        if model_name in ['VGG16', 'ResNet50', 'DenseNet', 'CNN']:
            y_pred_proba = model.predict(X_test_dl_full)
        else:
            if model_name == 'SVM':
                X_test_fe = X_test_gabor  # Use Gabor features for SVM
            else:
                X_test_fe = X_test_hog  # Use HOG features for Random Forest
            y_pred_proba = model.predict_proba(X_test_fe)
        y_preds_test.append(y_pred_proba)
        # Stack predictions
    X_stack_test = np.hstack(y_preds_test)

    # Evaluate stacking hybrid model on the test set
    y_stack_pred_test = meta_learner.predict(X_stack_test)
    y_stack_pred_proba_test = meta_learner.predict_proba(X_stack_test)

    accuracy_test = accuracy_score(np.argmax(y_test_dl_full, axis=1), y_stack_pred_test)
    f1_test = f1_score(np.argmax(y_test_dl_full, axis=1), y_stack_pred_test, average='weighted')
    auc_roc_test = roc_auc_score(np.argmax(y_test_dl_full, axis=1), y_stack_pred_proba_test, multi_class='ovr')
    print("\nStacking Hybrid Model Performance on Test Set:")
    print(f"Accuracy: {accuracy_test}, F1-Score: {f1_test}, AUC-ROC: {auc_roc_test}")

elif best_hybrid_model == 'Majority Voting':
    # Get predictions from top 3 models for the test set
    y_preds_test = []
    for model_name in top_3_models:
        model = models[model_name]
        if model_name in ['VGG16', 'ResNet50', 'DenseNet', 'CNN']:
            y_pred_proba = model.predict(X_test_dl_full)
        else:
            if model_name == 'SVM':
                X_test_fe = X_test_gabor  # Use Gabor features for SVM
            else:
                X_test_fe = X_test_hog  # Use HOG features for Random Forest
            y_pred_proba = model.predict_proba(X_test_fe)
        y_preds_test.append(y_pred_proba)

    # Majority voting
    y_pred_classes_test = np.array([np.argmax(y_pred, axis=1) for y_pred in y_preds_test])
    y_majority_vote_test, _ = mode(y_pred_classes_test, axis=0)
    y_majority_vote_test = y_majority_vote_test.flatten()
  # Evaluate majority voting hybrid model on the test set
    accuracy_test = accuracy_score(np.argmax(y_test_dl_full, axis=1), y_majority_vote_test)
    f1_test = f1_score(np.argmax(y_test_dl_full, axis=1), y_majority_vote_test, average='weighted')
    auc_roc_test = roc_auc_score(np.argmax(y_test_dl_full, axis=1), np.mean(y_preds_test, axis=0), multi_class='ovr')
    print("\nMajority Voting Hybrid Model Performance on Test Set:")
    print(f"Accuracy: {accuracy_test}, F1-Score: {f1_test}, AUC-ROC: {auc_roc_test}")


[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m240s[0m 6s/step
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m813s[0m 20s/step

Stacking Hybrid Model Performance on Test Set:
Accuracy: 0.8703279938977879, F1-Score: 0.8658970982004874, AUC-ROC: 0.9739805217029791


In [20]:
import joblib

# Save the best hybrid model
if best_hybrid_model == 'Weighted Averaging':
    # Save the top 3 models
    for model_name in top_3_models:
        model = models[model_name]
        if model_name in ['VGG16', 'ResNet50', 'DenseNet', 'CNN']:
            model.save(f'{model_name}_model.h5')
        else:
            joblib.dump(model, f'{model_name}_model.pkl')
elif best_hybrid_model == 'Stacking':
    # Save the meta-learner and top 3 models
    joblib.dump(meta_learner, 'stacking_meta_learner.pkl')
    for model_name in top_3_models:
        model = models[model_name]
        if model_name in ['VGG16', 'ResNet50', 'DenseNet', 'CNN']:
            model.save(f'{model_name}_model.h5')
        else:
            joblib.dump(model, f'{model_name}_model.pkl')
elif best_hybrid_model == 'Majority Voting':
    # Save the top 3 models
    for model_name in top_3_models:
        model = models[model_name]
        if model_name in ['VGG16', 'ResNet50', 'DenseNet', 'CNN']:
            model.save(f'{model_name}_model.h5')
        else:
            joblib.dump(model, f'{model_name}_model.pkl')



In [27]:
import cv2
import numpy as np
import joblib
from tensorflow.keras.models import load_model
from scipy.stats import mode
from google.colab import files
from skimage.feature import hog  # Import HOG feature extractor

def extract_hog_features(image):
    # Ensure the image is large enough for HOG feature extraction
    min_size = 16  # Minimum size required by HOG
    if image.shape[0] < min_size or image.shape[1] < min_size:
        # Resize the image to meet the minimum size requirement
        image = cv2.resize(image, (min_size, min_size))

    # Extract HOG features
    features = hog(
        image,
        orientations=9,  # Number of orientation bins
        pixels_per_cell=(8, 8),  # Size of a cell
        cells_per_block=(2, 2),  # Number of cells in each block
        block_norm='L2-Hys',  # Normalization method
        visualize=False,  # Do not return the HOG image
        feature_vector=True  # Return features as a 1D array
    )
    return features.reshape(1, -1)  # Reshape to match input format for classifiers

def predict_with_hybrid_model(image_path):
    # Load the image and preprocess it
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)  # Load as grayscale
    img = cv2.resize(img, (224, 224))  # Resize to 224x224
    img = img / 255.0  # Normalize
    img = np.expand_dims(img, axis=-1)  # Add channel dimension (1 channel)
    img = np.repeat(img, 3, axis=-1)  # Convert to 3 channels (RGB)
    img = np.expand_dims(img, axis=0)  # Add batch dimension (1, 224, 224, 3)

    # Load the top 3 models
    top_models = {}
    for model_name in top_3_models:
        if model_name in ['VGG16', 'ResNet50', 'DenseNet', 'CNN']:
            top_models[model_name] = load_model(f'{model_name}_model.h5')
        else:
            top_models[model_name] = joblib.load(f'{model_name}_model.pkl')

    # Get predictions from the top 3 models
    y_preds = []
    for model_name, model in top_models.items():
        if model_name in ['VGG16', 'ResNet50', 'DenseNet', 'CNN']:
            y_pred_proba = model.predict(img)  # Predict using deep learning models
        else:
            if model_name == 'SVM':
                features = extract_gabor_features(img[0, :, :, 0])  # Extract features from grayscale
            else:
                features = extract_hog_features(img[0, :, :, 0])  # Extract features from grayscale
            y_pred_proba = model.predict_proba(features)
        y_preds.append(y_pred_proba)

    # Combine predictions based on the best hybrid model
    if best_hybrid_model == 'Weighted Averaging':
        weights = results_df.loc[top_3_models, 'Combined_Score'].values
        weights /= weights.sum()  # Normalize weights
        y_weighted_avg = np.average(y_preds, axis=0, weights=weights)
        final_pred = np.argmax(y_weighted_avg, axis=1)
    elif best_hybrid_model == 'Stacking':
        X_stack = np.hstack(y_preds)
        meta_learner = joblib.load('stacking_meta_learner.pkl')
        final_pred = meta_learner.predict(X_stack)
    elif best_hybrid_model == 'Majority Voting':
        y_pred_classes = np.array([np.argmax(y_pred, axis=1) for y_pred in y_preds])
        final_pred, _ = mode(y_pred_classes, axis=0)
        final_pred = final_pred.flatten()

    # Map prediction to class label
    class_labels = ['glioma', 'meningioma', 'notumor', 'pituitary']
    return class_labels[final_pred[0]]

# Upload an image using Google Colab's file uploader
uploaded = files.upload()
image_path = next(iter(uploaded))

# Make a prediction
if image_path:
    prediction = predict_with_hybrid_model(image_path)
    print("Predicted Class:", prediction)
else:
    print("No image selected.")

Saving Y12.jpg to Y12.jpg




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 945ms/step
Predicted Class: pituitary
