In [2]:
import numpy as np
from tensorflow.keras import layers, models
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import Model
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Build and compile the CNN classifier
def create_cnn_model(input_shape=(224, 224, 3), num_classes=4):
    """Build and compile a small convolutional image classifier.

    The network stacks four Conv2D(3x3, ReLU, L2=0.001) + BatchNormalization
    stages with 32, 64, 128 and 128 filters. Stages 2-4 are followed by 2x2
    max pooling, and stages 2 and 3 additionally by dropout (0.3 and 0.4).
    The flattened feature map feeds a 512-unit ReLU dense layer
    (BatchNormalization + 0.5 dropout) and a softmax output layer.

    Args:
        input_shape: Shape of a single input image, passed to ``layers.Input``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A Keras ``Model`` compiled with the Adam optimizer, categorical
        cross-entropy loss and the accuracy metric.
    """
    # One entry per convolutional stage:
    # (number of filters, add 2x2 max pooling?, dropout rate or None).
    conv_stages = (
        (32, False, None),
        (64, True, 0.3),
        (128, True, 0.4),
        (128, True, None),
    )

    inputs = layers.Input(shape=input_shape)
    features = inputs
    for filters, use_pooling, dropout_rate in conv_stages:
        features = layers.Conv2D(
            filters, (3, 3), activation='relu', kernel_regularizer=l2(0.001)
        )(features)
        features = layers.BatchNormalization()(features)
        if use_pooling:
            features = layers.MaxPooling2D(2, 2)(features)
        if dropout_rate is not None:
            features = layers.Dropout(dropout_rate)(features)

    # Classification head. Its layer order matters: the notebook later taps
    # `layers[-4]` (the 512-unit Dense layer) as a feature extractor.
    features = layers.Flatten()(features)
    features = layers.Dense(512, activation='relu', kernel_regularizer=l2(0.001))(features)
    features = layers.BatchNormalization()(features)
    features = layers.Dropout(0.5)(features)
    outputs = layers.Dense(num_classes, activation='softmax')(features)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Build the CNN with its default input shape / class count and show its layers
cnn_model = create_cnn_model()
cnn_model.summary()

# Load the image datasets.
# NOTE: despite the `*_datagen` names (kept because later cells use them), no
# augmentation is configured here; the loaders are only given an image size,
# a batch size and a label mode.
dataset_options = dict(image_size=(224, 224), batch_size=32, label_mode="categorical")
# The training and validation subsets must share the same split fraction and
# seed so that they partition the Training directory consistently.
split_options = dict(validation_split=0.2, seed=123)
training_dir = "/kaggle/input/brain-tumor-classification-mri/Training"

train_datagen = image_dataset_from_directory(
    training_dir, subset='training', **split_options, **dataset_options
)

val_datagen = image_dataset_from_directory(
    training_dir, subset='validation', **split_options, **dataset_options
)

test_datagen = image_dataset_from_directory(
    "/kaggle/input/brain-tumor-classification-mri/Testing", **dataset_options
)

# Callbacks, both driven by the validation loss:
# - stop after 5 epochs without improvement and restore the best weights
# - halve the learning rate after 2 stagnant epochs, never going below 1e-6
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-6)
training_callbacks = [early_stopping, lr_scheduler]

# Fit the CNN for at most 30 epochs
print("Training CNN model with early stopping, learning rate scheduler, and data augmentation...")
cnn_model.fit(
    train_datagen,
    validation_data=val_datagen,
    epochs=30,
    callbacks=training_callbacks,
)

# Materialise a batched dataset as two NumPy arrays
def dataset_to_arrays(dataset):
    """Collect every batch of ``dataset`` into a pair of NumPy arrays.

    Args:
        dataset: Iterable yielding ``(image_batch, label_batch)`` pairs whose
            elements expose a ``.numpy()`` method.

    Returns:
        Tuple ``(images, labels)``: all image batches and all label batches,
        each concatenated along axis 0 in iteration order.

    Raises:
        ValueError: If ``dataset`` yields no batches (raised by
            ``np.concatenate`` on an empty list).
    """
    # A single pass over the dataset keeps each image batch paired with the
    # label batch it was yielded with.
    batches = [(image_batch.numpy(), label_batch.numpy()) for image_batch, label_batch in dataset]
    image_arrays = [pair[0] for pair in batches]
    label_arrays = [pair[1] for pair in batches]
    return np.concatenate(image_arrays, axis=0), np.concatenate(label_arrays, axis=0)

# Seed for the scikit-learn estimators so that the SVM probability
# calibration and the random forest give the same results on every run
# (same value as the seed used for the train/validation split).
CLASSIFIER_SEED = 123

# Materialise each dataset exactly once, so that images and labels stay
# aligned for every later step that uses these arrays.
X_train, y_train = dataset_to_arrays(train_datagen)
X_val, y_val = dataset_to_arrays(val_datagen)
X_test, y_test = dataset_to_arrays(test_datagen)

# The datasets were loaded with label_mode="categorical" (one-hot labels);
# the scikit-learn estimators below are fitted on integer class indices.
y_train_labels = np.argmax(y_train, axis=1)

# Extract features using the trained CNN model
print("Extracting features using CNN...")
# layers[-4] is the 512-unit Dense layer: the head built by create_cnn_model is
# Dense(512) -> BatchNormalization -> Dropout -> Dense(softmax). The extracted
# features are therefore the ReLU activations of that Dense layer.
cnn_feature_extractor = Model(inputs=cnn_model.input, outputs=cnn_model.layers[-4].output)
cnn_features_train = cnn_feature_extractor.predict(X_train)
cnn_features_val = cnn_feature_extractor.predict(X_val)
cnn_features_test = cnn_feature_extractor.predict(X_test)

# Guarantee 2-D (n_samples, n_features) inputs for SVM and Random Forest
cnn_features_train = cnn_features_train.reshape(cnn_features_train.shape[0], -1)
cnn_features_val = cnn_features_val.reshape(cnn_features_val.shape[0], -1)
cnn_features_test = cnn_features_test.reshape(cnn_features_test.shape[0], -1)

# Train the SVM model (probability=True is required for soft voting below)
print("Training SVM model...")
svm_model = SVC(kernel='linear', probability=True, random_state=CLASSIFIER_SEED)
svm_model.fit(cnn_features_train, y_train_labels)

# Train the Random Forest model
print("Training Random Forest model...")
rf_model = RandomForestClassifier(n_estimators=100, random_state=CLASSIFIER_SEED)
rf_model.fit(cnn_features_train, y_train_labels)

# Create an ensemble model using SVM and Random Forest
print("Creating ensemble model with SVM and Random Forest...")
ensemble_model = VotingClassifier(estimators=[
    ('svm', svm_model),
    ('rf', rf_model)
], voting='soft')

# Train the ensemble model on the same features and labels
ensemble_model.fit(cnn_features_train, y_train_labels)

# Score a Keras or scikit-learn classifier on a labelled set
def evaluate_model(model, X_test, y_test):
    """Compute accuracy and weighted precision / recall / F1 for ``model``.

    Args:
        model: A Keras ``Model`` (its ``predict`` output is reduced with
            ``argmax`` over axis 1) or any other object whose ``predict``
            already returns class indices.
        X_test: Inputs passed to ``model.predict``.
        y_test: One-hot encoded ground truth; decoded with ``argmax`` over
            axis 1.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``; the last three use
        ``average='weighted'``.
    """
    raw_predictions = model.predict(X_test)
    # Keras models return one score per class and need decoding; the other
    # models' predictions are used as returned.
    y_pred = np.argmax(raw_predictions, axis=1) if isinstance(model, Model) else raw_predictions
    y_true = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_true, y_pred)
    precision, recall, f1 = (
        scorer(y_true, y_pred, average='weighted')
        for scorer in (precision_score, recall_score, f1_score)
    )
    return accuracy, precision, recall, f1

# Evaluate every model on the test set. The CNN consumes the raw test images;
# the SVM, Random Forest and ensemble consume the CNN-extracted test features.
evaluation_plan = [
    ("Evaluating CNN model...", cnn_model, X_test),
    ("Evaluating SVM model...", svm_model, cnn_features_test),
    ("Evaluating Random Forest model...", rf_model, cnn_features_test),
    ("Evaluating Ensemble model...", ensemble_model, cnn_features_test),
]

model_scores = []
for status_message, candidate_model, model_inputs in evaluation_plan:
    print(status_message)
    model_scores.append(evaluate_model(candidate_model, model_inputs, y_test))

# Unpack into the per-model metric variables
cnn_scores, svm_scores, rf_scores, ensemble_scores = model_scores
cnn_accuracy, cnn_precision, cnn_recall, cnn_f1 = cnn_scores
svm_accuracy, svm_precision, svm_recall, svm_f1 = svm_scores
rf_accuracy, rf_precision, rf_recall, rf_f1 = rf_scores
ensemble_accuracy, ensemble_precision, ensemble_recall, ensemble_f1 = ensemble_scores

# Print results for all models
print(f"\nCNN Model - Accuracy: {cnn_accuracy:.4f}, Precision: {cnn_precision:.4f}, Recall: {cnn_recall:.4f}, F1 Score: {cnn_f1:.4f}")
print(f"SVM Model - Accuracy: {svm_accuracy:.4f}, Precision: {svm_precision:.4f}, Recall: {svm_recall:.4f}, F1 Score: {svm_f1:.4f}")
print(f"Random Forest Model - Accuracy: {rf_accuracy:.4f}, Precision: {rf_precision:.4f}, Recall: {rf_recall:.4f}, F1 Score: {rf_f1:.4f}")
print(f"Ensemble Model - Accuracy: {ensemble_accuracy:.4f}, Precision: {ensemble_precision:.4f}, Recall: {ensemble_recall:.4f}, F1 Score: {ensemble_f1:.4f}")

# Per-class breakdown for the ensemble model
print("\nEnsemble Model Classification Report:")
ensemble_true_labels = np.argmax(y_test, axis=1)
ensemble_test_predictions = ensemble_model.predict(cnn_features_test)
print(classification_report(ensemble_true_labels, ensemble_test_predictions))

Found 2870 files belonging to 4 classes.
Using 2296 files for training.
Found 2870 files belonging to 4 classes.
Using 574 files for validation.
Found 394 files belonging to 4 classes.
Training CNN model with early stopping, learning rate scheduler, and data augmentation...
Epoch 1/30
72/72 ━━━━━━━━━━━━━━━━━━━━ 63s 555ms/step - accuracy: 0.6150 - loss: 3.4520 - val_accuracy: 0.3449 - val_loss: 4.7666 - learning_rate: 0.0010
Epoch 2/30
72/72 ━━━━━━━━━━━━━━━━━━━━ 13s 173ms/step - accuracy: 0.7717 - loss: 2.9969 - val_accuracy: 0.7300 - val_loss: 2.9569 - learning_rate: 0.0010
Epoch 3/30
72/72 ━━━━━━━━━━━━━━━━━━━━ 13s 177ms/step - accuracy: 0.8421 - loss: 2.5481 - val_accuracy: 0.6446 - val_loss: 2.7104 - learning_rate: 0.0010
Epoch 4/30
72/72 ━━━━━━━━━━━━━━━━━━━━ 13s 175ms/step - accuracy: 0.8729 - loss: 2.1167 - val_accuracy: 0.5157 - val_loss: 4.1123 -

In [None]:
from sklearn.metrics import classification_report

# Class names come from the training dataset object created in the training
# cell (`train_datagen`); no variable named `train_data` exists in this
# notebook, so referencing it raised NameError.
# NOTE(review): assumes the Testing directory has the same class sub-folders
# as Training, so that label indices map to the same names — confirm.
class_names = train_datagen.class_names

# y_test is one-hot encoded; decode it once for all reports
y_test_labels = np.argmax(y_test, axis=1)

# Predict and evaluate the CNN model (per-class scores -> class indices)
cnn_predictions = cnn_model.predict(X_test)
cnn_pred_labels = np.argmax(cnn_predictions, axis=1)
cnn_report = classification_report(y_test_labels, cnn_pred_labels, target_names=class_names)

# Predict and evaluate the SVM model
svm_pred_labels = svm_model.predict(cnn_features_test)
svm_report = classification_report(y_test_labels, svm_pred_labels, target_names=class_names)

# Predict and evaluate the Random Forest model
rf_pred_labels = rf_model.predict(cnn_features_test)
rf_report = classification_report(y_test_labels, rf_pred_labels, target_names=class_names)

# Predict and evaluate the Ensemble model
ensemble_pred_labels = ensemble_model.predict(cnn_features_test)
ensemble_report = classification_report(y_test_labels, ensemble_pred_labels, target_names=class_names)

# Print evaluation reports
print("CNN Model Classification Report:")
print(cnn_report)

print("SVM Model Classification Report:")
print(svm_report)

print("Random Forest Model Classification Report:")
print(rf_report)

print("Ensemble Model Classification Report:")
print(ensemble_report)


In [None]:
from tensorflow.keras.preprocessing import image
import numpy as np

# Function to preprocess the new image
def preprocess_image(image_path, target_size=(150, 150), normalize=True):
    """Load one image file as a single-item batch.

    Args:
        image_path: Path of the image file to load.
        target_size: ``(height, width)`` the image is resized to on load.
        normalize: If True (default, the original behaviour), divide the
            pixel values by 255.0. Pass False when the consuming model was
            trained on unscaled pixel values.

    Returns:
        Array with a leading batch dimension of size 1.
    """
    img = image.load_img(image_path, target_size=target_size)
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
    if normalize:
        img_array = img_array / 255.0
    return img_array

# Load the new image
new_image_path = '/kaggle/input/brain-tumor-classification-mri/Testing/pituitary_tumor/image(15).jpg'  # Replace with the path to your new image
# The CNN was built for 224x224 inputs and trained on datasets that were
# loaded without any rescaling, so the new image must be prepared the same
# way. The function defaults (150x150, divided by 255) do not match the
# model's input shape or its training data.
new_image = preprocess_image(new_image_path, target_size=(224, 224), normalize=False)

# Extract features from the new image using the CNN model
cnn_features_new = cnn_feature_extractor.predict(new_image)
cnn_features_new = cnn_features_new.reshape(cnn_features_new.shape[0], -1)

# Predict using the ensemble model
predicted_class = ensemble_model.predict(cnn_features_new)

# Map the predicted class index to the class name. The names are taken from
# the training dataset object (`train_datagen`); `train_data` is not defined
# anywhere in this notebook and raised NameError.
class_labels = train_datagen.class_names
predicted_label = class_labels[predicted_class[0]]

print(f"Predicted class: {predicted_label}")


In [None]:
# Per-epoch metrics recorded for the CNN
# NOTE(review): `plt` is not imported in any cell visible here — confirm that
# `import matplotlib.pyplot as plt` runs in an earlier cell.
history = cnn_model.history.history

# Two side-by-side panels: accuracy on the left, loss on the right
fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 6))

# Left panel: training vs. validation accuracy
accuracy_ax.plot(history['accuracy'], label='Training Accuracy')
accuracy_ax.plot(history['val_accuracy'], label='Validation Accuracy')
accuracy_ax.set_title('Training and Validation Accuracy')
accuracy_ax.set_xlabel('Epochs')
accuracy_ax.set_ylabel('Accuracy')
accuracy_ax.legend()

# Right panel: training vs. validation loss
loss_ax.plot(history['loss'], label='Training Loss')
loss_ax.plot(history['val_loss'], label='Validation Loss')
loss_ax.set_title('Training and Validation Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

plt.tight_layout()
plt.show()


In [None]:
# Importances of the CNN-extracted features according to the fitted Random
# Forest. `feature_importances` was never assigned in this notebook, so the
# cell raised NameError; it is defined here from the model itself.
# NOTE(review): `plt` is not imported in any cell visible here — confirm that
# `import matplotlib.pyplot as plt` runs in an earlier cell.
feature_importances = rf_model.feature_importances_

# Get the indices of the top 10 important features (ascending importance)
top_features_indices = np.argsort(feature_importances)[-10:]
bar_positions = range(len(top_features_indices))

# Plot top 10 important features
plt.figure(figsize=(10, 6))
plt.barh(bar_positions, feature_importances[top_features_indices])
# Label each bar with the real feature index instead of its position 0..9,
# so the 'Feature Index' axis shows which features are plotted.
plt.yticks(bar_positions, top_features_indices)
plt.title('Top 10 Feature Importance (Random Forest)')
plt.xlabel('Importance')
plt.ylabel('Feature Index')
plt.show()


In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Generate confusion matrix for the ensemble on the test set
# (rows: true class, columns: predicted class; y_test is one-hot encoded)
y_pred = ensemble_model.predict(cnn_features_test)
cm = confusion_matrix(np.argmax(y_test, axis=1), y_pred)

# Tick labels come from the training dataset object (`train_datagen`);
# `train_data` is not defined anywhere in this notebook and raised NameError.
# NOTE(review): assumes the Testing directory has the same class sub-folders
# as Training — confirm.
class_names = train_datagen.class_names

# Plot confusion matrix
# NOTE(review): `plt` is not imported in any cell visible here — confirm that
# `import matplotlib.pyplot as plt` runs in an earlier cell.
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()


In [None]:
# Persist the trained CNN to 'cnn_model.h5' (relative path, i.e. the current
# working directory) so it can be reloaded later without retraining.
cnn_model.save('cnn_model.h5')  # Save the CNN model to a file


In [None]:
import joblib

# Persist the three fitted scikit-learn estimators with joblib. Each call
# writes one file, using a relative path (the current working directory).
# The CNN feature extractor is not part of these files: the estimators were
# fitted on CNN-extracted features, so the CNN is needed again to use them.

# Save the Random Forest model
joblib.dump(rf_model, 'rf_model.pkl')

# Save the SVM model
joblib.dump(svm_model, 'svm_model.pkl')

# Save the ensemble model (VotingClassifier over the SVM and Random Forest)
joblib.dump(ensemble_model, 'ensemble_model.pkl')
