In [1]:
import pandas as pd

# Load your dataset
df = pd.read_csv('/content/drive/MyDrive/brain_tumor_dataset.csv')
print(df.head())


          Tumor Type        Location  Size (cm) Grade  Patient Age  Gender
0  Oligodendroglioma  Occipital Lobe       9.23     I           48  Female
1         Ependymoma  Occipital Lobe       0.87    II           47    Male
2         Meningioma  Occipital Lobe       2.33    II           12  Female
3         Ependymoma  Occipital Lobe       1.45   III           38  Female
4         Ependymoma       Brainstem       6.45     I           35  Female


In [2]:
from sklearn.preprocessing import LabelEncoder

# Encode categorical columns
categorical_cols = ['Location', 'Grade', 'Gender']
for col in categorical_cols:
    df[col] = LabelEncoder().fit_transform(df[col])

# Encode target column
target_encoder = LabelEncoder()
df['Tumor Type'] = target_encoder.fit_transform(df['Tumor Type'])  # Save this encoder if needed later

# Define X and y
X = df.drop(['Tumor Type'], axis=1)
y = df['Tumor Type']


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [4]:
import numpy as np

class ELM:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.input_weights = np.random.randn(self.input_size, self.hidden_size)
        self.bias = np.random.randn(self.hidden_size)
        self.output_weights = None
        self.one_hot = False
        self.classes = None

    def _activation(self, x):
        return 1 / (1 + np.exp(-x))  # Sigmoid activation

    def fit(self, X, y):
        # One-hot encode y if multi-class
        if len(np.unique(y)) > 2:
            self.one_hot = True
            self.classes = np.unique(y)
            y_onehot = np.zeros((y.shape[0], len(self.classes)))
            for i, label in enumerate(y):
                y_onehot[i, np.where(self.classes == label)[0][0]] = 1
            y = y_onehot
        else:
            self.one_hot = False
            y = np.array(y).reshape(-1, 1)  # FIXED HERE

        H = self._activation(np.dot(X, self.input_weights) + self.bias)
        self.output_weights = np.dot(np.linalg.pinv(H), y)

    def predict(self, X):
        H = self._activation(np.dot(X, self.input_weights) + self.bias)
        y_pred = np.dot(H, self.output_weights)
        if self.one_hot:
            return self.classes[np.argmax(y_pred, axis=1)]
        else:
            return (y_pred > 0.5).astype(int).flatten()

# Initialize and train ELM
elm = ELM(input_size=X_train.shape[1], hidden_size=100, output_size=1)
elm.fit(X_train, y_train)

# Predict on test set
y_pred_elm = elm.predict(X_test)

# Accuracy
from sklearn.metrics import accuracy_score
elm_accuracy = accuracy_score(y_test, y_pred_elm)
print("ELM Accuracy:", elm_accuracy)


ELM Accuracy: 0.21


In [11]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Train decision tree classifier with default params
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)

# Predict on test set
y_pred_dt = dt.predict(X_test)

# 2. ELM (Baseline)
elm = ELM(input_size=X_train.shape[1], hidden_size=100, output_size=1)
elm.fit(X_train, y_train)
y_pred_elm = elm.predict(X_test)

# Evaluate
print("Decision Tree Accuracy (Baseline):", accuracy_score(y_test, y_pred_dt))
print("ELM Accuracy (Baseline):", accuracy_score(y_test, y_pred_elm))
print(classification_report(y_test, y_pred_dt))
print(classification_report(y_test, y_pred_elm))


Decision Tree Accuracy (Baseline): 0.21
ELM Accuracy (Baseline): 0.15
              precision    recall  f1-score   support

           0       0.21      0.11      0.14        47
           1       0.21      0.24      0.22        34
           2       0.27      0.27      0.27        44
           3       0.13      0.16      0.14        38
           4       0.23      0.30      0.26        37

    accuracy                           0.21       200
   macro avg       0.21      0.21      0.21       200
weighted avg       0.21      0.21      0.21       200

              precision    recall  f1-score   support

           0       0.38      0.19      0.25        47
           1       0.14      0.21      0.17        34
           2       0.16      0.18      0.17        44
           3       0.04      0.03      0.03        38
           4       0.10      0.14      0.12        37

    accuracy                           0.15       200
   macro avg       0.16      0.15      0.15       200
weighte

In [12]:
from sklearn.model_selection import cross_val_score

def dt_objective(params):
    max_depth, min_samples_split = params
    max_depth = int(max_depth)
    min_samples_split = int(min_samples_split)

    dt = DecisionTreeClassifier(max_depth=max_depth, min_samples_split=min_samples_split, random_state=42)
    scores = cross_val_score(dt, X_train, y_train, cv=5, scoring='accuracy')
    # Return negative accuracy as we minimize
    return -scores.mean()


In [13]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

def fitness_function_hba(params, X_train, y_train, X_val, y_val):
    max_depth = int(params[0])
    min_samples_split = int(params[1])

    if max_depth < 1: max_depth = 1
    if min_samples_split < 2: min_samples_split = 2

    clf = DecisionTreeClassifier(max_depth=max_depth, min_samples_split=min_samples_split)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_val)
    return 1 - accuracy_score(y_val, y_pred)  # Lower is better

def honey_badger_algorithm(X_train, y_train, X_val, y_val, n_agents=10, max_iter=20):
    dim = 2
    lb = np.array([1, 2])
    ub = np.array([20, 20])

    # Initialize agents as float for operations
    agents = np.random.randint(low=lb, high=ub + 1, size=(n_agents, dim)).astype(float)
    best_fitness = float("inf")
    best_position = None

    for iter in range(max_iter):
        alpha = 2 * np.exp(-4 * (iter / max_iter) ** 2)
        for i in range(n_agents):
            fitness = fitness_function_hba(agents[i], X_train, y_train, X_val, y_val)
            if fitness < best_fitness:
                best_fitness = fitness
                best_position = agents[i].copy()

        for i in range(n_agents):
            F = alpha * np.random.rand()
            rand_agent = agents[np.random.randint(n_agents)]
            if np.random.rand() < 0.5:
                agents[i] += F * (rand_agent - agents[i])
            else:
                agents[i] -= F * (rand_agent - agents[i])
            # Clamp within bounds
            agents[i] = np.clip(agents[i], lb, ub)

    return best_position.astype(int)

# Train-validation split
X_sub_train, X_val, y_sub_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Run HBA optimizer
best_params_hba = honey_badger_algorithm(X_sub_train, y_sub_train, X_val, y_val)
print("Best HBA Parameters (max_depth, min_samples_split):", best_params_hba)

# Train final Decision Tree using best parameters
clf_hba = DecisionTreeClassifier(max_depth=best_params_hba[0], min_samples_split=best_params_hba[1])
clf_hba.fit(X_train, y_train)
y_pred_hba = clf_hba.predict(X_test)

# Accuracy
hba_accuracy = accuracy_score(y_test, y_pred_hba)
print("HBA-Optimized Decision Tree Accuracy:", hba_accuracy)


Best HBA Parameters (max_depth, min_samples_split): [ 9 11]
HBA-Optimized Decision Tree Accuracy: 0.165


In [14]:
best_max_depth = int(best_params_hba[0])
best_min_samples_split = int(best_params_hba[1])

dt_hba = DecisionTreeClassifier(max_depth=best_max_depth, min_samples_split=best_min_samples_split, random_state=42)
dt_hba.fit(X_train, y_train)

y_pred_hba = dt_hba.predict(X_test)

print("Optimized Decision Tree Accuracy (HBA):", accuracy_score(y_test, y_pred_hba))
print(classification_report(y_test, y_pred_hba))


Optimized Decision Tree Accuracy (HBA): 0.165
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        47
           1       0.16      0.82      0.27        34
           2       0.20      0.05      0.07        44
           3       0.00      0.00      0.00        38
           4       0.33      0.08      0.13        37

    accuracy                           0.17       200
   macro avg       0.14      0.19      0.10       200
weighted avg       0.13      0.17      0.09       200



In [15]:
from sklearn.model_selection import cross_val_score

def dt_objective(params):
    max_depth, min_samples_split = params
    max_depth = int(max_depth)
    min_samples_split = int(min_samples_split)

    dt = DecisionTreeClassifier(max_depth=max_depth, min_samples_split=min_samples_split, random_state=42)
    scores = cross_val_score(dt, X_train, y_train, cv=5, scoring='accuracy')
    # Return negative accuracy as we minimize
    return -scores.mean()


In [16]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

def fitness_function_hba(params, X_train, y_train, X_val, y_val):
    max_depth = int(params[0])
    min_samples_split = int(params[1])

    if max_depth < 1: max_depth = 1
    if min_samples_split < 2: min_samples_split = 2

    clf = DecisionTreeClassifier(max_depth=max_depth, min_samples_split=min_samples_split)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_val)
    return 1 - accuracy_score(y_val, y_pred)  # Lower is better

def honey_badger_algorithm(X_train, y_train, X_val, y_val, n_agents=10, max_iter=20):
    dim = 2
    lb = np.array([1, 2])
    ub = np.array([20, 20])

    # Initialize agents as float for operations
    agents = np.random.randint(low=lb, high=ub + 1, size=(n_agents, dim)).astype(float)
    best_fitness = float("inf")
    best_position = None

    for iter in range(max_iter):
        alpha = 2 * np.exp(-4 * (iter / max_iter) ** 2)
        for i in range(n_agents):
            fitness = fitness_function_hba(agents[i], X_train, y_train, X_val, y_val)
            if fitness < best_fitness:
                best_fitness = fitness
                best_position = agents[i].copy()

        for i in range(n_agents):
            F = alpha * np.random.rand()
            rand_agent = agents[np.random.randint(n_agents)]
            if np.random.rand() < 0.5:
                agents[i] += F * (rand_agent - agents[i])
            else:
                agents[i] -= F * (rand_agent - agents[i])
            # Clamp within bounds
            agents[i] = np.clip(agents[i], lb, ub)

    return best_position.astype(int)

# Train-validation split
X_sub_train, X_val, y_sub_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Run HBA optimizer
best_params_hba = honey_badger_algorithm(X_sub_train, y_sub_train, X_val, y_val)
print("Best HBA Parameters (max_depth, min_samples_split):", best_params_hba)

# Train final Decision Tree using best parameters
clf_hba = DecisionTreeClassifier(max_depth=best_params_hba[0], min_samples_split=best_params_hba[1])
clf_hba.fit(X_train, y_train)
y_pred_hba = clf_hba.predict(X_test)

# Accuracy
hba_accuracy = accuracy_score(y_test, y_pred_hba)
print("HBA-Optimized Decision Tree Accuracy:", hba_accuracy)


Best HBA Parameters (max_depth, min_samples_split): [12  4]
HBA-Optimized Decision Tree Accuracy: 0.195


In [17]:
best_max_depth = int(best_params_hba[0])
best_min_samples_split = int(best_params_hba[1])

dt_hba = DecisionTreeClassifier(max_depth=best_max_depth, min_samples_split=best_min_samples_split, random_state=42)
dt_hba.fit(X_train, y_train)

y_pred_hba = dt_hba.predict(X_test)

print("Optimized Decision Tree Accuracy (HBA):", accuracy_score(y_test, y_pred_hba))
print(classification_report(y_test, y_pred_hba))


Optimized Decision Tree Accuracy (HBA): 0.195
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        47
           1       0.18      0.68      0.29        34
           2       0.25      0.32      0.28        44
           3       0.00      0.00      0.00        38
           4       0.29      0.05      0.09        37

    accuracy                           0.20       200
   macro avg       0.14      0.21      0.13       200
weighted avg       0.14      0.20      0.13       200



In [18]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

def fitness_function_ssa(params, X_train, y_train, X_val, y_val):
    max_depth = int(params[0])
    min_samples_split = int(params[1])

    if max_depth < 1: max_depth = 1
    if min_samples_split < 2: min_samples_split = 2

    clf = DecisionTreeClassifier(max_depth=max_depth, min_samples_split=min_samples_split)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_val)
    return 1 - accuracy_score(y_val, y_pred)  # Lower is better

def salp_swarm_algorithm(X_train, y_train, X_val, y_val, n_salps=10, max_iter=20):
    dim = 2
    lb = np.array([1, 2])
    ub = np.array([20, 20])

    # Initialize salps as float
    salps = np.random.randint(lb[0], ub[0] + 1, size=(n_salps, dim)).astype(float)
    food_position = None
    food_fitness = float("inf")

    for iter in range(max_iter):
        c1 = 2 * np.exp(-(4 * iter / max_iter) ** 2)  # convergence factor

        for i in range(n_salps):
            fitness = fitness_function_ssa(salps[i], X_train, y_train, X_val, y_val)
            if fitness < food_fitness:
                food_fitness = fitness
                food_position = salps[i].copy()

        for i in range(n_salps):
            for j in range(dim):
                if i == 0:  # leader
                    c2 = np.random.rand()
                    c3 = np.random.rand()
                    if c3 < 0.5:
                        salps[i, j] = food_position[j] + c1 * ((ub[j] - lb[j]) * c2 + lb[j])
                    else:
                        salps[i, j] = food_position[j] - c1 * ((ub[j] - lb[j]) * c2 + lb[j])
                else:  # followers
                    salps[i, j] = (salps[i, j] + salps[i - 1, j]) / 2.0

            # Clamp to bounds
            salps[i] = np.clip(salps[i], lb, ub)

    return food_position.astype(int)

# Train-validation split
X_sub_train, X_val, y_sub_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Run SSA optimizer
best_params_ssa = salp_swarm_algorithm(X_sub_train, y_sub_train, X_val, y_val)
print("Best SSA Parameters (max_depth, min_samples_split):", best_params_ssa)

# Train final Decision Tree using best SSA parameters
clf_ssa = DecisionTreeClassifier(max_depth=best_params_ssa[0], min_samples_split=best_params_ssa[1])
clf_ssa.fit(X_train, y_train)
y_pred_ssa = clf_ssa.predict(X_test)

# Accuracy
ssa_accuracy = accuracy_score(y_test, y_pred_ssa)
print("SSA-Optimized Decision Tree Accuracy:", ssa_accuracy)


Best SSA Parameters (max_depth, min_samples_split): [14  4]
SSA-Optimized Decision Tree Accuracy: 0.19


In [19]:
def fitness_function_elm(hidden_size, X_train, y_train, X_val, y_val):
    hidden_size = int(hidden_size[0])
    hidden_size = max(1, min(hidden_size, 500))
    elm = ELM(input_size=X_train.shape[1], hidden_size=hidden_size, output_size=1)
    elm.fit(X_train, y_train)
    y_pred = elm.predict(X_val)
    return 1 - accuracy_score(y_val, y_pred)

def honey_badger_elm(X_train, y_train, X_val, y_val, n_agents=10, max_iter=20):
    lb = np.array([10])
    ub = np.array([500])
    agents = np.random.randint(low=lb, high=ub + 1, size=(n_agents, 1)).astype(float)
    best_fitness = float("inf")
    best_position = None
    for iter in range(max_iter):
        alpha = 2 * np.exp(-4 * (iter / max_iter) ** 2)
        for i in range(n_agents):
            fitness = fitness_function_elm(agents[i], X_train, y_train, X_val, y_val)
            if fitness < best_fitness:
                best_fitness = fitness
                best_position = agents[i].copy()
        for i in range(n_agents):
            F = alpha * np.random.rand()
            rand_agent = agents[np.random.randint(n_agents)]
            if np.random.rand() < 0.5:
                agents[i] += F * (rand_agent - agents[i])
            else:
                agents[i] -= F * (rand_agent - agents[i])
            agents[i] = np.clip(agents[i], lb, ub)
    return int(best_position[0])

    # ELM-HBA
best_hidden_hba = honey_badger_elm(X_sub_train, y_sub_train, X_val, y_val)
elm_hba = ELM(input_size=X_train.shape[1], hidden_size=best_hidden_hba, output_size=1)
elm_hba.fit(X_train, y_train)
y_pred_elm_hba = elm_hba.predict(X_test)
elm_hba_acc = accuracy_score(y_test, y_pred_elm_hba)
print("Optimized ELM  (HBA):", accuracy_score(y_test, y_pred_elm_hba))
print(classification_report(y_test, y_pred_elm_hba))

def salp_swarm_elm(X_train, y_train, X_val, y_val, n_salps=10, max_iter=20):
    lb = np.array([10])
    ub = np.array([500])
    salps = np.random.randint(lb[0], ub[0] + 1, size=(n_salps, 1)).astype(float)
    food_position = None
    food_fitness = float("inf")
    for iter in range(max_iter):
        c1 = 2 * np.exp(-(4 * iter / max_iter) ** 2)
        for i in range(n_salps):
            fitness = fitness_function_elm(salps[i], X_train, y_train, X_val, y_val)
            if fitness < food_fitness:
                food_fitness = fitness
                food_position = salps[i].copy()
        for i in range(n_salps):
            for j in range(1):
                if i == 0:
                    c2 = np.random.rand()
                    c3 = np.random.rand()
                    if c3 < 0.5:
                        salps[i, j] = food_position[j] + c1 * ((ub[j] - lb[j]) * c2 + lb[j])
                    else:
                        salps[i, j] = food_position[j] - c1 * ((ub[j] - lb[j]) * c2 + lb[j])
                else:
                    salps[i, j] = (salps[i, j] + salps[i - 1, j]) / 2.0
            salps[i] = np.clip(salps[i], lb, ub)
    return int(food_position[0])

    # ELM-SSA
best_hidden_ssa = salp_swarm_elm(X_sub_train, y_sub_train, X_val, y_val)
elm_ssa = ELM(input_size=X_train.shape[1], hidden_size=best_hidden_ssa, output_size=1)
elm_ssa.fit(X_train, y_train)
y_pred_elm_ssa = elm_ssa.predict(X_test)
elm_ssa_acc = accuracy_score(y_test, y_pred_elm_ssa)
print("Optimized ELM (SSA):", accuracy_score(y_test, y_pred_elm_ssa))
print(classification_report(y_test, y_pred_elm_ssa))



Optimized ELM  (HBA): 0.195
              precision    recall  f1-score   support

           0       0.24      0.21      0.23        47
           1       0.11      0.12      0.11        34
           2       0.20      0.20      0.20        44
           3       0.26      0.21      0.23        38
           4       0.17      0.22      0.19        37

    accuracy                           0.20       200
   macro avg       0.20      0.19      0.19       200
weighted avg       0.20      0.20      0.20       200

Optimized ELM (SSA): 0.15
              precision    recall  f1-score   support

           0       0.12      0.11      0.11        47
           1       0.07      0.09      0.08        34
           2       0.23      0.20      0.22        44
           3       0.19      0.16      0.17        38
           4       0.15      0.19      0.17        37

    accuracy                           0.15       200
   macro avg       0.15      0.15      0.15       200
weighted avg       0.16

In [20]:
# Accuracy Summary
print("\n====== Final Accuracy Summary ======")
print(f"1. Decision Tree (Baseline):         {accuracy_score(y_test, y_pred_dt):.4f}")
print(f"2. ELM (Baseline):                   {elm_accuracy:.4f}")
print(f"3. Decision Tree (HBA):              {hba_accuracy:.4f}")
print(f"4. Decision Tree (SSA):              {ssa_accuracy:.4f}")
print(f"5. ELM Optimized (HBA):              {elm_hba_acc:.4f}")
print(f"6. ELM Optimized (SSA):              {elm_ssa_acc:.4f}")



1. Decision Tree (Baseline):         0.2100
2. ELM (Baseline):                   0.2100
3. Decision Tree (HBA):              0.1950
4. Decision Tree (SSA):              0.1900
5. ELM Optimized (HBA):              0.1950
6. ELM Optimized (SSA):              0.1500


In [21]:
from sklearn.metrics import classification_report, confusion_matrix

def evaluate_model(name, y_true, y_pred):
    print(f"\n==== {name} ====")
    print(classification_report(y_true, y_pred))

    cm = confusion_matrix(y_true, y_pred)
    print("Confusion Matrix:\n", cm)

    if cm.shape == (2, 2):  # Binary classification
        TN, FP, FN, TP = cm.ravel()
        sensitivity = TP / (TP + FN) if (TP + FN) != 0 else 0
        specificity = TN / (TN + FP) if (TN + FP) != 0 else 0

        print(f"Sensitivity (Recall): {sensitivity:.4f}")
        print(f"Specificity: {specificity:.4f}")
    else:
        print("Multi-class detected: Sensitivity/Specificity not computed.")

# Apply to all models
evaluate_model("1. Decision Tree (Baseline)", y_test, y_pred_dt)
evaluate_model("2. ELM (Baseline)", y_test, y_pred_elm)
evaluate_model("3. Decision Tree (HBA)", y_test, y_pred_hba)
evaluate_model("4. Decision Tree (SSA)", y_test, y_pred_ssa)
evaluate_model("5. ELM (HBA)", y_test, y_pred_elm_hba)
evaluate_model("6. ELM (SSA)", y_test, y_pred_elm_ssa)



==== 1. Decision Tree (Baseline) ====
              precision    recall  f1-score   support

           0       0.21      0.11      0.14        47
           1       0.21      0.24      0.22        34
           2       0.27      0.27      0.27        44
           3       0.13      0.16      0.14        38
           4       0.23      0.30      0.26        37

    accuracy                           0.21       200
   macro avg       0.21      0.21      0.21       200
weighted avg       0.21      0.21      0.21       200

Confusion Matrix:
 [[ 5 13  9 11  9]
 [ 3  8 11  6  6]
 [ 7  7 12 12  6]
 [ 3  6  8  6 15]
 [ 6  5  5 10 11]]
Multi-class detected: Sensitivity/Specificity not computed.

==== 2. ELM (Baseline) ====
              precision    recall  f1-score   support

           0       0.38      0.19      0.25        47
           1       0.14      0.21      0.17        34
           2       0.16      0.18      0.17        44
           3       0.04      0.03      0.03        38
  