In [None]:
# using genetic algorithms to optimize simple machine learning models, such 
# as the random forest classifier, to implement brain tumor classification
# using tpot runs thru many models to find the best one
# The dataset used for this model is images of brain tumors available on Kaggle
# https://www.kaggle.com/datasets/masoudnickparvar/brain-tumor-mri-dataset
# https://www.geeksforgeeks.org/random-forest-for-image-classification-using-opencv/

In [44]:
# import libraries
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from leap_ec import Individual, context, ops, probe, Representation, test_env_var, util
from leap_ec.algorithm import generational_ea
from leap_ec.decoder import IdentityDecoder
from sklearn.model_selection import cross_val_score
from itertools import product
import cv2
import os

from toolz import pipe

from leap_ec.binary_rep.problems import MaxOnes
from leap_ec.binary_rep.initializers import create_binary_sequence
from leap_ec.binary_rep.ops import mutate_bitflip
from leap_ec.binary_rep.problems import ScalarProblem

In [None]:
# Load Dataset
def load_data(folder_path):
    X = []
    y = []
    for class_name in os.listdir(folder_path):
        class_path = os.path.join(folder_path, class_name)
        if os.path.isdir(class_path):
            for file_name in os.listdir(class_path):
                file_path = os.path.join(class_path, file_name)
                try:
                    img = Image.open(file_path).convert('L')  # 'L' = grayscale
                    img = img.resize((64, 64))  # Resize for consistency
                    img_array = np.array(img).flatten()
                    X.append(img_array)
                    y.append(class_name)
                except Exception as e:
                    print(f"Error loading {file_path}: {e}")
    return np.array(X), np.array(y)

X_train, y_train = load_data('brain_tumor_mri/Training')
X_test, y_test = load_data('brain_tumor_mri/Testing')

In [None]:
# Encode Labels
le = LabelEncoder()
y_train_encoded = le.fit_transform(y_train)
y_test_encoded = le.transform(y_test)

In [52]:
# def fitness_function(individual):
#     # Each gene is now a direct hyperparameter value
#     n_estimators = int(individual[0])
#     max_depth = int(individual[1])
    
#     clf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
#     clf.fit(X_train, y_train_enc)
#     y_pred = clf.predict(X_test)
#     return accuracy_score(y_test_enc, y_pred),


class RandomForestGAProblem(ScalarProblem):
    def __init__(self):
        super().__init__(maximize=True)
        # self.X_train = X_train
        # self.y_train_enc = y_train_enc
        # self.X_test = X_test
        # self.y_test_enc = y_test_enc

    def evaluate(self, individual):
        # Each gene is now a direct hyperparameter value
        n_estimators = int(individual[0])
        max_depth = int(individual[1])
        
        clf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
        # clf.fit(self.X_train, self.y_train_enc)
        # y_pred = clf.predict(self.X_test)
        # return accuracy_score(self.y_test_enc, y_pred),

        acc = np.mean(cross_val_score(clf, self.X_train, self.y_train_enc, cv=5))
        return acc

In [53]:

# --- Genetic Algorithm Parameters ---
POP_SIZE = 100
GENERATIONS = 30
MUTATION_RATE = 0.1
CROSSOVER_RATE = 0.5
TOURNAMENT_SIZES = 5

n_estimators_options = list(range(50, 201, 5))
max_depth_options = list(range(0, 50, 5))  

In [62]:
# initialize hyperparameters for random forest
n_estimators_ct = 0
max_depth_ct = 0

def initialize_hyperparameters():
    global n_estimators_ct, max_depth_ct
    n_estimators = n_estimators_options[n_estimators_ct % len(n_estimators_options)]
    max_depth = max_depth_options[max_depth_ct % len(max_depth_options)]

    n_estimators_ct += 1
    max_depth_ct += 1
    
    return (n_estimators, max_depth)

In [63]:
# --- Evolutionary algorithm ---
parents = Individual.create_population(POP_SIZE,
                                           initialize=initialize_hyperparameters(),
                                           decoder=IdentityDecoder(),
                                           problem=RandomForestGAProblem())




# final_pop = generational_ea(
#     max_generations=GENERATIONS,
#     pop_size=POP_SIZE,
#     representation=Representation(
#         initialize=lambda: [np.random.choice(s) for s in search_space],
#         decode=IdentityDecoder(),
#         individual_cls=Individual
#     ),
#     problem=fitness,
#     pipeline=[
#         ops.tournament_selection(tournament_size=TOURNAMENT_SIZES),
#         ops.clone,
#         ops.uniform_crossover(p=CROSSOVER_RATE),
#         ops.mutate_uniform(search_space=search_space, p=MUTATION_RATE),
#         ops.evaluate,
#         probe.BestSoFarProbe()
#     ]
# )

TypeError: 'tuple' object is not callable

In [None]:
def fitness_function(x, l):
    return (x / ((2 ** l) - 1)) ** 10

class Lab2Problem(ScalarProblem):
    def __init__(self):
        super().__init__(maximize=True)
        
    def evaluate(self, ind):
        binary_str = ''.join(str(int(bit)) for bit in ind)
        x = int(binary_str, 2)
        
        l = len(ind)
        return fitness_function(x, l)

N = POP_SIZE
p_m = MUTATION_RATE
p_c = CROSSOVER_RATE
trn_size = TOURNAMENT_SIZES

max_generation = 10
l = 40
parents = Individual.create_population(N,
                                        initialize=create_binary_sequence(
                                            l),
                                        decoder=IdentityDecoder(),
                                        problem=Lab2Problem())


# Evaluate initial population
parents = Individual.evaluate_population(parents)

generation_counter = util.inc_generation()
while generation_counter.generation() < max_generation:
    offspring = pipe(parents,
                        ops.tournament_selection(k=trn_size),
                        ops.clone,
                        mutate_bitflip(probability=p_m),
                        ops.UniformCrossover(p_xover=p_c),
                        ops.evaluate,
                        ops.pool(size=len(parents))  # accumulate offspring
                    )
    
    parents = offspring
    generation_counter()  # increment to the next generation



In [None]:
best = max(final_pop, key=lambda ind: ind.fitness)
print("Best n_estimators:", int(best.genome[0]))
print("Best max_depth:", int(best.genome[1]))
print("Best test accuracy:", best.fitness[0])