In [1]:
import random
import numpy as np
from deap import base, creator, tools, algorithms


class RLMODE:
    def __init__(self, pop_size, dim, obj_num, lower_bound, upper_bound):
        self.pop_size = pop_size
        self.dim = dim
        self.obj_num = obj_num
        self.lower_bound = lower_bound
        self.upper_bound = upper_bound

        # Initialize Q-table
        self.q_table = np.zeros((3, 3))  # 3 states, 3 actions

        # Setup DEAP
        creator.create("FitnessMulti", base.Fitness, weights=(-1.0,) * obj_num)
        creator.create("Individual", list, fitness=creator.FitnessMulti)

        self.toolbox = base.Toolbox()
        self.toolbox.register("attr_float", random.uniform, lower_bound, upper_bound)
        self.toolbox.register("individual", tools.initRepeat, creator.Individual, self.toolbox.attr_float, n=dim)
        self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual, n=pop_size)

        self.toolbox.register("evaluate", self.evaluate)
        self.toolbox.register("mate", tools.cxBlend, alpha=0.5)
        self.toolbox.register("mutate", tools.mutPolynomialBounded, eta=20.0, low=lower_bound, up=upper_bound,
                              indpb=1.0 / dim)
        self.toolbox.register("select", tools.selNSGA2)

        # Initialize control parameters
        self.F = np.random.random(pop_size)
        self.CR = np.random.random(pop_size)
        self.F_f = np.zeros(pop_size)
        self.CR_f = np.zeros(pop_size)

    def evaluate(self, individual):
        # Placeholder for objective function evaluation
        return tuple(random.random() for _ in range(self.obj_num))

    def custom_mutation(self, individual, indpb):
        index = self.population.index(individual)
        for i, x in enumerate(individual):
            if random.random() < indpb:
                a, b, c = random.sample(self.population, 3)
                individual[i] = a[i] + self.F[index] * (b[i] - c[i])
        return individual,

    def custom_crossover(self, ind1, ind2):
        index = self.population.index(ind1)
        for i, (x1, x2) in enumerate(zip(ind1, ind2)):
            if random.random() < self.CR[index]:
                ind1[i] = x2
        return ind1, ind2

    def select_action(self, state):
        return np.argmax(self.q_table[state])

    def update_q_table(self, state, action, reward, next_state):
        learning_rate = 0.1
        discount_factor = 0.9
        old_value = self.q_table[state, action]
        next_max = np.max(self.q_table[next_state])
        new_value = (1 - learning_rate) * old_value + learning_rate * (reward + discount_factor * next_max)
        self.q_table[state, action] = new_value

    def update_control_parameters(self, index, action):
        if action == 0:
            self.F_f[index] = -0.1
            self.CR_f[index] = 0.1
        elif action == 1:
            self.F_f[index] = 0.1
            self.CR_f[index] = 0.1
        else:
            self.F_f[index] = 0
            self.CR_f[index] = 0

        self.F[index] += self.F_f[index]
        self.CR[index] += self.CR_f[index]

        # Ensure F and CR are within [0, 1]
        self.F[index] = np.clip(self.F[index], 0, 1)
        self.CR[index] = np.clip(self.CR[index], 0, 1)

    def run(self, max_generations):
        self.population = self.toolbox.population(n=self.pop_size)

        for gen in range(max_generations):
            offspring = algorithms.varAnd(self.population, self.toolbox, cxpb=0.7, mutpb=0.3)

            for i, (parent, child) in enumerate(zip(self.population, offspring)):
                parent_fitness = self.toolbox.evaluate(parent)
                child_fitness = self.toolbox.evaluate(child)

                # Determine domination
                if all(cf <= pf for cf, pf in zip(child_fitness, parent_fitness)) and any(
                        cf < pf for cf, pf in zip(child_fitness, parent_fitness)):
                    state = 0
                    reward = 1
                elif all(pf <= cf for cf, pf in zip(child_fitness, parent_fitness)) and any(
                        pf < cf for cf, pf in zip(child_fitness, parent_fitness)):
                    state = 1
                    reward = -1
                else:
                    state = 2
                    reward = 0

                # Select action and update control parameters
                action = self.select_action(state)
                self.update_control_parameters(i, action)

                # Update Q-table
                next_state = random.randint(0, 2)  # Simplified transition
                self.update_q_table(state, action, reward, next_state)

                # Selection
                if state != 1:
                    self.population[i] = child

            # Evaluate the entire population
            fitnesses = map(self.toolbox.evaluate, self.population)
            for ind, fit in zip(self.population, fitnesses):
                ind.fitness.values = fit

            # Select the next generation population
            self.population = self.toolbox.select(self.population + offspring, self.pop_size)

        return self.population

In [2]:
# Example usage
pop_size = 50
dim = 30
obj_num = 2
lower_bound = -5
upper_bound = 5
max_generations = 100

rlmode = RLMODE(pop_size, dim, obj_num, lower_bound, upper_bound)
final_population = rlmode.run(max_generations)
print("Optimization completed. Final population size:", len(final_population))

IndexError: tuple index out of range