In [4]:
import random
import numpy as np

class StringMatchingGA:
    """Genetic algorithm that evolves random strings toward a target string.

    Individuals are strings of the same length as ``target_string`` drawn
    from ``ALPHABET``. Fitness is the number of positions at which an
    individual agrees with the target, parents are picked with
    fitness-proportionate (roulette-wheel) selection, and offspring are
    produced by single-point crossover followed by per-character mutation.

    Note: characters of ``target_string`` that are not in ``ALPHABET``
    (upper-case letters, digits, punctuation) can never be produced, so such
    a target cannot be matched completely.
    """

    # Characters individuals are built from: lower-case letters and space.
    ALPHABET = 'abcdefghijklmnopqrstuvwxyz '

    def __init__(self, target_string, population_size=100, mutation_rate=0.01, crossover_rate=0.7):
        """Store the hyper-parameters and create the initial random population.

        Args:
            target_string: String the population should evolve toward.
            population_size: Number of individuals kept in every generation.
            mutation_rate: Per-character probability of being re-drawn.
            crossover_rate: Probability that a parent pair is recombined.
        """
        self.target_string = target_string
        self.population_size = population_size
        self.mutation_rate = mutation_rate
        self.crossover_rate = crossover_rate
        self.population = self.initialize_population()

    def initialize_population(self):
        """Return ``population_size`` random strings of the target's length."""
        length = len(self.target_string)
        return [
            ''.join(random.choices(self.ALPHABET, k=length))
            for _ in range(self.population_size)
        ]

    def fitness(self, individual):
        """Return how many positions of *individual* match the target."""
        return sum(1 for a, b in zip(individual, self.target_string) if a == b)

    def select_parents(self, fitnesses=None):
        """Pick two parents with probability proportional to fitness.

        Args:
            fitnesses: Optional pre-computed fitness values, aligned with
                ``self.population``. When omitted they are computed here.

        Returns:
            A list of two individuals (sampled with replacement).
        """
        if fitnesses is None:
            fitnesses = [self.fitness(ind) for ind in self.population]
        if sum(fitnesses) <= 0:
            # No individual matches anything yet, so there is no selection
            # signal; fall back to uniform sampling instead of dividing by
            # zero / passing all-zero weights.
            return random.choices(self.population, k=2)
        # random.choices accepts relative weights, so no normalisation needed.
        return random.choices(self.population, weights=fitnesses, k=2)

    def crossover(self, parent1, parent2):
        """Recombine two parents with single-point crossover.

        With probability ``crossover_rate`` the parents swap tails at a
        random interior cut point; otherwise they are returned unchanged.
        Targets shorter than two characters have no interior cut point, so
        the parents are always returned unchanged for them.
        """
        length = len(self.target_string)
        if length < 2 or random.random() >= self.crossover_rate:
            return parent1, parent2
        crossover_point = random.randint(1, length - 1)
        child1 = parent1[:crossover_point] + parent2[crossover_point:]
        child2 = parent2[:crossover_point] + parent1[crossover_point:]
        return child1, child2

    def mutate(self, individual):
        """Return a copy of *individual* with each character re-drawn from
        ``ALPHABET`` with probability ``mutation_rate``."""
        return ''.join(
            c if random.random() > self.mutation_rate else random.choice(self.ALPHABET)
            for c in individual
        )

    def evolve(self):
        """Replace the population with one new generation of offspring."""
        # Score the current generation once; it does not change while the
        # next generation is being bred.
        fitnesses = [self.fitness(ind) for ind in self.population]
        new_population = []
        while len(new_population) < self.population_size:
            parent1, parent2 = self.select_parents(fitnesses)
            child1, child2 = self.crossover(parent1, parent2)
            new_population.append(self.mutate(child1))
            new_population.append(self.mutate(child2))
        # Children arrive in pairs; trim so an odd population_size does not
        # grow by one individual.
        self.population = new_population[:self.population_size]

    def run(self, generations=100):
        """Evolve for up to *generations* generations, printing progress.

        Stops early as soon as the best individual matches every position of
        the target string.
        """
        for generation in range(generations):
            self.evolve()
            best_individual = max(self.population, key=self.fitness)
            best_fitness = self.fitness(best_individual)
            print(f"Generation {generation}: Best Individual = {best_individual}, Fitness = {best_fitness}")
            if best_fitness == len(self.target_string):
                print("Target string matched!")
                break

In [5]:
class RLAgent:
    """Tabular epsilon-greedy Q-learning agent.

    Q-values are kept in a dict mapping each (hashable) state to a NumPy
    array with one entry per action. Actions are identified throughout by
    their integer index into ``action_space``; for the usual
    ``action_space=list(range(n))`` the index and the action coincide.
    """

    def __init__(self, state_space, action_space, learning_rate=0.1, discount_factor=0.99, epsilon=1.0, epsilon_decay=0.995, epsilon_min=0.01):
        """Store the hyper-parameters and start with an empty Q-table.

        Args:
            state_space: Stored on the instance; no method of this class
                reads it.
            action_space: Sequence of available actions; only its length is
                used to size the Q-value rows.
            learning_rate: Step size applied to the TD error.
            discount_factor: Weight of the best next-state value.
            epsilon: Initial exploration probability.
            epsilon_decay: Multiplicative decay applied by ``decay_epsilon``.
            epsilon_min: Lower bound for ``epsilon``.
        """
        self.state_space = state_space
        self.action_space = action_space
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min
        self.Q = {}

    def _q_values(self, state):
        """Return the Q-value row for *state*, creating a zero row on first use."""
        if state not in self.Q:
            self.Q[state] = np.zeros(len(self.action_space))
        return self.Q[state]

    def get_action(self, state):
        """Return an action index chosen epsilon-greedily for *state*."""
        if np.random.rand() <= self.epsilon:
            # Explore: draw a random *index* so the result is always usable
            # with update_q_value, matching what the greedy branch returns.
            return random.randrange(len(self.action_space))
        return self.get_best_action(state)

    def get_best_action(self, state):
        """Return the index of the highest-valued action for *state*.

        Ties resolve to the lowest index (``np.argmax`` semantics).
        """
        return int(np.argmax(self._q_values(state)))

    def update_q_value(self, state, action, reward, next_state):
        """Apply one Q-learning update for the given transition.

        Args:
            state: State in which *action* was taken.
            action: Index of the action taken.
            reward: Reward observed after taking the action.
            next_state: State reached after the action.
        """
        q_row = self._q_values(state)
        next_row = self._q_values(next_state)

        # Off-policy target: bootstrap from the greedy value of next_state.
        td_target = reward + self.discount_factor * next_row[np.argmax(next_row)]
        td_error = td_target - q_row[action]
        q_row[action] += self.learning_rate * td_error

    def decay_epsilon(self):
        """Shrink ``epsilon`` by ``epsilon_decay``, never below ``epsilon_min``."""
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

In [9]:
def train_rl_agent(agent, ga, episodes=100, generations_per_episode=10):
    """Let *agent* tune the GA's mutation and crossover rates episode by episode.

    In every episode the agent observes the state
    ``(ga.mutation_rate, ga.crossover_rate)``, picks an action, the action is
    decoded into a step for each rate, the GA is run for a few generations
    with the new rates, and the best fitness in the resulting population is
    fed back as the reward.

    Args:
        agent: Object providing ``get_action(state)``,
            ``update_q_value(state, action, reward, next_state)`` and
            ``decay_epsilon()``. Actions are expected to be integers 0-8.
        ga: Object providing ``mutation_rate``, ``crossover_rate``,
            ``population``, ``fitness(individual)`` and ``run(generations=)``.
        episodes: Number of tuning episodes.
        generations_per_episode: GA generations run per episode.
    """
    for episode in range(episodes):
        state = (ga.mutation_rate, ga.crossover_rate)
        action = agent.get_action(state)

        # The 9 actions form a 3x3 grid: action % 3 selects the mutation
        # step (-0.01, 0, +0.01) and action // 3 the crossover step
        # (-0.1, 0, +0.1).
        mutation_step = (action % 3 - 1) * 0.01
        crossover_step = (action // 3 - 1) * 0.1

        # Clamp to the allowed ranges, then round away floating-point drift
        # (e.g. 0.7 + 0.1 == 0.7999999999999999) so that logically identical
        # rates map to the same state key in the agent's Q-table.
        ga.mutation_rate = round(max(0.01, min(0.1, ga.mutation_rate + mutation_step)), 6)
        ga.crossover_rate = round(max(0.5, min(0.9, ga.crossover_rate + crossover_step)), 6)

        ga.run(generations=generations_per_episode)

        best_fitness = max(ga.fitness(ind) for ind in ga.population)
        reward = best_fitness

        next_state = (ga.mutation_rate, ga.crossover_rate)
        agent.update_q_value(state, action, reward, next_state)
        agent.decay_epsilon()

        print(f"Episode {episode}: Mutation Rate = {ga.mutation_rate}, Crossover Rate = {ga.crossover_rate}, Best Fitness = {best_fitness}")

# Demo run: evolve toward a fixed phrase while the agent tunes the GA's rates.
target_string = "hello world"
ga = StringMatchingGA(target_string=target_string)

# Nine actions: train_rl_agent decodes them as a 3x3 grid of
# (mutation step, crossover step) adjustments. state_space is only stored
# by RLAgent; its methods do not read it.
agent = RLAgent(
    state_space=1000,
    action_space=[*range(9)],
)

train_rl_agent(agent=agent, ga=ga)

Generation 0: Best Individual = zealovzvuns, Fitness = 3
Generation 1: Best Individual =  eckbkwahlq, Fitness = 3
Generation 2: Best Individual = hgugooeihlq, Fitness = 3
Generation 3: Best Individual = bxalpmwollq, Fitness = 4
Generation 4: Best Individual = hxtlovzdulz, Fitness = 4
Generation 5: Best Individual = hxtlovgjrli, Fitness = 5
Generation 6: Best Individual = hxtlovgjrli, Fitness = 5
Generation 7: Best Individual = hxalozwollq, Fitness = 6
Generation 8: Best Individual = hxalozwollq, Fitness = 6
Generation 9: Best Individual = hxtlozwollq, Fitness = 6
Episode 0: Mutation Rate = 0.01, Crossover Rate = 0.7, Best Fitness = 6
Generation 0: Best Individual = healomwmrlz, Fitness = 7
Generation 1: Best Individual = hhmlo uorli, Fitness = 7
Generation 2: Best Individual = hhmlo uorlw, Fitness = 7
Generation 3: Best Individual = hhmlo uorlw, Fitness = 7
Generation 4: Best Individual = we lo worlj, Fitness = 8
Generation 5: Best Individual = he lo uorlw, Fitness = 8
Generation 6: Be