In [50]:
import gym
import random
import numpy as np

In [51]:
env = gym.make('CartPole-v1') # render_mode="human"
obs = env.reset()
print(obs)
observation_space = env.observation_space
print(observation_space)
env.close()

(array([ 0.00012034,  0.01392482,  0.03484299, -0.01587112], dtype=float32), {})
Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32)


# Real code

## Agents

In [52]:
class RandomAgent():
    def __init__(self, env):
        self.name = "Random"
        self.action_size = env.action_space.n
    
    def get_action(self, state):
        action = random.choice(range(self.action_size))
        return action

In [53]:
class SimpleAgent():
    def __init__(self, env):
        self.name = 'Simple'
        self.action_size = env.action_space.n
    
    def get_action(self, state):
        pole_angle = state[2]
        action = 0 if pole_angle < 0 else 1
        return action

In [54]:
import pickle

class MachineAgent():
    def __init__(self, network=None, action_size=1, observation_size=2):
        self.name = 'Machine'
        self.action_size = action_size
        if network is None:
            self.network = Network(observation_size, self.action_size, [5, 5])
        else:
            self.network = network
    
    def get_action(self, state):
        # State is a vector of size in_size
        # Output is a vector of size out_size
        output = self.network.predict(state)
        if output < -1:
            output = -1
        elif output > 1:
            output = 1
            
        return np.array([output])

    def save(self):
        self.network.save()

class Network():
    def __init__(self, in_size, out_size, layers: list[int] = None, model=None):
        self.in_size = in_size
        self.out_size = out_size
        self.layers = layers
        if model is None and layers is not None:
            self.model = self._build_model()
        else:
            self.model = model
    
    def save(self, name):
        path = f'./models/{name}.pkl'
        with open(path, 'wb') as f:
            pickle.dump(self.model, f)
    
    def load(self, name):
        path = f'./models/{name}.pkl'
        with open(path, 'rb') as f:
            self.model = pickle.load(f)

    def _build_model(self):
        dense_layer_weights = []
        dense_layer_biases = []
        input_size = self.in_size
        for layer_size in self.layers:
            weight_matrix = np.random.rand(input_size, layer_size)
            bias_vector = np.random.rand(layer_size)
            dense_layer_weights.append(weight_matrix)
            dense_layer_biases.append(bias_vector)
            input_size = layer_size
        
        weight_matrix = np.random.rand(input_size, self.out_size)
        bias_vector = np.random.rand(self.out_size)
        dense_layer_weights.append(weight_matrix)
        dense_layer_biases.append(bias_vector)

        return dense_layer_weights, dense_layer_biases
    
    def predict(self, state):
        output = self.forward(state)
        action = np.argmax(output)
        return action
    
    def forward(self, state):
        # State is a vector of size in_size
        # Output is a vector of size out_size
        weights, biases = self.model

        # Forward pass
        output = state
        for weight, bias in zip(weights, biases):
            output = np.matmul(output, weight) + bias
            output = self.leaky_relu(output)
        
        return output

    def leaky_relu(self, x, alpha=0.01):
        return np.maximum(alpha * x, x)

## Machine learning

In [55]:
def mutate(network, mutation_rate=0.02, mutation_amount=0.05):
    weights, biases = network.model
    mutated_weights = []
    mutated_biases = []

    # Iterate through the weights and biases and apply mutations
    for weight_matrix, bias_vector in zip(weights, biases):
        # Create a mask to identify the elements to be mutated
        weight_mask = np.random.rand(*weight_matrix.shape) < mutation_rate
        bias_mask = np.random.rand(*bias_vector.shape) < mutation_rate

        # Generate random Gaussian noise to be added to the elements to be mutated
        weight_noise = np.random.randn(*weight_matrix.shape) * mutation_amount
        bias_noise = np.random.randn(*bias_vector.shape) * mutation_amount

        # Apply mutations using the masks
        mutated_weight_matrix = weight_matrix + weight_mask * weight_noise
        mutated_bias_vector = bias_vector + bias_mask * bias_noise

        mutated_weights.append(mutated_weight_matrix)
        mutated_biases.append(mutated_bias_vector)

    # Replace the original model with the mutated version
    network.model = (mutated_weights, mutated_biases)

def crossover(parent1, parent2):
    weights1, biases1 = parent1.model
    weights2, biases2 = parent2.model

    # Initializing offspring's weights and biases
    offspring_weights = []
    offspring_biases = []

    # Iterate through the parent's weights and biases and perform crossover
    for (weight_matrix1, bias_vector1), (weight_matrix2, bias_vector2) in zip(zip(weights1, biases1), zip(weights2, biases2)):
        # Crossover weights
        crossover_point = np.random.randint(0, weight_matrix1.shape[1])
        offspring_weight_matrix = np.hstack((weight_matrix1[:, :crossover_point], weight_matrix2[:, crossover_point:]))

        # Crossover biases
        crossover_point = np.random.randint(0, bias_vector1.shape[0])
        offspring_bias_vector = np.concatenate((bias_vector1[:crossover_point], bias_vector2[crossover_point:]))

        offspring_weights.append(offspring_weight_matrix)
        offspring_biases.append(offspring_bias_vector)

    # Create a new network with the crossed-over weights and biases
    offspring = Network(parent1.in_size, parent1.out_size, parent1.layers, model=(offspring_weights, offspring_biases))

    return offspring

## Runtime functions

In [56]:
class DoneException(Exception):
    # Store the agent
    def __init__(self, agent):
        self.agent = agent

def get_agent_score(agent, env, close_on_finish=True, n_trials=10, max_steps=5000, type = 'cartpole'):
    if env is None:
        if type == 'cartpole':
            env = gym.make('CartPole-v1')
        elif type == 'mountaincar':
            env = gym.make('MountainCar-v0')
        elif type == 'mountaincar_cont':
            env = gym.make('MountainCarContinuous-v0')

    tot_score = 0
    for _ in range(n_trials):
        obs = env.reset()[0]
        done = False
        score = 0
        steps = 0
        while not done:
            action = agent.get_action(obs)
            obs, reward, done, info, _ = env.step(action)
            score += reward
            steps += 1
            if steps > max_steps:
                break
                raise DoneException(agent)

        tot_score += score

    if close_on_finish:
        env.close()

    return tot_score / n_trials

def visualize_agent(agent, max_steps=1000, type = 'cartpole'):
    if type == 'cartpole':
        env = gym.make('CartPole-v1', render_mode='human')
    elif type == 'mountaincar':
        env = gym.make('MountainCar-v0', render_mode='human')
    elif type == 'mountaincar_cont':
        env = gym.make('MountainCarContinuous-v0', render_mode='human')
        
    obs = env.reset()[0]
    done = False
    score = 0
    step = 0
    while not done:
        action = agent.get_action(obs)
        obs, reward, done, info, _ = env.step(action)
        score += reward
        step += 1
        if step > max_steps:
            break

    env.close()
    print("Score:", score)

def compare_agents(agents):
    agent_to_score = {}
    for agent in agents:
        try:
            score = get_agent_score(agent)
        except DoneException as e:
            agent_to_score[agent] = -float("inf")
    
    # Sort by score
    sorted_agents = sorted(agent_to_score.items(), key=lambda x: x[1], reverse=True)
    # Pretty print
    print("🏆🏆🏆 Leaderboard 🏆🏆🏆")
    print("-------------------------------")
    print("Rank | Name          | Score")
    print("-------------------------------")
    rank = 1
    for agent, score in sorted_agents:
        medal = "    "
        if rank == 1:
            medal = "🥇 "
        elif rank == 2:
            medal = "🥈 "
        elif rank == 3:
            medal = "🥉 "

        name = agent.name
        # Adjust the following number to match the longest agent name in your list
        padding = 15 - len(name)
        print(f"{medal} {rank: <4}| {name: <{padding + len(name)}}| {score}")
        rank += 1

    print("-------------------------------")

### GA

In [57]:
def select_parents(population, fitness, tournament_size=3):
    parents = []
    for _ in range(len(population)):
        tournament_indices = np.random.choice(len(population), tournament_size, replace=False)
        tournament_fitness = [fitness[idx] for idx in tournament_indices]
        best_in_tournament_idx = tournament_indices[np.argmax(tournament_fitness)]
        parents.append(population[best_in_tournament_idx])
    return parents

def best_individual(population):
    fitness_values = get_fitness(population, max_steps=250) # Evaluating the fitness of each individual in the population
    best_index = np.argmax(fitness_values) # Finding the index of the best individual
    return population[best_index] # Returning the best individual

def get_fitness(population, max_steps = 500, type = 'mountaincar_cont'):
    scores = []
    if type == 'cartpole':
        env = gym.make('CartPole-v1')
    elif type == 'mountaincar':
        env = gym.make('MountainCar-v0')
    elif type == 'mountaincar_cont':
        env = gym.make('MountainCarContinuous-v0')
        
    for network in population:
        agent = MachineAgent(network)
        try:
            score = get_agent_score(agent, env, False, 1, max_steps, type)
        except DoneException as e:
            score = float("-inf")

        scores.append(score)
    return scores

def select_next_population(parents, offspring, population, population_size):
    new_pop = offspring
    i = 0
    while len(new_pop) < population_size:
        if i >= len(parents):
            print('This should not happen (pop added to pop)')
            new_pop.append(population[i])
        else:
            new_pop.append(parents[i])
        i += 1
    
    if len(new_pop) > population_size:
        print('This should not happen (too large pop)')
        new_pop = new_pop[:population_size]

    return new_pop

def genetic_algorithm(population_size, generations, in_size, out_size, layers, mutation_rate, mutation_amount, crossover_rate):
    # Initialize population
    population = [Network(in_size, out_size, layers) for _ in range(population_size)]
    
    print("🧬 Genetic Algorithm Started 🧬")

    for generation in range(generations):
        # Evaluate fitness
        fitness = get_fitness(population)

        # Log best fitness of the generation
        best_fitness = max(fitness)
        print(f"Generation {generation}: Best Fitness = {best_fitness} 🏆")

        # Select parents
        parents = select_parents(population, fitness)

        # Crossover
        offspring = []
        for parent1, parent2 in zip(parents[::2], parents[1::2]):
            if np.random.rand() < crossover_rate:
                offspring.append(crossover(parent1, parent2))
            else:
                offspring.append(parent1)
                offspring.append(parent2)

        # Mutate
        for child in offspring:
            mutate(child, mutation_rate, mutation_amount)

        # Select next generation
        population = select_next_population(parents, offspring, population, population_size)
    
    # Return the best individual
    print(f"Getting best agent...")
    best_agent = best_individual(population)
    print(f"🎖️ Genetic Algorithm Completed: Best Agent Found 🎖️")
    return best_agent

## Runtime

In [58]:
trained_network = genetic_algorithm(population_size=1000, generations=2, in_size=2, out_size=1, layers=[5, 5], mutation_rate=0.01, mutation_amount=0.1, crossover_rate=0.5)
trained_network.save('good_agent_v1')

good_agent = MachineAgent(trained_network, 2)

visualize_agent(good_agent, 1000, 'mountaincar_cont')

🧬 Genetic Algorithm Started 🧬
Generation 0: Best Fitness = 0.0 🏆
Generation 1: Best Fitness = 0.0 🏆
Getting best agent...
🎖️ Genetic Algorithm Completed: Best Agent Found 🎖️
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0

In [59]:
trained_network = Network(2, 4, [5, 5], None)
# trained_network.load('good_agent_v1')

agent = MachineAgent(trained_network, 2)

visualize_agent(agent, 500, 'mountaincar_cont')

[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
