In [30]:
import gym
import torch
from torch import nn
from torch.nn import functional as F
import random

In [3]:
import pygad
import pygad.torchga
import numpy as np

In [4]:
# Create the MountainCar-v0 environment that every later cell steps, resets and renders.
env = gym.make("MountainCar-v0")

In [5]:
# Inspect the observation and action spaces; their sizes are used below as the
# network's input and output widths.
env.observation_space, env.action_space

(Box([-1.2  -0.07], [0.6  0.07], (2,), float32), Discrete(3))

In [6]:
# Reset the environment and display the initial observation it returns.
env.reset()

array([-0.4984456,  0.       ], dtype=float32)

In [7]:
class Network(nn.Module):
    """Small fully connected policy network producing one score per action.

    Architecture: ``observation_space -> 16 -> 32 -> action_space`` with ReLU
    after the two hidden layers and a plain linear output layer.

    Args:
        observation_space: Number of input features (an int, not a gym space).
        action_space: Number of outputs, one score per discrete action.
    """

    def __init__(self, observation_space, action_space):
        super(Network, self).__init__()
        self.linear1 = nn.Linear(observation_space, 16)
        self.linear2 = nn.Linear(16, 32)
        self.linear3 = nn.Linear(32, action_space)
        # ReLU holds no parameters, so the state_dict layout is unaffected by
        # where it is (or is not) applied in forward().
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the raw (unbounded) action scores for observation tensor ``x``."""
        x = self.relu(self.linear1(x))
        x = self.relu(self.linear2(x))
        # No activation on the output layer: a ReLU here would clamp every
        # negative score to 0, so states where all scores are negative would
        # tie at 0 and argmax would always fall back to action 0.
        return self.linear3(x)

In [8]:
# Size the network from the environment: one input per observation component,
# one output per discrete action.
model = Network(
    observation_space=env.observation_space.shape[0],
    action_space=env.action_space.n,
)

In [42]:
def select_action(model, state, epsilon=0.4, n_actions=3):
    """Choose an action epsilon-greedily from the model's action scores.

    Args:
        model: Callable mapping a float32 state tensor to a tensor of
            per-action scores.
        state: Observation convertible to a tensor (e.g. a NumPy array or a
            list of floats).
        epsilon: Probability of taking a uniformly random action instead of
            the greedy one. The default of 0.4 matches the exploration rate
            this function previously hard-coded.
        n_actions: Number of discrete actions to sample from when exploring.
            The default of 3 matches the previously hard-coded ``[0, 1, 2]``.

    Returns:
        int: Index of the selected action.
    """
    # Decide on exploration first so the forward pass is skipped when the
    # model's output would be discarded anyway.
    if random.random() < epsilon:
        return random.randrange(n_actions)

    # Action selection never needs gradients; no_grad avoids building an
    # autograd graph on every environment step.
    with torch.no_grad():
        outputs = model(torch.as_tensor(state, dtype=torch.float32))
    return torch.argmax(outputs).item()

In [43]:
# Display the layer structure of the instantiated network.
model

Network(
  (linear1): Linear(in_features=2, out_features=16, bias=True)
  (linear2): Linear(in_features=16, out_features=32, bias=True)
  (linear3): Linear(in_features=32, out_features=3, bias=True)
  (relu): ReLU()
)

In [46]:
# Smoke test: pick an action for a freshly reset environment's observation.
select_action(model, env.reset())

1

In [73]:
def fitness_func(solution, sol_idx):
    """Score one candidate weight vector by playing a single episode.

    Loads ``solution`` into the shared ``model``, runs the shared ``env`` for
    at most 10000 steps (stopping early when the environment reports done),
    and sums the per-step rewards. The episode total is also appended to the
    global ``rewards`` list.

    Args:
        solution: Flat weight vector for ``model``, as produced by PyGAD.
        sol_idx: Index of the solution in the population (unused here).

    Returns:
        The summed reward of the episode, used as the solution's fitness.
    """
    weights = pygad.torchga.model_weights_as_dict(model, solution)
    model.load_state_dict(weights)

    observation = env.reset()
    total_reward = 0
    for _ in range(10000):
        chosen = select_action(model, observation)
        observation, step_reward, finished, _info = env.step(chosen)
        total_reward += step_reward
        if finished:
            break

    rewards.append(total_reward)
    return total_reward

In [74]:
# Episode totals, appended by fitness_func on every evaluation and read by
# generation_callback for its running average. Must exist before the GA runs.
rewards = []

In [75]:
def generation_callback(instance):
    """Report progress after a generation and stop early once solved.

    Prints the generation counter, the best fitness of the current population
    and the mean of the last 20 episode totals in the global ``rewards`` list.

    Args:
        instance: The running ``pygad.GA`` instance.

    Returns:
        The string ``"stop"`` to end the run when the running reward exceeds
        the solved threshold, otherwise ``None``.
    """
    window = 20
    # MountainCar gives -1 per step, so episode totals are always negative;
    # the previous threshold of +100 could never be reached. -110 is the
    # average reward at which Gym considers MountainCar-v0 solved.
    solved_threshold = -110.0

    # Reuse the fitness values PyGAD already computed for this generation.
    # Calling best_solution() without pop_fitness re-evaluates the whole
    # population, costing extra episodes and appending them to `rewards`.
    best_fitness = instance.best_solution(
        pop_fitness=instance.last_generation_fitness
    )[1]

    recent = rewards[-window:]
    # Guard the empty case so np.mean does not warn; NaN never triggers a stop.
    running_reward = np.mean(recent) if recent else float("nan")

    print(f"Generation: {instance.generations_completed}")
    print(f"Fitness: {best_fitness}")
    print(f"Running Reward: {running_reward}")
    if running_reward > solved_threshold:
        return "stop"

In [76]:
# Build the initial population of flattened network weights for the GA.
gamodel = pygad.torchga.TorchGA(model=model, num_solutions=60)

In [77]:
# Hyper-parameters passed to pygad.GA below.
num_generations = 250
num_parents_mating = 6
# NOTE(review): the pygad.GA call below reads gamodel.population_weights
# directly, so this variable is not used by any visible cell.
initial_population = gamodel.population_weights

In [78]:
# Configure the genetic algorithm: fitness comes from one episode per
# candidate, and generation_callback is registered as the on_generation hook.
ga_instance = pygad.GA(
    num_generations=num_generations,
    num_parents_mating=num_parents_mating,
    fitness_func=fitness_func,
    initial_population=gamodel.population_weights,
    on_generation=generation_callback,
    mutation_percent_genes=40,
)

In [79]:
# Run the evolutionary search; the progress lines in the output come from
# generation_callback, registered as the on_generation hook.
ga_instance.run()

Generation: 1
Fitness: -200.0
Running Reward: -200.0
Generation: 2
Fitness: -200.0
Running Reward: -200.0
Generation: 3
Fitness: -200.0
Running Reward: -200.0
Generation: 4
Fitness: -200.0
Running Reward: -200.0
Generation: 5
Fitness: -200.0
Running Reward: -200.0
Generation: 6
Fitness: -200.0
Running Reward: -200.0
Generation: 7
Fitness: -200.0
Running Reward: -200.0
Generation: 8
Fitness: -200.0
Running Reward: -200.0
Generation: 9
Fitness: -200.0
Running Reward: -200.0
Generation: 10
Fitness: -200.0
Running Reward: -200.0
Generation: 11
Fitness: -200.0
Running Reward: -200.0
Generation: 12
Fitness: -200.0
Running Reward: -200.0
Generation: 13
Fitness: -200.0
Running Reward: -200.0
Generation: 14
Fitness: -200.0
Running Reward: -200.0
Generation: 15
Fitness: -200.0
Running Reward: -200.0
Generation: 16
Fitness: -200.0
Running Reward: -200.0
Generation: 17
Fitness: -200.0
Running Reward: -200.0
Generation: 18
Fitness: -200.0
Running Reward: -200.0
Generation: 19
Fitness: -200.0
Runnin

In [36]:
# Roll out one rendered episode, acting greedily on the model's scores.
state = env.reset()
env.render()
try:
    for i in range(1, 10000):
        env.render()
        # Inference only: no autograd graph is needed to pick an action.
        with torch.no_grad():
            scores = model(torch.from_numpy(state))
        # The action space is Discrete(3), so env.step needs an integer action
        # index; the raw score vector is not a valid action.
        action = torch.argmax(scores).item()
        state, reward, done, info = env.step(action)
        if done:
            break
finally:
    # Always release the render window, even if a step raises.
    env.close()

In [38]:
# Reset the environment and display the returned observation.
# NOTE(review): the recorded output below has three components, while the
# MountainCar observation space shown earlier has shape (2,). This output
# looks stale (possibly from a different environment); TODO confirm by
# re-running on a fresh kernel.
env.reset()

array([ 0.7053849 , -0.70882446,  0.73387295], dtype=float32)