In [56]:
# %load taxi.py
import numpy as np
import sys
from six import StringIO

from gym import spaces, utils
from gym.envs.toy_text import discrete

# ASCII layout of the 5x5 taxi grid.
# - Grid cell (row, col) sits at character position [1 + row][2 * col + 1].
# - The character between two horizontally adjacent cells is ":" when the taxi
#   may drive through and "|" when a wall blocks east/west movement.
# - R, G, Y and B mark the four named pickup/drop-off locations.
MAP = [
    "+---------+",
    "|R: | : :G|",
    "| : : : : |",
    "| : : : : |",
    "| | : | : |",
    "|Y| : |B: |",
    "+---------+",
]

class TaxiEnv(discrete.DiscreteEnv):
    """
    The Taxi Problem
    from "Hierarchical Reinforcement Learning with the MAXQ Value Function Decomposition"
    by Tom Dietterich

    State: a single integer in [0, 500) packing
    (taxi row, taxi column, passenger location, destination index);
    see `encode` / `decode`. Passenger location 0-3 indexes `self.locs`,
    and 4 means the passenger is riding in the taxi.

    Actions:
    - 0: South (row + 1)
    - 1: North (row - 1)
    - 2: East
    - 3: West
    - 4: Pickup
    - 5: Dropoff

    Rewards: -1 per step, -10 for an illegal pickup/dropoff,
    +20 for delivering the passenger to the destination (ends the episode).

    rendering:
    - blue: passenger
    - magenta: destination
    - yellow: empty taxi
    - green: full taxi
    - other letters: locations

    """
    metadata = {'render.modes': ['human', 'ansi']}

    def __init__(self):
        """Build the full deterministic transition table and initial-state distribution."""
        # Character grid of the map; individual cells compare against bytes such as b":".
        self.desc = np.asarray(MAP,dtype='c')

        # (row, col) of the four named locations, in the order R, G, Y, B.
        self.locs = locs = [(0,0), (0,4), (4,0), (4,3)]

        nS = 500  # 5 rows * 5 cols * 5 passenger locations * 4 destinations
        nR = 5
        nC = 5
        maxR = nR-1
        maxC = nC-1
        isd = np.zeros(nS)  # initial state distribution (normalised below)
        nA = 6
        # P[state][action] -> list of (probability, next_state, reward, done) tuples.
        P = {s : {a : [] for a in range(nA)} for s in range(nS)}
        for row in range(5):
            for col in range(5):
                for passidx in range(5):
                    for destidx in range(4):
                        state = self.encode(row, col, passidx, destidx)
                        # Episodes may only start with the passenger waiting at a
                        # location that differs from the destination.
                        if passidx < 4 and passidx != destidx:
                            isd[state] += 1
                        for a in range(nA):
                            # defaults
                            newrow, newcol, newpassidx = row, col, passidx
                            reward = -1
                            done = False
                            taxiloc = (row, col)

                            # Vertical moves are never blocked by walls, only clamped to the grid.
                            if a==0:
                                newrow = min(row+1, maxR)
                            elif a==1:
                                newrow = max(row-1, 0)
                            # NOTE: a second if-chain starts here; this is harmless because
                            # `a` holds a single value and matches at most one branch overall.
                            # Horizontal moves succeed only when the separator character on
                            # that side of the cell is ":" (otherwise the taxi stays put).
                            if a==2 and self.desc[1+row,2*col+2]==b":":
                                newcol = min(col+1, maxC)
                            elif a==3 and self.desc[1+row,2*col]==b":":
                                newcol = max(col-1, 0)
                            elif a==4: # pickup
                                if (passidx < 4 and taxiloc == locs[passidx]):
                                    newpassidx = 4
                                else:
                                    reward = -10
                            elif a==5: # dropoff
                                if (taxiloc == locs[destidx]) and passidx==4:
                                    done = True
                                    reward = 20
                                elif (taxiloc in locs) and passidx==4:
                                    # Dropping off at a named location that is not the
                                    # destination leaves the passenger there (normal -1 step cost).
                                    newpassidx = locs.index(taxiloc)
                                else:
                                    reward = -10
                            newstate = self.encode(newrow, newcol, newpassidx, destidx)
                            # Every transition is deterministic, hence probability 1.0.
                            P[state][a].append((1.0, newstate, reward, done))
        isd /= isd.sum()
        discrete.DiscreteEnv.__init__(self, nS, nA, P, isd)

    def encode(self, taxirow, taxicol, passloc, destidx):
        """Pack the four state components into one integer:
        ((taxirow * 5 + taxicol) * 5 + passloc) * 4 + destidx."""
        # (5) 5, 5, 4
        i = taxirow
        i *= 5
        i += taxicol
        i *= 5
        i += passloc
        i *= 4
        i += destidx
        return i

    def decode(self, i):
        """Inverse of `encode`.

        Returns a `reversed` iterator (not a list) that yields
        taxirow, taxicol, passloc, destidx in that order; it is meant to be
        unpacked once and cannot be indexed or iterated twice.
        """
        out = []
        out.append(i % 4)
        i = i // 4
        out.append(i % 5)
        i = i // 5
        out.append(i % 5)
        i = i // 5
        out.append(i)
        # What remains is the taxi row, which must lie inside the 5-row grid.
        assert 0 <= i < 5
        return reversed(out)

    def _render(self, mode='human', close=False):
        """Write a coloured text picture of the current state.

        With mode 'ansi' the text goes to a StringIO that is returned; with any
        other mode it is written to sys.stdout. Mode 'human' returns None.
        Does nothing when `close` is true.
        """
        if close:
            return

        outfile = StringIO() if mode == 'ansi' else sys.stdout

        out = self.desc.copy().tolist()
        out = [[c.decode('utf-8') for c in line] for line in out]
        # self.s is not assigned in this class; presumably the DiscreteEnv base
        # class maintains it as the current state -- TODO confirm.
        taxirow, taxicol, passidx, destidx = self.decode(self.s)
        # A blank cell is shown as "_" when the full taxi is drawn on it.
        def ul(x): return "_" if x == " " else x
        if passidx < 4:
            out[1+taxirow][2*taxicol+1] = utils.colorize(out[1+taxirow][2*taxicol+1], 'yellow', highlight=True)
            pi, pj = self.locs[passidx]
            out[1+pi][2*pj+1] = utils.colorize(out[1+pi][2*pj+1], 'blue', bold=True)
        else: # passenger in taxi
            out[1+taxirow][2*taxicol+1] = utils.colorize(ul(out[1+taxirow][2*taxicol+1]), 'green', highlight=True)

        di, dj = self.locs[destidx]
        out[1+di][2*dj+1] = utils.colorize(out[1+di][2*dj+1], 'magenta')
        outfile.write("\n".join(["".join(row) for row in out])+"\n")
        # self.lastaction is likewise read here but not set in this class.
        if self.lastaction is not None:
            outfile.write("  ({})\n".format(["South", "North", "East", "West", "Pickup", "Dropoff"][self.lastaction]))
        else: outfile.write("\n")

        # No need to return anything for human
        if mode != 'human':
            return outfile


In [57]:
# Build the environment; the constructor precomputes the whole 500-state transition table.
env = TaxiEnv()

In [58]:
# Start a new episode; the value shown is the integer id of the initial state.
env.reset()

42

In [59]:
# Draw the current state as coloured text (colour legend is in the TaxiEnv docstring).
env.render()

+---------+
|[34;1mR[0m: |[43m [0m: :G|
| : : : : |
| : : : : |
| | : | : |
|[35mY[0m| : |B: |
+---------+



# Genetic algorithm

In [60]:
# Sizes of the discrete state and action spaces; a policy below is an array
# holding one action id per state.
n_states = env.observation_space.n
n_actions = env.action_space.n

print(n_states, n_actions)

500 6


In [61]:
# Human-readable action names -> action ids understood by TaxiEnv.
# The ids follow the environment's own numbering: action 0 increases the row
# (South / down), 1 decreases it (North / up), 2 moves East (right),
# 3 moves West (left), 4 picks the passenger up and 5 drops the passenger off.
# Every key must be unique: a repeated key in a dict literal silently
# overwrites the earlier entry.
action_to_i = {
    'down': 0,
    'up': 1,
    'right': 2,
    'left': 3,
    'pick up': 4,
    'drop off': 5,
}

In [62]:
def get_random_policy():
    """
    Draw a policy uniformly at random.

    Returns a 1-D integer array of length n_states whose entry at index s is
    the action id (in [0, n_actions)) the agent takes in state s.
    """
    return np.random.randint(low=0, high=n_actions, size=n_states)

In [63]:
def sample_reward(env, policy, t_max=100):
    """
    Play one episode with `policy` and return the summed reward.

    env    -- environment exposing reset() -> state and
              step(action) -> (state, reward, done, info)
    policy -- indexable mapping from state to action
    t_max  -- maximum number of steps; if the episode has not finished by then
              (e.g. the agent keeps walking into a wall) it is cut off and the
              reward collected so far is returned
    """
    observation = env.reset()
    episode_return = 0

    for _ in range(t_max):
        observation, step_reward, finished, _info = env.step(policy[observation])
        episode_return += step_reward
        if finished:
            break

    return episode_return

In [64]:
def evaluate(env, policy, n_times=20):
    """Estimate a policy's quality as the mean return over `n_times` independent episodes."""
    episode_returns = [sample_reward(env, policy) for _ in range(n_times)]
    return float(np.mean(np.array(episode_returns)))

In [65]:
import random

def crossover(policy1, policy2, p=0.5):
    """
    Mix two policies state by state.

    For every index of policy1 an independent uniform draw decides the parent:
    with probability p the action comes from policy1, otherwise from policy2.
    Returns the child policy as a plain list.
    """
    return [
        policy1[state] if random.random() < p else policy2[state]
        for state in range(len(policy1))
    ]

In [66]:
def mutation(policy, p=0.1):
    """
    Return a mutated copy of `policy`.

    For each state, with probability p the action is replaced by a uniformly
    random action; otherwise the original action is kept. Implemented as a
    crossover between a freshly drawn random policy and `policy`.

    policy -- sequence of action ids, one per state
    p      -- per-state mutation probability
    """
    random_policy = np.random.randint(0, n_actions, len(policy))
    # crossover() takes from its FIRST argument with probability p, so the
    # random policy goes first and p must be forwarded explicitly; relying on
    # crossover's default (0.5) would mutate half of all states.
    return crossover(random_policy, policy, p)

In [67]:
# Seed BOTH random sources: get_random_policy() draws from NumPy's global RNG,
# while crossover() draws from the standard-library `random` module, which
# np.random.seed() does not affect.
np.random.seed(1234)
random.seed(1234)
policies = [crossover(get_random_policy(), get_random_policy()) 
            for i in range(10 ** 4)]

# Sanity checks on the shape and value range of generated policies.
assert all([len(p) == n_states for p in policies]), 'policy length should always be n_states'
assert np.min(policies) == 0, 'minimal action id should be 0'
assert np.max(policies) == n_actions - 1, 'maximal action id should be n_actions-1'

# Crossing all-zeros with all-ones must yield a mix (mean strictly between 0 and 1)
# at least once, i.e. the parent is chosen per state rather than per policy.
assert any([np.mean(crossover(np.zeros(n_states), np.ones(n_states))) not in (0, 1)
            for _ in range(100)]), "Make sure your crossover changes each action independently"

In [68]:
# Genetic-algorithm hyperparameters.
n_epochs = 200 # number of generations to run
pool_size = 100 # number of policies kept after each selection step
n_crossovers = 100 # number of crossover children created per epoch
n_mutations = 100 # number of mutated policies created per epoch

In [69]:
# Initial population: pool_size random policies, each scored by its average
# return over several sampled episodes (see evaluate).
print("initializing...")
pool = [get_random_policy() for _ in range(pool_size)]
pool_scores = [evaluate(env, policy) for policy in pool]

initializing...


In [70]:
# Sanity checks: the pool and its scores are parallel plain lists of length
# pool_size, and every score is a plain Python number.
assert type(pool) == type(pool_scores) == list
assert len(pool) == len(pool_scores) == pool_size
assert all([type(score) in (float, int) for score in pool_scores])

In [71]:
from tqdm import tqdm

# Main evolutionary loop. Each epoch breeds n_crossovers children and
# n_mutations mutants from randomly chosen pool members, merges them into the
# pool, re-scores every candidate and keeps the pool_size highest-scoring ones.
for epoch in tqdm(range(n_epochs)):
    print(f"Epoch {epoch}")
    
    # Children: per-state mix of two parents drawn uniformly (with replacement) from the pool.
    crossovered = []
    for _ in range(n_crossovers):
        policy1 = pool[random.randint(0, len(pool) - 1)]
        policy2 = pool[random.randint(0, len(pool) - 1)]
        crossovered.append(crossover(policy1, policy2))
    # Mutants: randomly perturbed copies of uniformly drawn pool members.
    mutated = []
    for i in range(n_mutations):
        mutated.append(mutation(pool[random.randint(0, len(pool) - 1)]))
    
    assert type(crossovered) == type(mutated) == list
    
    # Add the new policies to the pool and re-estimate the score of EVERY policy,
    # survivors included. Scores are averages over freshly sampled episodes, so a
    # surviving policy's score (and the printed best score) can go down as well as up.
    pool = pool + crossovered + mutated
    pool_scores = [evaluate(env, policy) for policy in pool]
    
    # Select the pool_size best policies; argsort is ascending, so take the tail.
    selected_indices = np.argsort(pool_scores)[-pool_size:]
    pool = [pool[i] for i in selected_indices]
    pool_scores = [pool_scores[i] for i in selected_indices]

    # Print the best score so far (last element in ascending score order).
    print("best score:", pool_scores[-1])


  0%|          | 0/200 [00:00<?, ?it/s][A

Epoch 0



  0%|          | 1/200 [00:05<18:06,  5.46s/it]

best score: -322.75
Epoch 1


  1%|          | 2/200 [00:11<18:19,  5.55s/it]

best score: -324.1
Epoch 2


  2%|▏         | 3/200 [00:18<20:08,  6.14s/it]

best score: -234.55
Epoch 3


  2%|▏         | 4/200 [00:25<20:35,  6.30s/it]

best score: -278.65
Epoch 4


  2%|▎         | 5/200 [00:31<20:09,  6.20s/it]

best score: -278.65
Epoch 5


  3%|▎         | 6/200 [00:37<20:10,  6.24s/it]

best score: -278.65
Epoch 6


  4%|▎         | 7/200 [00:44<20:17,  6.31s/it]

best score: -189.55
Epoch 7


  4%|▍         | 8/200 [00:50<19:47,  6.19s/it]

best score: -279.1
Epoch 8


  4%|▍         | 9/200 [00:55<19:02,  5.98s/it]

best score: -234.55
Epoch 9


  5%|▌         | 10/200 [01:02<20:00,  6.32s/it]

best score: -279.1
Epoch 10


  6%|▌         | 11/200 [01:08<19:45,  6.27s/it]

best score: -189.1
Epoch 11


  6%|▌         | 12/200 [01:15<20:18,  6.48s/it]

best score: -233.65
Epoch 12


  6%|▋         | 13/200 [01:22<20:19,  6.52s/it]

best score: -278.65
Epoch 13


  7%|▋         | 14/200 [01:28<20:05,  6.48s/it]

best score: -235.0
Epoch 14


  8%|▊         | 15/200 [01:34<19:34,  6.35s/it]

best score: -187.75
Epoch 15


  8%|▊         | 16/200 [01:41<19:23,  6.32s/it]

best score: -277.75
Epoch 16


  8%|▊         | 17/200 [01:48<20:41,  6.78s/it]

best score: -233.2
Epoch 17


  9%|▉         | 18/200 [01:55<20:01,  6.60s/it]

best score: -234.55
Epoch 18


 10%|▉         | 19/200 [02:02<20:31,  6.80s/it]

best score: -234.1
Epoch 19


 10%|█         | 20/200 [02:09<20:35,  6.86s/it]

best score: -233.65
Epoch 20


 10%|█         | 21/200 [02:15<19:22,  6.50s/it]

best score: -277.75
Epoch 21


 11%|█         | 22/200 [02:20<18:44,  6.32s/it]

best score: -234.1
Epoch 22


 12%|█▏        | 23/200 [02:26<17:52,  6.06s/it]

best score: -190.0
Epoch 23


 12%|█▏        | 24/200 [02:33<18:47,  6.41s/it]

best score: -190.0
Epoch 24


 12%|█▎        | 25/200 [02:40<19:03,  6.53s/it]

best score: -278.2
Epoch 25


 13%|█▎        | 26/200 [02:47<19:23,  6.69s/it]

best score: -190.0
Epoch 26


 14%|█▎        | 27/200 [02:54<19:15,  6.68s/it]

best score: -277.3
Epoch 27


 14%|█▍        | 28/200 [03:01<19:26,  6.78s/it]

best score: -100.0
Epoch 28


 14%|█▍        | 29/200 [03:06<18:13,  6.39s/it]

best score: -189.55
Epoch 29


 15%|█▌        | 30/200 [03:12<17:17,  6.10s/it]

best score: -233.2
Epoch 30


 16%|█▌        | 31/200 [03:21<19:38,  6.98s/it]

best score: -234.1
Epoch 31


 16%|█▌        | 32/200 [03:26<18:05,  6.46s/it]

best score: -189.55
Epoch 32


 16%|█▋        | 33/200 [03:32<17:18,  6.22s/it]

best score: -145.0
Epoch 33


 17%|█▋        | 34/200 [03:39<17:57,  6.49s/it]

best score: -277.3
Epoch 34


 18%|█▊        | 35/200 [03:46<18:26,  6.71s/it]

best score: -232.75
Epoch 35


 18%|█▊        | 36/200 [03:53<18:43,  6.85s/it]

best score: -233.65
Epoch 36


 18%|█▊        | 37/200 [04:00<18:20,  6.75s/it]

best score: -189.55
Epoch 37


 19%|█▉        | 38/200 [04:08<19:50,  7.35s/it]

best score: -233.65
Epoch 38


 20%|█▉        | 39/200 [04:16<20:04,  7.48s/it]

best score: -187.3
Epoch 39


 20%|██        | 40/200 [04:23<19:46,  7.41s/it]

best score: -189.55
Epoch 40


 20%|██        | 41/200 [04:31<19:53,  7.51s/it]

best score: -189.55
Epoch 41


 21%|██        | 42/200 [04:39<20:18,  7.71s/it]

best score: -189.55
Epoch 42


 22%|██▏       | 43/200 [04:47<19:48,  7.57s/it]

best score: -144.1
Epoch 43


 22%|██▏       | 44/200 [04:53<19:07,  7.35s/it]

best score: -188.65
Epoch 44


 22%|██▎       | 45/200 [05:00<18:28,  7.15s/it]

best score: -188.65
Epoch 45


 23%|██▎       | 46/200 [05:06<17:05,  6.66s/it]

best score: -188.2
Epoch 46


 24%|██▎       | 47/200 [05:11<15:55,  6.24s/it]

best score: -232.3
Epoch 47


 24%|██▍       | 48/200 [05:17<15:26,  6.10s/it]

best score: -144.55
Epoch 48


 24%|██▍       | 49/200 [05:25<16:43,  6.65s/it]

best score: -189.55
Epoch 49


 25%|██▌       | 50/200 [05:33<17:39,  7.07s/it]

best score: -100.0
Epoch 50


 26%|██▌       | 51/200 [05:40<17:50,  7.19s/it]

best score: -145.0
Epoch 51


 26%|██▌       | 52/200 [05:46<17:10,  6.97s/it]

best score: -144.55
Epoch 52


 26%|██▋       | 53/200 [05:55<18:01,  7.36s/it]

best score: -100.0
Epoch 53


 27%|██▋       | 54/200 [06:04<19:22,  7.97s/it]

best score: -188.65
Epoch 54


 28%|██▊       | 55/200 [06:12<19:10,  7.94s/it]

best score: -144.55
Epoch 55


 28%|██▊       | 56/200 [06:20<19:08,  7.97s/it]

best score: -100.0
Epoch 56


 28%|██▊       | 57/200 [06:27<18:12,  7.64s/it]

best score: -100.0
Epoch 57


 29%|██▉       | 58/200 [06:34<17:30,  7.40s/it]

best score: -100.0
Epoch 58


 30%|██▉       | 59/200 [06:41<17:11,  7.32s/it]

best score: -100.0
Epoch 59


 30%|███       | 60/200 [06:48<16:52,  7.23s/it]

best score: -145.0
Epoch 60


 30%|███       | 61/200 [06:57<18:15,  7.88s/it]

best score: -142.75
Epoch 61


 31%|███       | 62/200 [07:04<17:36,  7.66s/it]

best score: -144.1
Epoch 62


 32%|███▏      | 63/200 [07:11<17:02,  7.46s/it]

best score: -100.0
Epoch 63


 32%|███▏      | 64/200 [07:18<16:31,  7.29s/it]

best score: -145.0
Epoch 64


 32%|███▎      | 65/200 [07:25<15:57,  7.10s/it]

best score: -144.55
Epoch 65


 33%|███▎      | 66/200 [07:32<15:46,  7.06s/it]

best score: -100.0
Epoch 66


 34%|███▎      | 67/200 [07:38<14:59,  6.76s/it]

best score: -144.1
Epoch 67


 34%|███▍      | 68/200 [07:48<16:41,  7.59s/it]

best score: -100.0
Epoch 68


 34%|███▍      | 69/200 [07:58<18:11,  8.33s/it]

best score: -100.0
Epoch 69


 35%|███▌      | 70/200 [08:04<16:51,  7.78s/it]

best score: -100.0
Epoch 70


 36%|███▌      | 71/200 [08:12<16:47,  7.81s/it]

best score: -144.55
Epoch 71


 36%|███▌      | 72/200 [08:18<15:28,  7.26s/it]

best score: -100.0
Epoch 72


 36%|███▋      | 73/200 [08:25<15:00,  7.09s/it]

best score: -100.0
Epoch 73


 37%|███▋      | 74/200 [08:34<16:07,  7.67s/it]

best score: -100.0
Epoch 74


 38%|███▊      | 75/200 [08:44<17:24,  8.35s/it]

best score: -100.0
Epoch 75


 38%|███▊      | 76/200 [08:55<19:19,  9.35s/it]

best score: -100.0
Epoch 76


 38%|███▊      | 77/200 [09:03<17:58,  8.77s/it]

best score: -100.0
Epoch 77


 39%|███▉      | 78/200 [09:10<16:43,  8.23s/it]

best score: -100.0
Epoch 78


 40%|███▉      | 79/200 [09:16<15:29,  7.68s/it]

best score: -100.0
Epoch 79


 40%|████      | 80/200 [09:21<13:55,  6.96s/it]

best score: -100.0
Epoch 80


 40%|████      | 81/200 [09:27<12:46,  6.45s/it]

best score: -100.0
Epoch 81


 41%|████      | 82/200 [09:32<11:56,  6.07s/it]

best score: -100.0
Epoch 82


 42%|████▏     | 83/200 [09:37<11:23,  5.84s/it]

best score: -100.0
Epoch 83


 42%|████▏     | 84/200 [09:42<10:59,  5.68s/it]

best score: -100.0
Epoch 84


 42%|████▎     | 85/200 [09:48<10:40,  5.57s/it]

best score: -100.0
Epoch 85


 43%|████▎     | 86/200 [09:53<10:26,  5.50s/it]

best score: -100.0
Epoch 86


 44%|████▎     | 87/200 [09:58<10:14,  5.44s/it]

best score: -100.0
Epoch 87


 44%|████▍     | 88/200 [10:04<10:04,  5.40s/it]

best score: -100.0
Epoch 88


 44%|████▍     | 89/200 [10:09<09:55,  5.37s/it]

best score: -100.0
Epoch 89


 45%|████▌     | 90/200 [10:14<09:44,  5.31s/it]

best score: -100.0
Epoch 90


 46%|████▌     | 91/200 [10:19<09:37,  5.30s/it]

best score: -100.0
Epoch 91


 46%|████▌     | 92/200 [10:25<09:31,  5.29s/it]

best score: -100.0
Epoch 92


 46%|████▋     | 93/200 [10:30<09:25,  5.29s/it]

best score: -100.0
Epoch 93


 47%|████▋     | 94/200 [10:35<09:19,  5.28s/it]

best score: -100.0
Epoch 94


 48%|████▊     | 95/200 [10:41<09:17,  5.31s/it]

best score: -100.0
Epoch 95


 48%|████▊     | 96/200 [10:46<09:10,  5.29s/it]

best score: -100.0
Epoch 96


 48%|████▊     | 97/200 [10:51<09:04,  5.29s/it]

best score: -100.0
Epoch 97


 49%|████▉     | 98/200 [10:57<09:02,  5.31s/it]

best score: -100.0
Epoch 98


 50%|████▉     | 99/200 [11:03<09:40,  5.74s/it]

best score: -100.0
Epoch 99


 50%|█████     | 100/200 [11:10<10:06,  6.06s/it]

best score: -100.0
Epoch 100


 50%|█████     | 101/200 [11:17<10:12,  6.19s/it]

best score: -100.0
Epoch 101


 51%|█████     | 102/200 [11:23<10:01,  6.13s/it]

best score: -100.0
Epoch 102


 52%|█████▏    | 103/200 [11:29<10:13,  6.33s/it]

best score: -100.0
Epoch 103


 52%|█████▏    | 104/200 [11:35<10:01,  6.26s/it]

best score: -100.0
Epoch 104


 52%|█████▎    | 105/200 [11:43<10:33,  6.67s/it]

best score: -100.0
Epoch 105


 53%|█████▎    | 106/200 [11:50<10:47,  6.89s/it]

best score: -100.0
Epoch 106


 54%|█████▎    | 107/200 [11:58<11:06,  7.17s/it]

best score: -100.0
Epoch 107


 54%|█████▍    | 108/200 [12:05<10:37,  6.93s/it]

best score: -100.0
Epoch 108


 55%|█████▍    | 109/200 [12:12<10:46,  7.11s/it]

best score: -100.0
Epoch 109


 55%|█████▌    | 110/200 [12:19<10:25,  6.95s/it]

best score: -100.0
Epoch 110


 56%|█████▌    | 111/200 [12:28<11:18,  7.62s/it]

best score: -100.0
Epoch 111


 56%|█████▌    | 112/200 [12:37<11:43,  7.99s/it]

best score: -100.0
Epoch 112


 56%|█████▋    | 113/200 [12:48<12:54,  8.90s/it]

best score: -100.0
Epoch 113


 57%|█████▋    | 114/200 [13:03<15:23, 10.74s/it]

best score: -100.0
Epoch 114


 57%|█████▊    | 115/200 [13:16<16:19, 11.52s/it]

best score: -100.0
Epoch 115


 58%|█████▊    | 116/200 [13:23<14:20, 10.24s/it]

best score: -100.0
Epoch 116


 58%|█████▊    | 117/200 [13:38<16:04, 11.62s/it]

best score: -100.0
Epoch 117


 59%|█████▉    | 118/200 [13:55<17:55, 13.11s/it]

best score: -100.0
Epoch 118


 60%|█████▉    | 119/200 [14:14<19:58, 14.79s/it]

best score: -100.0
Epoch 119


 60%|██████    | 120/200 [14:24<18:01, 13.52s/it]

best score: -100.0
Epoch 120


 60%|██████    | 121/200 [14:35<16:49, 12.78s/it]

best score: -100.0
Epoch 121


 61%|██████    | 122/200 [14:46<15:58, 12.28s/it]

best score: -100.0
Epoch 122


 62%|██████▏   | 123/200 [14:58<15:23, 11.99s/it]

best score: -100.0
Epoch 123


 62%|██████▏   | 124/200 [15:10<15:14, 12.04s/it]

best score: -100.0
Epoch 124


 62%|██████▎   | 125/200 [15:21<14:46, 11.83s/it]

best score: -100.0
Epoch 125


 63%|██████▎   | 126/200 [15:33<14:30, 11.76s/it]

best score: -100.0
Epoch 126


 64%|██████▎   | 127/200 [15:44<14:07, 11.61s/it]

best score: -100.0
Epoch 127


 64%|██████▍   | 128/200 [15:56<13:53, 11.58s/it]

best score: -100.0
Epoch 128


 64%|██████▍   | 129/200 [16:07<13:42, 11.58s/it]

best score: -100.0
Epoch 129


 65%|██████▌   | 130/200 [16:19<13:31, 11.59s/it]

best score: -100.0
Epoch 130


 66%|██████▌   | 131/200 [16:30<13:18, 11.57s/it]

best score: -100.0
Epoch 131


 66%|██████▌   | 132/200 [16:42<13:03, 11.52s/it]

best score: -100.0
Epoch 132


 66%|██████▋   | 133/200 [16:53<12:53, 11.55s/it]

best score: -100.0
Epoch 133


 67%|██████▋   | 134/200 [17:05<12:49, 11.66s/it]

best score: -100.0
Epoch 134


 68%|██████▊   | 135/200 [17:17<12:34, 11.61s/it]

best score: -100.0
Epoch 135


 68%|██████▊   | 136/200 [17:28<12:20, 11.58s/it]

best score: -100.0
Epoch 136


 68%|██████▊   | 137/200 [17:40<12:09, 11.58s/it]

best score: -100.0
Epoch 137


 69%|██████▉   | 138/200 [17:51<11:54, 11.52s/it]

best score: -100.0
Epoch 138


 70%|██████▉   | 139/200 [18:02<11:38, 11.45s/it]

best score: -100.0
Epoch 139


 70%|███████   | 140/200 [18:14<11:35, 11.59s/it]

best score: -100.0
Epoch 140


 70%|███████   | 141/200 [18:26<11:23, 11.58s/it]

best score: -100.0
Epoch 141


 71%|███████   | 142/200 [18:38<11:15, 11.65s/it]

best score: -100.0
Epoch 142


 72%|███████▏  | 143/200 [18:49<11:00, 11.59s/it]

best score: -100.0
Epoch 143


 72%|███████▏  | 144/200 [19:01<10:46, 11.54s/it]

best score: -100.0
Epoch 144


 72%|███████▎  | 145/200 [19:12<10:33, 11.52s/it]

best score: -100.0
Epoch 145


 73%|███████▎  | 146/200 [19:23<10:17, 11.44s/it]

best score: -100.0
Epoch 146


 74%|███████▎  | 147/200 [19:35<10:03, 11.38s/it]

best score: -100.0
Epoch 147


 74%|███████▍  | 148/200 [19:46<09:50, 11.36s/it]

best score: -100.0
Epoch 148


 74%|███████▍  | 149/200 [19:57<09:42, 11.42s/it]

best score: -100.0
Epoch 149


 75%|███████▌  | 150/200 [20:09<09:32, 11.45s/it]

best score: -100.0
Epoch 150


 76%|███████▌  | 151/200 [20:20<09:20, 11.44s/it]

best score: -100.0
Epoch 151


 76%|███████▌  | 152/200 [20:32<09:10, 11.48s/it]

best score: -100.0
Epoch 152


 76%|███████▋  | 153/200 [20:43<08:58, 11.45s/it]

best score: -100.0
Epoch 153


 77%|███████▋  | 154/200 [20:55<08:45, 11.41s/it]

best score: -100.0
Epoch 154


 78%|███████▊  | 155/200 [21:06<08:33, 11.42s/it]

best score: -100.0
Epoch 155


 78%|███████▊  | 156/200 [21:18<08:23, 11.44s/it]

best score: -100.0
Epoch 156


 78%|███████▊  | 157/200 [21:29<08:10, 11.41s/it]

best score: -100.0
Epoch 157


 79%|███████▉  | 158/200 [21:41<08:01, 11.47s/it]

best score: -100.0
Epoch 158


 80%|███████▉  | 159/200 [21:52<07:53, 11.54s/it]

best score: -100.0
Epoch 159


 80%|████████  | 160/200 [22:05<07:51, 11.78s/it]

best score: -100.0
Epoch 160


 80%|████████  | 161/200 [22:19<08:05, 12.44s/it]

best score: -100.0
Epoch 161


 81%|████████  | 162/200 [22:30<07:47, 12.30s/it]

best score: -100.0
Epoch 162


 82%|████████▏ | 163/200 [22:42<07:24, 12.02s/it]

best score: -100.0
Epoch 163


 82%|████████▏ | 164/200 [22:53<07:06, 11.84s/it]

best score: -100.0
Epoch 164


 82%|████████▎ | 165/200 [23:05<06:55, 11.87s/it]

best score: -100.0
Epoch 165


 83%|████████▎ | 166/200 [23:19<07:01, 12.39s/it]

best score: -100.0
Epoch 166


 84%|████████▎ | 167/200 [23:31<06:49, 12.41s/it]

best score: -100.0
Epoch 167


 84%|████████▍ | 168/200 [23:43<06:30, 12.19s/it]

best score: -100.0
Epoch 168


 84%|████████▍ | 169/200 [23:55<06:12, 12.01s/it]

best score: -100.0
Epoch 169


 85%|████████▌ | 170/200 [24:06<05:53, 11.80s/it]

best score: -100.0
Epoch 170


 86%|████████▌ | 171/200 [24:18<05:41, 11.76s/it]

best score: -100.0
Epoch 171


 86%|████████▌ | 172/200 [24:29<05:26, 11.67s/it]

best score: -100.0
Epoch 172


 86%|████████▋ | 173/200 [24:41<05:16, 11.71s/it]

best score: -100.0
Epoch 173


 87%|████████▋ | 174/200 [24:53<05:05, 11.76s/it]

best score: -100.0
Epoch 174


 88%|████████▊ | 175/200 [25:04<04:54, 11.77s/it]

best score: -100.0
Epoch 175


 88%|████████▊ | 176/200 [25:16<04:40, 11.67s/it]

best score: -100.0
Epoch 176


 88%|████████▊ | 177/200 [25:27<04:27, 11.63s/it]

best score: -100.0
Epoch 177


 89%|████████▉ | 178/200 [25:39<04:16, 11.68s/it]

best score: -100.0
Epoch 178


 90%|████████▉ | 179/200 [25:51<04:04, 11.63s/it]

best score: -100.0
Epoch 179


 90%|█████████ | 180/200 [26:02<03:52, 11.63s/it]

best score: -100.0
Epoch 180


 90%|█████████ | 181/200 [26:14<03:39, 11.55s/it]

best score: -100.0
Epoch 181


 91%|█████████ | 182/200 [26:25<03:28, 11.58s/it]

best score: -100.0
Epoch 182


 92%|█████████▏| 183/200 [26:37<03:16, 11.58s/it]

best score: -100.0
Epoch 183


 92%|█████████▏| 184/200 [26:49<03:05, 11.58s/it]

best score: -100.0
Epoch 184


 92%|█████████▎| 185/200 [27:01<02:55, 11.73s/it]

best score: -100.0
Epoch 185


 93%|█████████▎| 186/200 [27:13<02:45, 11.80s/it]

best score: -100.0
Epoch 186


 94%|█████████▎| 187/200 [27:24<02:32, 11.73s/it]

best score: -100.0
Epoch 187


 94%|█████████▍| 188/200 [27:36<02:20, 11.72s/it]

best score: -100.0
Epoch 188


 94%|█████████▍| 189/200 [27:47<02:08, 11.68s/it]

best score: -100.0
Epoch 189


 95%|█████████▌| 190/200 [27:59<01:56, 11.62s/it]

best score: -100.0
Epoch 190


 96%|█████████▌| 191/200 [28:10<01:44, 11.59s/it]

best score: -100.0
Epoch 191


 96%|█████████▌| 192/200 [28:22<01:31, 11.50s/it]

best score: -100.0
Epoch 192


 96%|█████████▋| 193/200 [28:33<01:20, 11.56s/it]

best score: -100.0
Epoch 193


 97%|█████████▋| 194/200 [28:45<01:09, 11.65s/it]

best score: -100.0
Epoch 194


 98%|█████████▊| 195/200 [28:57<00:58, 11.69s/it]

best score: -100.0
Epoch 195


 98%|█████████▊| 196/200 [29:09<00:46, 11.73s/it]

best score: -100.0
Epoch 196


 98%|█████████▊| 197/200 [29:20<00:34, 11.66s/it]

best score: -100.0
Epoch 197


 99%|█████████▉| 198/200 [29:32<00:23, 11.65s/it]

best score: -100.0
Epoch 198


100%|█████████▉| 199/200 [29:44<00:11, 11.67s/it]

best score: -100.0
Epoch 199


100%|██████████| 200/200 [29:55<00:00, 11.52s/it]

best score: -100.0



