In [3]:
!pip install gym



In [2]:
import os

import gym
import h5py
import numpy as np
import tensorflow as tf
from tensorflow import keras

from Agents import A2CAgent
from Environment import Environment, GymWrapper
from Memory import Memory
from Policies import Policy


In [3]:
# Build the raw Gym environment.
g_env = gym.make('MountainCar-v0')

# Read the per-episode step cap from the environment's public spec instead of
# the private `_max_episode_steps` attribute of the returned object.
max_steps = g_env.spec.max_episode_steps
print(max_steps)
print(g_env.observation_space, g_env.action_space)

# Wrap the Gym environment in the project's adapter; the wrapper exposes
# `state_shape` (used for the network input) and is what the agent runs against.
env = GymWrapper(g_env)

# Shared Keras input tensor for the models built in the next cell.
inputs = keras.layers.Input(shape=env.state_shape)


200
Box(-1.2000000476837158, 0.6000000238418579, (2,), float32) Discrete(3)


In [7]:
# Creating models
# Shared trunk: two identical Dense -> BatchNormalization stages applied to
# `inputs`. Both the actor and the critic heads below branch off the final `x`.
dense_kwargs = dict(activation='relu', kernel_initializer='he_normal')
batch_norm_kwargs = dict(epsilon=1e-5, momentum=0.99)

x = inputs
for stage_index in range(2):
    x = keras.layers.Dense(64, **dense_kwargs)(x)
    x = keras.layers.BatchNormalization(**batch_norm_kwargs)(x)

# Build actor model: softmax over the environment's discrete actions.
actor_head = keras.layers.Dense(env.n_actions, activation='softmax')
actor_outputs = actor_head(x)
actor = keras.Model(inputs=inputs, outputs=actor_outputs)
actor.compile(
    optimizer=keras.optimizers.Adam(.001),
    loss='mse',
)
actor.summary()

# Build critic model: a single linear output on the same trunk.
critic_head = keras.layers.Dense(1)
critic_outputs = critic_head(x)
critic = keras.Model(inputs=inputs, outputs=critic_outputs)
critic.compile(
    optimizer=keras.optimizers.Adam(.001),
    loss='mse',
)
critic.summary()


Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 2)]               0         
_________________________________________________________________
dense (Dense)                (None, 64)                192       
_________________________________________________________________
batch_normalization (BatchNo (None, 64)                256       
_________________________________________________________________
dense_1 (Dense)              (None, 64)                4160      
_________________________________________________________________
batch_normalization_1 (Batch (None, 64)                256       
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 195       
Total params: 5,059
Trainable params: 4,803
Non-trainable params: 256
_________________________________________________________

In [8]:
# Create agent
def make_memory(shape, dtype):
    """Memory factory handed to the agent.

    Both arguments are accepted to match the factory call signature but are
    ignored; every call returns a fresh ``Memory`` with capacity 20000.
    """
    return Memory(capacity=20000)


agent = A2CAgent(
    a_model=actor,
    c_model=critic,
    discounted_rate=.99,
    lambda_rate=.95,
    create_memory=make_memory,
)



In [6]:
# Training the agent
def end_episode_callback(episode, reward):
    """Validate (and possibly checkpoint) the agent after a promising episode.

    Reads the notebook globals ``agent``, ``env``, ``max_steps`` and
    ``save_dir``; all of them must be defined before training starts.

    Args:
        episode: Episode identifier, used only in the checkpoint note.
        reward: Reward reported for the finished episode (presumably its
            total reward - confirm against the Environment module).

    Returns:
        ``True`` when the 10-episode validation result is at least -100,
        which signals the caller to end training early; ``None`` otherwise.
    """
    # Only spend time validating when the training episode looked good.
    if not (reward > -150):
        return None

    # Temporarily switch the agent to pure evaluation (no learning, no
    # memorizing). The try/finally guarantees the training configuration is
    # restored even if validation raises or is interrupted, so the agent is
    # never silently left in evaluation mode.
    old_playing_data = agent.playing_data
    agent.set_playing_data(training=False, memorizing=False)
    try:
        result = env.run_episodes(agent=agent,
                                  num_episodes=10,
                                  max_steps=max_steps,
                                  verbose=False,
                                  episode_verbose=False,
                                  render=False
                                  )
        print(f'Validate results: {result}')
        # Checkpoint while still in evaluation mode, as the original did.
        if result >= -110:
            agent.save(save_dir, note=f'A2C_{episode}_{result}')
    finally:
        agent.playing_data = old_playing_data

    if result >= -100:  # end early
        return True


# Put the agent into learning mode with the training hyperparameters.
agent.set_playing_data(
    training=True, memorizing=True,
    batch_size=16, mini_batch=1024,
    epochs=1, repeat=5,
    entropy_coef=0,
    verbose=False
)

# `save_dir` is also read by end_episode_callback when it checkpoints.
save_dir = 'models'
num_episodes = 500

# Run training; the callback validates promising episodes and may stop early.
# The episode budget comes from `num_episodes` so that changing the variable
# above actually changes the run.
result = env.run_episodes(
    agent, num_episodes=num_episodes, max_steps=max_steps,
    verbose=True, episode_verbose=False,
    render=False,
    end_episode_callback=end_episode_callback
)

# Save the final agent; as the last expression, the value returned by save()
# is displayed as the cell output.
agent.save(save_dir, note=f'A2C_{result}')

Time: 02:00:56 - Episode: 1 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -200.0 - Average Total Reward: -200.0 - Memory Size: 200
Time: 02:00:57 - Episode: 2 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -200.0 - Average Total Reward: -200.0 - Memory Size: 400
Time: 02:00:58 - Episode: 3 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -200.0 - Average Total Reward: -200.0 - Memory Size: 600
Time: 02:00:59 - Episode: 4 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -200.0 - Average Total Reward: -200.0 - Memory Size: 800
Time: 02:01:00 - Episode: 5 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -200.0 - Average Total Reward: -200.0 - Memory Size: 1000
Time: 02:01:01 - Episode: 6 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -200.0 - Average Total Reward: -200.0 - Memory Size: 1200
Time: 02:01:02 - Episode: 7 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -200.0 - Average Total Reward: -200.0 - Memory Size: 1400
Tim

Time: 02:02:05 - Episode: 56 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -185.0 - Average Total Reward: -199.73214285714286 - Memory Size: 11185
Time: 02:02:06 - Episode: 57 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -185.0 - Average Total Reward: -199.73684210526315 - Memory Size: 11385
Time: 02:02:08 - Episode: 58 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -185.0 - Average Total Reward: -199.74137931034483 - Memory Size: 11585
Time: 02:02:10 - Episode: 59 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -185.0 - Average Total Reward: -199.74576271186442 - Memory Size: 11785
Time: 02:02:11 - Episode: 60 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -185.0 - Average Total Reward: -199.75 - Memory Size: 11985
Time: 02:02:12 - Episode: 61 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -185.0 - Average Total Reward: -199.75409836065575 - Memory Size: 12185
Time: 02:02:14 - Episode: 62 - Steps: 200 - Total Reward: -200.0

Time: 02:03:28 - Episode: 109 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -185.0 - Average Total Reward: -199.86238532110093 - Memory Size: 20000
Time: 02:03:29 - Episode: 110 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -185.0 - Average Total Reward: -199.86363636363637 - Memory Size: 20000
Time: 02:03:31 - Episode: 111 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -185.0 - Average Total Reward: -199.86486486486487 - Memory Size: 20000
Time: 02:03:32 - Episode: 112 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -185.0 - Average Total Reward: -199.86607142857142 - Memory Size: 20000
Time: 02:03:34 - Episode: 113 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -185.0 - Average Total Reward: -199.86725663716814 - Memory Size: 20000
Time: 02:03:36 - Episode: 114 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -185.0 - Average Total Reward: -199.8684210526316 - Memory Size: 20000
Time: 02:03:37 - Episode: 115 - Steps: 200 - To

Time: 02:04:50 - Episode: 161 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -155.0 - Average Total Reward: -198.80745341614906 - Memory Size: 20000
Time: 02:04:52 - Episode: 162 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -155.0 - Average Total Reward: -198.8148148148148 - Memory Size: 20000
Time: 02:04:54 - Episode: 163 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -155.0 - Average Total Reward: -198.82208588957056 - Memory Size: 20000
Time: 02:04:55 - Episode: 164 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -155.0 - Average Total Reward: -198.82926829268294 - Memory Size: 20000
Time: 02:04:57 - Episode: 165 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -155.0 - Average Total Reward: -198.83636363636364 - Memory Size: 20000
Time: 02:04:58 - Episode: 166 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -155.0 - Average Total Reward: -198.84337349397592 - Memory Size: 20000
Time: 02:05:00 - Episode: 167 - Steps: 200 - To

Time: 02:06:17 - Episode: 213 - Steps: 146 - Total Reward: -146.0 - Best Total Reward: -144.0 - Average Total Reward: -198.3661971830986 - Memory Size: 20000
Validate results: -163.6
Time: 02:06:23 - Episode: 214 - Steps: 147 - Total Reward: -147.0 - Best Total Reward: -144.0 - Average Total Reward: -198.12616822429908 - Memory Size: 20000
Validate results: -146.9
Time: 02:06:28 - Episode: 215 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -144.0 - Average Total Reward: -198.13488372093022 - Memory Size: 20000
Time: 02:06:29 - Episode: 216 - Steps: 123 - Total Reward: -123.0 - Best Total Reward: -123.0 - Average Total Reward: -197.78703703703704 - Memory Size: 20000
Validate results: -143.6
Time: 02:06:34 - Episode: 217 - Steps: 125 - Total Reward: -125.0 - Best Total Reward: -123.0 - Average Total Reward: -197.4516129032258 - Memory Size: 20000
Validate results: -127.6
Time: 02:06:38 - Episode: 218 - Steps: 115 - Total Reward: -115.0 - Best Total Reward: -115.0 - Average Tot

Time: 02:09:07 - Episode: 261 - Steps: 161 - Total Reward: -161.0 - Best Total Reward: -97.0 - Average Total Reward: -188.28735632183907 - Memory Size: 20000
Time: 02:09:09 - Episode: 262 - Steps: 155 - Total Reward: -155.0 - Best Total Reward: -97.0 - Average Total Reward: -188.16030534351145 - Memory Size: 20000
Time: 02:09:11 - Episode: 263 - Steps: 162 - Total Reward: -162.0 - Best Total Reward: -97.0 - Average Total Reward: -188.06083650190115 - Memory Size: 20000
Time: 02:09:12 - Episode: 264 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -97.0 - Average Total Reward: -188.1060606060606 - Memory Size: 20000
Time: 02:09:14 - Episode: 265 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -97.0 - Average Total Reward: -188.1509433962264 - Memory Size: 20000
Time: 02:09:16 - Episode: 266 - Steps: 149 - Total Reward: -149.0 - Best Total Reward: -97.0 - Average Total Reward: -188.00375939849624 - Memory Size: 20000
Validate results: -159.7
Time: 02:09:20 - Episode: 267

Time: 02:11:15 - Episode: 311 - Steps: 158 - Total Reward: -158.0 - Best Total Reward: -85.0 - Average Total Reward: -183.39228295819936 - Memory Size: 20000
Time: 02:11:16 - Episode: 312 - Steps: 120 - Total Reward: -120.0 - Best Total Reward: -85.0 - Average Total Reward: -183.18910256410257 - Memory Size: 20000
Validate results: -151.7
Time: 02:11:21 - Episode: 313 - Steps: 114 - Total Reward: -114.0 - Best Total Reward: -85.0 - Average Total Reward: -182.96805111821087 - Memory Size: 20000
Validate results: -160.6
Time: 02:11:26 - Episode: 314 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -85.0 - Average Total Reward: -183.02229299363057 - Memory Size: 20000
Time: 02:11:27 - Episode: 315 - Steps: 124 - Total Reward: -124.0 - Best Total Reward: -85.0 - Average Total Reward: -182.83492063492062 - Memory Size: 20000
Validate results: -137.3
Time: 02:11:32 - Episode: 316 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -85.0 - Average Total Reward: -182.8892405063291

Time: 02:13:58 - Episode: 358 - Steps: 125 - Total Reward: -125.0 - Best Total Reward: -85.0 - Average Total Reward: -175.8072625698324 - Memory Size: 20000
Validate results: -118.9
Time: 02:14:02 - Episode: 359 - Steps: 123 - Total Reward: -123.0 - Best Total Reward: -85.0 - Average Total Reward: -175.66016713091923 - Memory Size: 20000
Validate results: -114.0
Time: 02:14:06 - Episode: 360 - Steps: 107 - Total Reward: -107.0 - Best Total Reward: -85.0 - Average Total Reward: -175.46944444444443 - Memory Size: 20000
Validate results: -117.3
Time: 02:14:10 - Episode: 361 - Steps: 104 - Total Reward: -104.0 - Best Total Reward: -85.0 - Average Total Reward: -175.27146814404432 - Memory Size: 20000
Validate results: -133.0
Time: 02:14:14 - Episode: 362 - Steps: 104 - Total Reward: -104.0 - Best Total Reward: -85.0 - Average Total Reward: -175.0745856353591 - Memory Size: 20000
Validate results: -126.9
Time: 02:14:18 - Episode: 363 - Steps: 104 - Total Reward: -104.0 - Best Total Reward: 

Validate results: -100.4
Time: 02:16:54 - Episode: 404 - Steps: 103 - Total Reward: -103.0 - Best Total Reward: -85.0 - Average Total Reward: -169.39356435643563 - Memory Size: 20000
Validate results: -144.6
Time: 02:17:00 - Episode: 405 - Steps: 188 - Total Reward: -188.0 - Best Total Reward: -85.0 - Average Total Reward: -169.43950617283951 - Memory Size: 20000
Time: 02:17:02 - Episode: 406 - Steps: 164 - Total Reward: -164.0 - Best Total Reward: -85.0 - Average Total Reward: -169.42610837438423 - Memory Size: 20000
Time: 02:17:04 - Episode: 407 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -85.0 - Average Total Reward: -169.5012285012285 - Memory Size: 20000
Time: 02:17:06 - Episode: 408 - Steps: 200 - Total Reward: -200.0 - Best Total Reward: -85.0 - Average Total Reward: -169.57598039215685 - Memory Size: 20000
Time: 02:17:07 - Episode: 409 - Steps: 104 - Total Reward: -104.0 - Best Total Reward: -85.0 - Average Total Reward: -169.41564792176038 - Memory Size: 20000
Val

Validate results: -158.0
Time: 02:20:21 - Episode: 451 - Steps: 140 - Total Reward: -140.0 - Best Total Reward: -85.0 - Average Total Reward: -166.0931263858093 - Memory Size: 20000
Validate results: -135.9
Time: 02:20:26 - Episode: 452 - Steps: 137 - Total Reward: -137.0 - Best Total Reward: -85.0 - Average Total Reward: -166.0287610619469 - Memory Size: 20000
Validate results: -136.7
Time: 02:20:31 - Episode: 453 - Steps: 135 - Total Reward: -135.0 - Best Total Reward: -85.0 - Average Total Reward: -165.96026490066225 - Memory Size: 20000
Validate results: -148.6
Time: 02:20:36 - Episode: 454 - Steps: 134 - Total Reward: -134.0 - Best Total Reward: -85.0 - Average Total Reward: -165.88986784140968 - Memory Size: 20000
Validate results: -131.3
Time: 02:20:41 - Episode: 455 - Steps: 126 - Total Reward: -126.0 - Best Total Reward: -85.0 - Average Total Reward: -165.8021978021978 - Memory Size: 20000
Validate results: -163.2
Time: 02:20:46 - Episode: 456 - Steps: 160 - Total Reward: -160

Time: 02:23:30 - Episode: 498 - Steps: 110 - Total Reward: -110.0 - Best Total Reward: -85.0 - Average Total Reward: -163.8152610441767 - Memory Size: 20000
Validate results: -108.9
Time: 02:23:35 - Episode: 499 - Steps: 110 - Total Reward: -110.0 - Best Total Reward: -85.0 - Average Total Reward: -163.7074148296593 - Memory Size: 20000
Validate results: -108.0
Time: 02:23:40 - Episode: 500 - Steps: 109 - Total Reward: -109.0 - Best Total Reward: -85.0 - Average Total Reward: -163.598 - Memory Size: 20000
Validate results: -121.3


'models\\20210412_022343_434645'

In [10]:
# Build the checkpoint path with os.path.join so it resolves on every OS; a
# literal 'models\\...' is a single file name on POSIX, not a subdirectory.
# NOTE(review): this timestamped directory comes from one specific earlier
# run and will not exist after retraining - update it to a checkpoint that
# was actually produced.
checkpoint_path = os.path.join('models', '20210412_022333_410906')
# load_data=False presumably skips restoring stored data and loads only the
# models - confirm against the Agents module.
agent.load(checkpoint_path, load_data=False)

# Evaluation only: no learning and no memorizing.
agent.set_playing_data(training=False,
                       memorizing=False)

# Single rendered episode as a visual sanity check.
step, total_reward = env.run_episode(
    agent, max_steps,
    verbose=False, render=True
)
print(total_reward)


# Evaluate over 100 episodes without rendering.
num_episodes = 100
agent.set_playing_data(training=False,
                       memorizing=False)
result = env.run_episodes(
    agent, num_episodes, max_steps,
    verbose=True, episode_verbose=False,
    render=False
)
# The notebook treats a result of at least -110 as solved.
print(f'Solved: {result >= -110}')

-107.0
Time: 02:29:28 - Episode: 1 - Steps: 107 - Total Reward: -107.0 - Best Total Reward: -107.0 - Average Total Reward: -107.0 - Memory Size: 0
Time: 02:29:28 - Episode: 2 - Steps: 110 - Total Reward: -110.0 - Best Total Reward: -107.0 - Average Total Reward: -108.5 - Memory Size: 0
Time: 02:29:28 - Episode: 3 - Steps: 111 - Total Reward: -111.0 - Best Total Reward: -107.0 - Average Total Reward: -109.33333333333333 - Memory Size: 0
Time: 02:29:29 - Episode: 4 - Steps: 107 - Total Reward: -107.0 - Best Total Reward: -107.0 - Average Total Reward: -108.75 - Memory Size: 0
Time: 02:29:29 - Episode: 5 - Steps: 108 - Total Reward: -108.0 - Best Total Reward: -107.0 - Average Total Reward: -108.6 - Memory Size: 0
Time: 02:29:29 - Episode: 6 - Steps: 109 - Total Reward: -109.0 - Best Total Reward: -107.0 - Average Total Reward: -108.66666666666667 - Memory Size: 0
Time: 02:29:30 - Episode: 7 - Steps: 108 - Total Reward: -108.0 - Best Total Reward: -107.0 - Average Total Reward: -108.57142

Time: 02:29:42 - Episode: 56 - Steps: 108 - Total Reward: -108.0 - Best Total Reward: -106.0 - Average Total Reward: -108.66071428571429 - Memory Size: 0
Time: 02:29:43 - Episode: 57 - Steps: 108 - Total Reward: -108.0 - Best Total Reward: -106.0 - Average Total Reward: -108.64912280701755 - Memory Size: 0
Time: 02:29:43 - Episode: 58 - Steps: 110 - Total Reward: -110.0 - Best Total Reward: -106.0 - Average Total Reward: -108.67241379310344 - Memory Size: 0
Time: 02:29:43 - Episode: 59 - Steps: 111 - Total Reward: -111.0 - Best Total Reward: -106.0 - Average Total Reward: -108.71186440677967 - Memory Size: 0
Time: 02:29:43 - Episode: 60 - Steps: 108 - Total Reward: -108.0 - Best Total Reward: -106.0 - Average Total Reward: -108.7 - Memory Size: 0
Time: 02:29:44 - Episode: 61 - Steps: 110 - Total Reward: -110.0 - Best Total Reward: -106.0 - Average Total Reward: -108.72131147540983 - Memory Size: 0
Time: 02:29:44 - Episode: 62 - Steps: 110 - Total Reward: -110.0 - Best Total Reward: -10