In [None]:
# Install keras-rl2 (presumably the distribution that provides the `rl` package imported further down — confirm).
# NOTE(review): the very next cell runs this same install command again.
!pip install keras-rl2

In [None]:
!pip install keras-rl2
!pip3 install Box2D
!pip3 install box2d-py
!pip3 install gym[Box_2D]
!apt-get install swig

In [3]:
import gym 
import os 
from tensorflow.keras.layers import Dense,Flatten,Input,Concatenate
from tensorflow.keras.models import Model,Sequential 
from tensorflow.keras.optimizers import Adam 
from rl.agents import DDPGAgent , DQNAgent 
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
from rl.random import   OrnsteinUhlenbeckProcess


In [4]:
# Create the Gym environment registered under the id 'BipedalWalker-v3'.
env = gym.make('BipedalWalker-v3')



In [5]:
# `states` is the observation shape tuple; `actions` is the length of the
# first (only) axis of the action space, i.e. the number of action components.
states, actions = env.observation_space.shape, env.action_space.shape[0]

In [6]:
# Display the observation shape and the action count; the recorded output is ((24,), 4).
states,actions

((24,), 4)

In [7]:
def get_actor(states,actions):
  """Build the actor network as a Keras Sequential model.

  Args:
    states: observation shape tuple; the input shape is (1,) + states, which
      is flattened before the dense layers.
    actions: number of units in the final, linearly activated layer.

  Returns:
    An uncompiled Sequential model: Flatten -> Dense 128/64/32/16 (relu)
    -> Dense(actions, linear).
  """
  hidden_widths = (128, 64, 32, 16)

  # Layers are created in the same order they are stacked.
  layers = [Flatten(input_shape=(1,) + states)]
  layers.extend(Dense(width, activation='relu') for width in hidden_widths)
  layers.append(Dense(actions, activation='linear'))

  return Sequential(layers)

In [8]:
def get_critic(states,actions):
  """Build the critic network with the Keras functional API.

  The model takes two inputs, an action vector of shape (actions,) and an
  observation of shape (1,) + states. The observation is flattened,
  concatenated after the action, and passed through Dense 128/64/32/16 (relu)
  followed by a single linear output unit.

  Args:
    states: observation shape tuple.
    actions: length of the action vector.

  Returns:
    A tuple (critic_model, action_input_tensor).
  """
  action_in = Input(shape=(actions,), name='action_input')
  observation_in = Input(shape=(1,) + states, name='obs_input')

  flat_observation = Flatten()(observation_in)
  hidden = Concatenate()([action_in, flat_observation])
  for width in (128, 64, 32, 16):
    hidden = Dense(width, activation='relu')(hidden)
  q_value = Dense(1, activation='linear')(hidden)

  critic_model = Model(inputs=[action_in, observation_in], outputs=q_value)
  return critic_model, action_in

In [9]:
def build_agent(crtic,actor,states,actions,action_input):
  """Assemble and compile a keras-rl DDPG agent from an actor and a critic.

  Args:
    crtic: the critic model. The misspelled parameter name is kept so that
      existing callers keep working; the body now actually uses it instead of
      reading a notebook-level `critic` variable.
    actor: the actor model.
    states: observation shape; accepted for signature compatibility but not
      used by this function.
    actions: number of action components; sets `nb_actions` and the size of
      the exploration-noise process.
    action_input: the critic's action input tensor, forwarded as
      `critic_action_input`.

  Returns:
    The compiled DDPGAgent.
  """
  memory = SequentialMemory(limit=50000,window_length=1)
  random_process = OrnsteinUhlenbeckProcess(theta=.15,sigma=.3,size=actions,mu=0)
  ddpg = DDPGAgent(
      nb_actions=actions,
      actor=actor,
      critic=crtic,  # use the argument, not a global that happens to be named `critic`
      critic_action_input=action_input,
      memory=memory,
      nb_steps_warmup_actor=100,
      nb_steps_warmup_critic=100,
      random_process=random_process,
      target_model_update=1e-3,
  )
  ddpg.compile(Adam(learning_rate=1e-2),metrics=['mae'])
  return ddpg

In [10]:
actor = get_actor(states,actions)
critic,action_input = get_critic(states,actions)
ddpg = build_agent(critic,actor,states,actions,action_input)

In [15]:
# Train the DDPG agent on `env` for 40000 steps with visualize=False.
# The recorded run took about 456 seconds and returned a Keras History object.
ddpg.fit(env,nb_steps=40000,visualize=False)

Training for 40000 steps ...
Interval 1 (0 steps performed)
7 episodes - episode_reward: -162.551 [-178.794, -110.634] - loss: 0.792 - mae: 0.050 - mean_q: -8.296

Interval 2 (10000 steps performed)
8 episodes - episode_reward: -153.722 [-177.962, -104.117] - loss: 0.570 - mae: 0.042 - mean_q: -8.568

Interval 3 (20000 steps performed)
6 episodes - episode_reward: -166.529 [-176.965, -150.069] - loss: 0.519 - mae: 0.040 - mean_q: -8.807

Interval 4 (30000 steps performed)
done, took 455.949 seconds


<keras.callbacks.History at 0x7f34bc202810>

In [16]:
# Delete the Models directory and everything in it (-f: no error if it is absent).
# NOTE(review): this discards anything previously saved there — confirm that is intended.
!rm -rf Models

In [None]:
import numpy as np

# Run 100 evaluation episodes without rendering, then print the mean of the
# per-episode rewards recorded in the returned history.
scores = ddpg.test(env, nb_episodes=100, visualize=False)
print(np.asanyarray(scores.history['episode_reward']).mean())

In [19]:
# Make sure the output directory exists. makedirs(..., exist_ok=True) is a
# no-op when the directory is already there and avoids the check-then-create
# race of testing os.path.exists() before os.mkdir().
os.makedirs('Models', exist_ok=True)

In [20]:
# Save the trained agent's weights under the Models directory.
# NOTE(review): the extension is spelled '.hf5'; keras-rl examples commonly use '.h5f' — confirm this is intended.
ddpg.save_weights('Models/DDPG_KERASRL_BIPEDALWALKERV3.hf5')