In [49]:
!pip install keras-rl2



In [50]:
import gym 
import os 
from tensorflow.keras.models import Sequential , Model
from tensorflow.keras.layers import Dense,Flatten,Input,Concatenate
from tensorflow.keras.optimizers import Adam
from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.policy import BoltzmannQPolicy
import numpy as np
from rl.random import OrnsteinUhlenbeckProcess

In [51]:
# Create the Gym environment registered under the id 'Pendulum-v0'.
env = gym.make('Pendulum-v0')

In [52]:
# states: the observation space's shape tuple.
# actions: the first entry of the action space's shape.
states, actions = env.observation_space.shape, env.action_space.shape[0]

In [53]:
# Display the observation shape tuple and the action dimension count.
states,actions

((3,), 1)

In [54]:
def get_actor_model(states,actions):
  """Build the actor network as a plain feed-forward stack.

  Args:
    states: observation shape tuple; the input layer expects (1,) + states.
    actions: number of units in the final linear output layer.

  Returns:
    A Sequential model: Flatten -> Dense 128/64/32 (relu) -> Dense(actions).
  """
  actor = Sequential()
  # The leading 1 in the input shape is prepended to the observation shape
  # and flattened away before the dense layers.
  actor.add(Flatten(input_shape=(1,) + states))
  for units in (128, 64, 32):
    actor.add(Dense(units, activation='relu'))
  actor.add(Dense(actions, activation='linear'))
  return actor

In [55]:
def get_critic(states,actions):
  """Build the critic network that maps (action, observation) to one value.

  Args:
    states: observation shape tuple; the observation input is (1,) + states.
    actions: width of the action input.

  Returns:
    A tuple (critic_model, action_input), where action_input is the Input
    tensor named 'action_input' that feeds the model.
  """
  action_input = Input(shape=(actions,), name='action_input')
  obs_input = Input(shape=(1,) + states, name='obs_input')

  # Flatten the observation so it can be joined with the action vector.
  flat_obs = Flatten()(obs_input)
  hidden = Concatenate()([action_input, flat_obs])

  for units in (128, 64, 32):
    hidden = Dense(units, activation='relu')(hidden)
  q_value = Dense(1, activation='linear')(hidden)

  critic = Model(inputs=[action_input, obs_input], outputs=q_value)
  return critic, action_input
  



In [56]:
def build_agent(critic,actor,states,actions,action_input):
  """Assemble and compile a DDPG agent from an actor/critic pair.

  Args:
    critic: model passed to DDPGAgent as `critic`.
    actor: model passed to DDPGAgent as `actor`.
    states: observation shape; not used here, kept so existing callers
      that pass it keep working.
    actions: action dimension, used both as `nb_actions` and as the size
      of the exploration-noise process.
    action_input: the critic's action input tensor, passed as
      `critic_action_input`.

  Returns:
    The DDPGAgent after `compile` has been called on it.
  """
  memory = SequentialMemory(limit=10000,window_length=1)
  # Exploration noise is supplied through `random_process`. No policy object
  # is passed to DDPGAgent, so the BoltzmannQPolicy that used to be built
  # here was dead code and has been dropped.
  random_process = OrnsteinUhlenbeckProcess(size=actions,theta=.15,mu=0,sigma=.3)
  agent = DDPGAgent(
      memory=memory,
      critic=critic,
      actor=actor,
      critic_action_input=action_input,
      nb_actions=actions,
      random_process=random_process,
      nb_steps_warmup_actor=100,
      nb_steps_warmup_critic=100,
      gamma=.99,
      target_model_update=1e-3,
  )
  agent.compile(Adam(learning_rate=1e-2),metrics=['mae'])
  return agent

In [57]:
# Build both networks, then wire them into a compiled agent.
actor = get_actor_model(states=states, actions=actions)
critic, action_input = get_critic(states=states, actions=actions)
ddpg = build_agent(
    critic=critic,
    actor=actor,
    states=states,
    actions=actions,
    action_input=action_input,
)

In [58]:
# Train the agent on `env` for 50000 steps with rendering turned off.
ddpg.fit(env,nb_steps=50000,visualize=False)

Training for 50000 steps ...
Interval 1 (0 steps performed)


  updates=self.state_updates,


50 episodes - episode_reward: -1362.525 [-1796.054, -414.676] - loss: 2.999 - mae: 0.869 - mean_q: -33.594

Interval 2 (10000 steps performed)
50 episodes - episode_reward: -769.899 [-1495.627, -125.603] - loss: 12.145 - mae: 2.009 - mean_q: -61.612

Interval 3 (20000 steps performed)
50 episodes - episode_reward: -320.484 [-1492.794, -7.693] - loss: 7.616 - mae: 1.444 - mean_q: -34.928

Interval 4 (30000 steps performed)
50 episodes - episode_reward: -204.080 [-900.712, -2.221] - loss: 6.971 - mae: 1.258 - mean_q: -23.463

Interval 5 (40000 steps performed)
done, took 599.444 seconds


<keras.callbacks.History at 0x7ff54c20cb10>

In [59]:
# Run 100 evaluation episodes without rendering and report the mean reward.
scores = ddpg.test(env, nb_episodes=100, visualize=False)
episode_rewards = scores.history['episode_reward']
print('AVERAGE REWARD :', np.mean(episode_rewards))

Testing for 100 episodes ...
Episode 1: reward: -222.922, steps: 200
Episode 2: reward: -120.915, steps: 200
Episode 3: reward: -119.192, steps: 200
Episode 4: reward: -1.394, steps: 200
Episode 5: reward: -225.195, steps: 200
Episode 6: reward: -123.057, steps: 200
Episode 7: reward: -0.444, steps: 200
Episode 8: reward: -376.883, steps: 200
Episode 9: reward: -2.027, steps: 200
Episode 10: reward: -118.522, steps: 200
Episode 11: reward: -116.116, steps: 200
Episode 12: reward: -120.466, steps: 200
Episode 13: reward: -670.302, steps: 200
Episode 14: reward: -232.010, steps: 200
Episode 15: reward: -233.511, steps: 200
Episode 16: reward: -509.926, steps: 200
Episode 17: reward: -229.213, steps: 200
Episode 18: reward: -114.990, steps: 200
Episode 19: reward: -122.684, steps: 200
Episode 20: reward: -313.143, steps: 200
Episode 21: reward: -0.649, steps: 200
Episode 22: reward: -243.924, steps: 200
Episode 23: reward: -383.734, steps: 200
Episode 24: reward: -1.388, steps: 200
Episod

In [64]:
# exist_ok=True keeps this cell re-runnable: a plain os.mkdir raises
# FileExistsError when the 'Models' folder is already present.
os.makedirs('Models', exist_ok=True)
model_path = os.path.join('Models','DDPG_PENDULUM_KERASRL.h5f')

In [65]:
# Pass overwrite=True so re-running the notebook replaces weight files from a
# previous run; with the default (overwrite=False) Keras is expected to stop
# and ask for confirmation when the target files already exist.
ddpg.save_weights(model_path, overwrite=True)