# Inverted Pendulum

- Based on OpenAI-Gym library
- Based on nicknochnack github and youtube videos

In [1]:
#conda env create --file=environment.yaml
import gym
from time import sleep
import random
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

### Test random enviroment

In [2]:
environment = gym.make("CartPole-v1", render_mode="human")
#enviroment = gym.make("CartPole-v1", render_mode="rgb_array")
states = environment.observation_space.shape[0]
actions = environment.action_space.n

states, actions

(4, 2)

In [3]:
state = environment.reset()
#enviroment.render()
action = random.choice([0,1])
obs, reward, terminated, truncated , info = environment.step(action)
done = truncated or terminated 
environment.step(action)
#enviroment.close()

(array([-0.04136642,  0.42304686,  0.00880498, -0.6041399 ], dtype=float32),
 1.0,
 False,
 False,
 {})

In [4]:
episodes = 15

for epi in range(1, episodes+1):
    state = environment.reset()
    score = 0
    done = False

    while not done:
        #enviroment.render()
        action = random.choice([0,1])
        obs, reward, terminated, truncated , info = environment.step(action)
        done = truncated or terminated 
        score += reward
        
    print('Episode:{} Score:{}'.format(epi,score))
    #sleep(0.03)

#enviroment.close()



Episode:1 Score:29.0
Episode:2 Score:17.0
Episode:3 Score:15.0
Episode:4 Score:44.0
Episode:5 Score:15.0
Episode:6 Score:19.0
Episode:7 Score:22.0
Episode:8 Score:23.0
Episode:9 Score:50.0
Episode:10 Score:27.0
Episode:11 Score:15.0
Episode:12 Score:19.0
Episode:13 Score:11.0
Episode:14 Score:13.0
Episode:15 Score:37.0


### DL Model

In [3]:
def build_model(states, actions):
    model = Sequential()
    model.add(Flatten(input_shape = (1,states)))
    model.add(Dense(25, activation='relu'))
    model.add(Dense(25, activation='relu'))
    model.add(Dense(25, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model

model = build_model(states, actions)

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 4)                 0         
_________________________________________________________________
dense (Dense)                (None, 25)                125       
_________________________________________________________________
dense_1 (Dense)              (None, 25)                650       
_________________________________________________________________
dense_2 (Dense)              (None, 25)                650       
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 52        
Total params: 1,477
Trainable params: 1,477
Non-trainable params: 0
_________________________________________________________________


### Build Agent with Keras RL

In [5]:
def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit= 70000, window_length = 1)
    dqn = DQNAgent(model=model,
                   memory=memory,
                   policy=policy,
                   nb_actions = actions,
                   nb_steps_warmup =10,
                   target_model_update = 1e-2)
    return dqn

dqn = build_agent(model, actions)
dqn.compile(Adam(lr=1e-3), metrics = ['mae'])

In [6]:
dqn.fit(environment, nb_steps=70000, visualize = False, verbose = 1)

Training for 70000 steps ...
Interval 1 (0 steps performed)
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
   10/10000 [..............................] - ETA: 4:11 - reward: 1.0000



108 episodes - episode_reward: 91.231 [10.000, 245.000] - loss: 1.575 - mae: 19.218 - mean_q: 39.043

Interval 2 (10000 steps performed)
55 episodes - episode_reward: 184.418 [138.000, 329.000] - loss: 1.875 - mae: 36.166 - mean_q: 73.057

Interval 3 (20000 steps performed)
51 episodes - episode_reward: 195.059 [129.000, 318.000] - loss: 1.385 - mae: 38.257 - mean_q: 77.098

Interval 4 (30000 steps performed)
60 episodes - episode_reward: 165.917 [117.000, 292.000] - loss: 1.107 - mae: 39.324 - mean_q: 79.259

Interval 5 (40000 steps performed)
51 episodes - episode_reward: 196.706 [124.000, 309.000] - loss: 0.850 - mae: 38.086 - mean_q: 76.654

Interval 6 (50000 steps performed)
37 episodes - episode_reward: 232.162 [126.000, 623.000] - loss: 0.740 - mae: 37.539 - mean_q: 75.649

Interval 7 (60000 steps performed)
done, took 1442.791 seconds


<tensorflow.python.keras.callbacks.History at 0x265152ba370>

In [8]:
score = dqn.test(environment, nb_episodes=100, visualize=False)

Testing for 100 episodes ...


KeyboardInterrupt: 

I have to stop it, it never falls.

### Save model

In [9]:
dqn.save_weights('dqn_weights.h5f', overwrite = True)

In [15]:
dqn.load_weights('dqn_weights.h5f')

In [19]:
env = gym.make("CartPole-v1", render_mode="human")
states = env.observation_space.shape[0]
actions = env.action_space.n

model = build_model(states, actions)
dqn1 = build_agent(model, actions)
dqn1.compile(Adam(lr=1e-3), metrics = ['mae'])
dqn1.load_weights('dqn_weights.h5f')
_ = dqn1.test(env, nb_episodes=10, visualize=False)

Testing for 10 episodes ...


KeyboardInterrupt: 

I have to stop it, it never falls.