In [None]:
import random
import gym
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory



# CartPole environment, plus the two sizes the Q-network is built from.
env = gym.make('CartPole-v0')
actions = env.action_space.n                  # number of discrete actions
states = env.observation_space.shape[0]       # length of one observation vector

# Episode count for the commented-out random-action baseline below.
episodes = 10

# for episode in range(1, episodes + 1):
#     state = env.reset()
#     done = False
#     score = 0
    
#     while not done:
#         env.render()
#         action = random.choice([0,1])
#         n_state, reward, done, info = env.step(action)
#         score += reward
#     print('Episodes:{}  Score:{}'.format(episode, score))
    
    
def build_model(states, actions, window_length = 1, hidden_units = (24, 24)):
    """Build the fully connected Q-network.

    Args:
        states: Number of values in a single observation.
        actions: Number of discrete actions; sets the width of the output layer.
        window_length: Number of stacked observations per network input. It
            should match the ``window_length`` of the agent's replay memory.
            Defaults to 1.
        hidden_units: Width of each hidden ReLU layer, in order.
            Defaults to (24, 24).

    Returns:
        An uncompiled ``Sequential`` model with one linear output per action.
    """
    model = Sequential()
    # Inputs arrive shaped (window_length, states); flatten them for the Dense stack.
    model.add(Flatten(input_shape = (window_length, states)))
    for units in hidden_units:
        model.add(Dense(units, activation = 'relu'))
    # Linear head: one unbounded Q-value estimate per action.
    model.add(Dense(actions, activation = 'linear'))

    return model

# Q-network sized for this environment's observation and action spaces.
model = build_model(states = states, actions = actions)


def build_agent(model, actions):
    """Wrap ``model`` in a DQN agent.

    The agent uses a Boltzmann Q-policy and a sequential replay memory
    limited to 500 entries with a window length of 1.

    Args:
        model: The Q-network the agent trains and queries.
        actions: Number of discrete actions available to the agent.

    Returns:
        The configured, not yet compiled, ``DQNAgent``.
    """
    exploration = BoltzmannQPolicy()
    replay_buffer = SequentialMemory(limit = 500, window_length = 1)
    return DQNAgent(
        model = model,
        nb_actions = actions,
        memory = replay_buffer,
        policy = exploration,
        nb_steps_warmup = 10,
        target_model_update = 1e-2,
    )
    
    
# Build, compile and train the DQN agent.
dqn = build_agent(model, actions)                                 # wrap the Q-network in an agent
dqn.compile(Adam(lr = 1e-3), metrics = ['mae'])                   # Adam optimizer, MAE as reported metric
dqn.fit(env, nb_steps = 500, visualize = False, verbose = 1)      # train for 500 environment steps

# Evaluate over 10 episodes without rendering and print the mean episode reward.
scores = dqn.test(env, nb_episodes = 10, visualize = False)
print(np.mean(scores.history['episode_reward']))

# Replay a few episodes with rendering enabled.
_ = dqn.test(env, nb_episodes = 5, visualize = True)

# Save the weights. The file name must match the one passed to
# dqn.load_weights() in the reload cell further down ('dqn_weights.h5f').
dqn.save_weights('dqn_weights.h5f', overwrite = True)
    
    
    

In [None]:
# Discard the trained model, agent and environment so that the cells below
# have to rebuild them and restore the weights from the saved file.

del model, dqn, env


In [None]:

# Everything was deleted above; rebuild it so the saved weights can be loaded.

# Environment and the sizes derived from it.
env = gym.make('CartPole-v0')
actions = env.action_space.n
states = env.observation_space.shape[0]

# Fresh network and agent, built by the same helpers as before.
model = build_model(states = states, actions = actions)
dqn = build_agent(model = model, actions = actions)

# Compile with the same optimizer settings and metric used for training.
dqn.compile(optimizer = Adam(lr = 1e-3), metrics = ['mae'])

In [None]:
# Restore the previously saved weights into the rebuilt agent.
dqn.load_weights(filepath = 'dqn_weights.h5f')

In [None]:
# Run five rendered episodes with the restored agent; the returned history is discarded.
_ = dqn.test(env, visualize = True, nb_episodes = 5)