In [None]:
import numpy as np
import gym
import gym_kheperaposition

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten
from tensorflow.keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.callbacks import FileLogger, ModelIntervalCheckpoint
import matplotlib.pyplot as plt

ENV_NAME = 'KheperaPositionObstacleD-v0'

# Instantiate the environment, then seed NumPy and the environment with the
# same fixed value before reading how many actions the action space exposes.
env = gym.make(ENV_NAME)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n

# Custom attributes set directly on the environment object.
# NOTE(review): presumably these are read by gym_kheperaposition (step budget,
# training-mode flag, list of 2-D points) -- confirm against the environment
# source. If gym.make() returns a wrapper, these writes land on the wrapper
# rather than the underlying env; verify they actually take effect.
env.MaxSteps = 1200
env.Training = True
env.Movements = [
    [4.285, 0.515],
    [2.4, 2.4],
    [0.515, 4.285],
]

# Build the Q-network: a plain fully connected stack.
# The input shape is (1,) + observation shape; the leading 1 presumably matches
# the window_length=1 used by the agent's replay memory. The final Dense layer
# emits one linear output per action.
model = Sequential([
    Flatten(input_shape=(1,) + env.observation_space.shape),
    Dense(512),
    Activation('relu'),
    Dense(512),
    Activation('relu'),
    Dense(256),
    Activation('relu'),
    Dense(nb_actions),
    Activation('linear'),
])
# summary() prints the layer table itself and returns None, so wrapping it in
# print() would only add a stray "None" line to the cell output.
model.summary()

# Configure and compile the agent. Any built-in tensorflow.keras optimizer and
# metric can be used here.
memory = SequentialMemory(limit=600000, window_length=1)

# Epsilon-greedy exploration whose `eps` attribute is annealed linearly between
# value_max and value_min over nb_steps; value_test is the setting for testing.
policy = LinearAnnealedPolicy(
    EpsGreedyQPolicy(),
    attr='eps',
    value_max=1.,
    value_min=0.1,
    value_test=.05,
    nb_steps=600000,
)
dqn = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    nb_steps_warmup=10000,
    target_model_update=1e-2,
    policy=policy,
)
# Use `learning_rate`: the `lr` keyword is a deprecated alias that newer Keras
# releases no longer accept.
dqn.compile(Adam(learning_rate=0.00015), metrics=['mae'])

# Load the agent's previously saved weights.
dqn.load_weights('Weights/dqn.h5')

In [None]:
# Run a test of the agent.
env.MaxSteps = 5000
# Training is switched off so that the robot position and the TP can be chosen
# (TP presumably stands for the target point -- confirm).
env.Training = False
dqn.test(
    env,
    nb_episodes=1,
    visualize=False,
)