# Q-Learning y Deep Q-Learning.

- ### Pablo Melendez
- ### Hector Magana

In [None]:
import numpy as np
import gym

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents.cem import CEMAgent
from rl.memory import EpisodeParameterMemory


def make_model(name, isSimple=True, mem_limit=1000, batch=50, warmup=2000, train=50, steps=100000):
    """Build, train, save and evaluate a CEM agent on a Gym environment.

    The function creates the environment, builds a softmax policy network,
    trains a keras-rl ``CEMAgent`` on it, writes the learned weights to
    ``cem_<name>_params.h5f`` and finally runs 5 rendered test episodes.

    Args:
        name: Gym environment id passed to ``gym.make``; also used in the
            weights file name.
        isSimple: If true, the policy is a single linear layer followed by a
            softmax; otherwise three hidden layers of 16 ReLU units are
            inserted before the output layer.
        mem_limit: ``limit`` of the ``EpisodeParameterMemory``.
        batch: ``batch_size`` of the agent.
        warmup: ``nb_steps_warmup`` of the agent.
        train: ``train_interval`` of the agent.
        steps: ``nb_steps`` passed to ``fit``.

    Returns:
        None. Results are reported through printed logs, the saved weights
        file and the rendered test episodes.
    """
    env = gym.make(name)
    # Fixed seeds so that repeated runs are comparable.
    np.random.seed(123)
    env.seed(123)

    nb_actions = env.action_space.n

    # The memory below hands observations to the network with a leading
    # window axis, so both architectures must declare the same
    # (window_length,) + observation shape as input.
    window_length = 1
    input_shape = (window_length,) + env.observation_space.shape

    model = Sequential()
    model.add(Flatten(input_shape=input_shape))
    if not isSimple:
        # Deeper variant: three identical hidden blocks.
        for _ in range(3):
            model.add(Dense(16))
            model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('softmax'))

    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print().
    model.summary()

    memory = EpisodeParameterMemory(limit=mem_limit, window_length=window_length)

    cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
                   batch_size=batch, nb_steps_warmup=warmup, train_interval=train,
                   elite_frac=0.05)
    cem.compile()

    cem.fit(env, nb_steps=steps, visualize=False, verbose=2)

    cem.save_weights('cem_{}_params.h5f'.format(name), overwrite=True)

    cem.test(env, nb_episodes=5, visualize=True)