# Minatar integration

## The minatar wrapper.

Adapt the MinAtar environment to the standard RL environment interface (`reset`, `step`, `render`).

In [None]:
def reset(self):
    """
        Restarts the underlying environment and reports where it starts.

        The parent class is reset first; the observation is then produced by
        `_state` and flattened before being handed back.

        Return:
            (observation) the flattened first observation of the new episode.
    """
    super().reset()
    first_observation = self._state()
    return first_observation.flatten()

In [None]:
def minatar_action(actions):
    """
        Samples one action index from a set of action probabilities.

        Args:
            actions (ndarray): the probability of each action, in any shape; it is
                flattened before sampling and is passed as `p` to `np.random.choice`.

        Return:
            (int) the index of the sampled action in the flattened array.
    """
    probabilities = actions.flatten()
    candidates = np.arange(probabilities.size)
    return np.random.choice(candidates, p=probabilities)

In [2]:
def step(self, actions):
    """
        Advances the environment by one action.

        Args:
            actions (): the action probabilities; a single action index is sampled
                from them with `minatar_action` and passed to `self.act`.

        Return:
            (tensor, float, bool, dict) the flattened new observation, the reward, the
            done signal and an (always empty) dictionary of complementary information.
    """
    chosen_action = minatar_action(actions)
    reward, done = self.act(chosen_action)

    # the observation is read only after the action has been applied.
    observation = self._state().flatten()
    info = {}
    return observation, reward, done, info

In [3]:
    def render(self, time=0, done=False):
        """
            Renders the current state of the game as an image.

            Args:
                time (int): the number of milliseconds for each frame, forwarded to
                    `display_state`. if 0, there will be no live animation.
                done (bool): tells if the episode is done (not used by this method).

            Return:
                (Image) the current image of the game, converted to mode 'P'.
        """
        # live animation is only requested for a non-zero frame time.
        if time:
            self.display_state(time=time)

        frame = self._state()
        # rescale so that the largest value of the frame maps to 256.
        scaled_frame = frame / np.max(frame) * 256
        return Image.fromarray(scaled_frame).convert('P')

In [4]:
def _state(self):
    """
        Reduces the dimensions of the raw observation and normalizes it.

        The last axis of the raw observation is treated as the object channels:
        channel `i` is weighted by `i + 1` and all channels are summed into a
        single image, which is then rescaled linearly to [-1, 1].

        Return:
            (ndarray) the single-image observation. If every pixel of the summed
            image has the same value the range is empty, and an all-zero image is
            returned instead of dividing by zero (which would give NaNs).
    """
    # get the observation.
    raw = super().state()
    # transpose to make it human readable (last axis moved first).
    channels = raw.transpose((2, 0, 1))
    # sums the object channels to have a single image; channel i counts as i + 1.
    image = np.sum(
        [channel * weight for weight, channel in enumerate(channels, start=1)],
        axis=0,
    )
    # normalize the image to [-1, 1].
    lowest, highest = np.min(image), np.max(image)
    value_range = highest - lowest
    if value_range == 0:
        # uniform image: there is nothing to rescale, avoid 0 / 0.
        return np.zeros_like(image, dtype=float)
    return 2 * (image - lowest) / value_range - 1

## Environment hyper-definition

### Breakout

In [5]:
# Hyper-definition of the MinAtar Breakout task, registered in `games` under
# its environment name.
# NOTE(review): `in_out_labels` look like labels of a cart-pole style task
# rather than of Breakout -- confirm whether they are used for this game.
breakout = Game(
    env_name="minatar:breakout",
    actionSelect="softmax",
    input_size=100,
    output_size=6,
    time_factor=0,
    layers=[5, 5],
    i_act=np.full(5, 1),
    h_act=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    o_act=np.full(1, 1),
    weightCap=2.0,
    noise_bias=0.0,
    output_noise=[False, False, False],
    max_episode_length=1000,
    in_out_labels=[
        'x',
        'x_dot',
        'cos(theta)',
        'sin(theta)',
        'theta_dot',
        'force',
    ],
)
games["minatar:breakout"] = breakout

## Environment hyper-definition
### Freeway

In [None]:
# Hyper-definition of the MinAtar Freeway task, registered in `games` under
# its environment name.
# NOTE(review): `in_out_labels` look like labels of a cart-pole style task
# rather than of Freeway -- confirm whether they are used for this game.
freeway = Game(
    env_name="minatar:freeway",
    actionSelect="softmax",
    input_size=100,
    output_size=6,
    time_factor=0,
    layers=[5, 5],
    i_act=np.full(5, 1),
    h_act=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    o_act=np.full(1, 1),
    weightCap=2.0,
    noise_bias=0.0,
    output_noise=[False, False, False],
    max_episode_length=1000,
    in_out_labels=[
        'x',
        'x_dot',
        'cos(theta)',
        'sin(theta)',
        'theta_dot',
        'force',
    ],
)
games["minatar:freeway"] = freeway

## Hyperparameters automatic search.

In [None]:
class RunBuilder:
    """Expands a grid of hyperparameter values into individual runs."""

    @staticmethod
    def get_runs(parameters):
        """
            Builds one run per combination of the given parameter values.

            Args:
                parameters (dict): maps each parameter name to the list of values to try.

            Return:
                (list) one dict per element of the cartesian product of the value
                lists, mapping every parameter name to the value chosen for that run.
        """
        return [
            dict(zip(parameters.keys(), combination))
            for combination in product(*parameters.values())
        ]

In [None]:
# Search grid: every hyperparameter is tried with two candidate values.
parameters = OrderedDict([
    # population
    ("popSize", [64, 200]),
    # probabilities
    ("prob_addConn", [.025, .1]),
    ("prob_addNode", [.015, .06]),
    ("prob_crossover", [.7, .9]),
    ("prob_enable", [.005, .02]),
    ("prob_mutConn", [.7, .9]),
    ("prob_initEnable", [.8, 1.]),
])

In [None]:
# Expand the search grid into the full list of runs (one dict of hyperparameters each).
runs = RunBuilder.get_runs(parameters)
# Only the runs with index 60 to 99 are evaluated by this cell.
t = range(60, 100)
# Best fitness seen so far and the index of the run that reached it.
# b_run stays at -1 as long as no run has a fitness strictly above 0.
b_fit = 0
b_run = -1
for run in t:
    # Evaluate one set of hyperparameters; the run index is passed along as well.
    fitness = run_one_hyp(hyp, runs[run], run)
    # Strict comparison: on a tie the earlier run is kept as the best one.
    if fitness > b_fit:
        b_fit = fitness
        b_run = run
    # One progress line per run: its fitness, the best fitness so far (and its run),
    # the values tried, and the position of the run in the whole grid.
    comment = f"run - {run} - fitness: {fitness} |---| b_fit - {b_fit} ({b_run}) |---| params - {list(runs[run].values())}\t\t\t\t{run} / {len(runs)}"
    print(comment)

- the runs were run on **Breakout** because it is a lot faster to evaluate.

- **fitnesses** were **recorded** for further investigations.

however...

results were *not good* at all!

the best set of hyperparameters was:

| popSize | prob_addConn | prob_addNode | prob_crossover | prob_enable | prob_mutAct | prob_mutConn | prob_initEnable | budget |
| ------- | ------------ | ---- | ---- | ---- | ---- | ---- | ---- | ----- |
|    32   |    .025      | .015 |  .7  |  .02 |  .0  |  .9  |  1.  | 50000 |

and the fitness seen during search was 6.0

So, for final training, we have used the above set of parameters.

## The experiment.

- run 50000 learning processes 3 times to show statistical results.

- use the parameters of the above search result.

## The results.

- time spent on Breakout:
- final fitness on Breakout:
- time spent on Freeway:
- final fitness on Freeway:

video Breakout

video Freeway