<a href="https://colab.research.google.com/github/Chris-Carvelli/ContinualLearning-ITU19/blob/master/Minigrid.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MiniGrid HyperNN test

In [0]:
import os
import torch
import pickle
import matplotlib.pyplot as plt


def random_z_v(z_dim, z_num):
    """Sample a flat latent vector of ``z_dim * z_num`` standard-normal values.

    Args:
        z_dim: Length of a single z chunk.
        z_num: Number of chunks concatenated in the returned vector.

    Returns:
        A 1-D tensor with ``z_dim * z_num`` entries drawn from N(0, 1).
    """
    total_len = z_dim * z_num
    prior = torch.distributions.normal.Normal(torch.zeros([total_len]), 1.0)
    return prior.sample()


def plot(env, experiment):
    """Plot the median, average and max score of every logged generation.

    Reads ``Experiments/<env>/<experiment>/process.pickle`` below the current
    working directory. The file is expected to hold a sequence of pickled
    records, one per generation, whose first three items are the median,
    average and max score.

    Args:
        env: Environment key used as the experiment sub-directory name.
        experiment: Name of the run directory inside the environment folder.

    Raises:
        FileNotFoundError: If the log file does not exist.
    """
    path = os.path.join(
        os.getcwd(),
        'Experiments',
        env,
        experiment,
        'process.pickle'
    )

    data = []
    # NOTE: unpickling can execute arbitrary code; only open logs written by
    # trusted runs of this project.
    with open(path, 'rb') as fp:
        # One record is dumped per generation, so keep loading until the end
        # of the file instead of stopping after the first record.
        while True:
            try:
                data.append(pickle.load(fp))
            except EOFError:
                break

    gen = list(range(len(data)))
    s_med = [d[0] for d in data]
    s_avg = [d[1] for d in data]
    s_max = [d[2] for d in data]

    # Labels are required for plt.legend() to have anything to display.
    plt.plot(gen, s_med, label='median')
    plt.plot(gen, s_avg, label='average')
    plt.plot(gen, s_max, label='max')

    plt.legend()

    plt.show()



In [0]:
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

from functools import reduce

import gym

import time


# TODO create proper setting file (as .cfg)
# Length of one z chunk; HyperNN.forward feeds a single chunk of this size
# into its first linear layer.
Z_DIM = 32
# Threshold used by HyperNN.evolve: a draw from Normal(0.5, 0.1) above this
# value mutates the z vector, otherwise the hypernetwork weights are mutated.
Z_VECT_EVOLUTION_PROBABILITY = 0.5
# TODO compute in HyperNN.__init__()
# Number of z chunks held by a HyperNN (the z vector has Z_DIM * Z_NUM entries).
Z_NUM = 4


class HyperNN(nn.Module):
    """Hypernetwork that turns one chunk of a latent z vector into flat weights.

    The instance owns a z vector of ``Z_DIM * Z_NUM`` entries. ``forward``
    selects one ``Z_DIM``-sized chunk and pushes it through a three-layer MLP
    whose output has ``max_size`` entries; callers slice and reshape that
    output into the weights of a target layer.
    """

    def __init__(self, named_parameters=None):
        """Create the MLP, sample the initial z vector and initialise weights.

        Args:
            named_parameters: Currently unused; kept so existing callers that
                pass it keep working.
        """
        super().__init__()

        # TODO examine shapes of all layers and get max
        max_size = 32 * 64 * 2 * 2

        # TODO get n layers from len(shapes)
        self.z_v = random_z_v(Z_DIM, Z_NUM)

        self.l1 = nn.Linear(Z_DIM, 128)
        self.l2 = nn.Linear(128, 128)
        self.out = nn.Linear(128, max_size)

        # Scratch noise buffers, one per distinct parameter shape (see init()).
        self.add_tensors = {}

        self.init()

    def forward(self, layer_index):
        """Return the flat weight vector generated for chunk ``layer_index``.

        Args:
            layer_index: Index of the z chunk to use.

        Returns:
            A 1-D tensor with as many entries as the output layer has units.

        Raises:
            IndexError: If ``layer_index`` does not address an existing chunk.
        """
        x = chunks(self.z_v, Z_DIM)[layer_index]

        x = F.relu(self.l1(x))
        x = F.relu(self.l2(x))

        return self.out(x)

    def evolve(self, sigma):
        """Mutate either the z vector or the MLP parameters with Gaussian noise.

        A value is drawn from Normal(0.5, 0.1); when it exceeds
        ``Z_VECT_EVOLUTION_PROBABILITY`` the z vector is perturbed, otherwise
        every parameter of the MLP is perturbed in place.

        Args:
            sigma: Standard deviation of the zero-mean mutation noise.
        """
        p = torch.distributions.normal.Normal(0.5, 0.1).sample().item()
        if p > Z_VECT_EVOLUTION_PROBABILITY:
            # evolve z vector
            self.z_v += torch.distributions.normal.Normal(torch.zeros([Z_DIM * Z_NUM]), sigma).sample()
        else:
            # evolve weights
            params = self.named_parameters()
            for name, tensor in sorted(params):
                # The buffer is shared between same-shaped parameters; it is
                # refilled with fresh noise before every use.
                to_add = self.add_tensors[tensor.size()]
                to_add.normal_(0.0, sigma)
                tensor.data.add_(to_add)

    def init(self):
        """Initialise parameters and allocate the per-shape noise buffers.

        Weights get Kaiming-normal initialisation, every other parameter
        (the biases) is zeroed.
        """
        for name, tensor in self.named_parameters():
            if tensor.size() not in self.add_tensors:
                # Contents are irrelevant here: evolve() overwrites the buffer
                # with noise before reading it.
                self.add_tensors[tensor.size()] = torch.empty(tensor.size())
            if 'weight' in name:
                # kaiming_normal_ is the in-place replacement for the
                # deprecated nn.init.kaiming_normal.
                nn.init.kaiming_normal_(tensor)
            else:
                tensor.data.zero_()


class Model(nn.Module):
    """Convolutional policy whose conv kernels are generated by a HyperNN.

    The conv stack embeds the observation image and a final linear layer maps
    the 64 resulting features to 4 output values.
    """

    def __init__(self):
        """Build the hypernetwork, the conv stack and the output layer."""
        super().__init__()

        self.hyperNN = HyperNN()

        # Image embedding layers, wrapped into a Sequential below.
        conv_stack = [
            nn.Conv2d(3, 16, (2, 2)),
            nn.ReLU(),
            nn.MaxPool2d((2, 2)),
            nn.Conv2d(16, 32, (2, 2)),
            nn.ReLU(),
            nn.Conv2d(32, 64, (2, 2)),
            nn.ReLU(),
        ]
        self.image_conv = nn.Sequential(*conv_stack)

        self.out = nn.Linear(64, 4)

        self.add_tensors = {}

        self.update_weights()

    def forward(self, x):
        """Compute output values for a batch of channel-last images.

        Args:
            x: 4-D tensor whose last axis holds the channels.

        Returns:
            Tensor with one row of output values per batch element.
        """
        # Move the channel axis from last to second position.
        channels_first = x.permute(0, 3, 1, 2)
        features = self.image_conv(channels_first)
        batch = features.shape[0]
        flat = features.reshape(batch, -1)

        return self.out(flat)

    def evolve(self, sigma):
        """Mutate the hypernetwork, then regenerate the conv kernels from it."""
        self.hyperNN.evolve(sigma)
        self.update_weights()

    def init(self):
        """Re-initialise all parameters and allocate per-shape scratch tensors.

        Weights are drawn from N(0, 1) and rescaled by the norm taken along
        dimension 1; all other parameters are set to zero.
        """
        for name, tensor in self.named_parameters():
            shape = tensor.size()
            if shape not in self.add_tensors:
                self.add_tensors[shape] = torch.Tensor(shape)
            if 'weight' not in name:
                tensor.data.zero_()
                continue
            tensor.data.normal_(0, 1)
            tensor.data *= 1 / torch.sqrt(tensor.pow(2).sum(1, keepdim=True))

    def update_weights(self):
        """Replace every conv weight with one generated by the hypernetwork.

        Layers that own a weight consume consecutive z chunks in stack order.
        """
        # TODO find better impl
        chunk_idx = 0
        for layer in self.image_conv:
            weight_names = [n for n, _ in layer.named_parameters() if 'weight' in n]
            for _ in weight_names:
                layer.weight = self.get_weights(chunk_idx, layer.weight.shape)
                chunk_idx += 1

    def get_weights(self, layer_index, layer_shape):
        """Generate a parameter of ``layer_shape`` from z chunk ``layer_index``.

        Args:
            layer_index: Index of the z chunk handed to the hypernetwork.
            layer_shape: Shape of the weight tensor to produce.

        Returns:
            An ``nn.Parameter`` built from the leading entries of the
            hypernetwork output, viewed as ``layer_shape``.
        """
        n_values = 1
        for dim in layer_shape:
            n_values = n_values * dim

        flat = self.hyperNN(layer_index)
        kernel = flat.narrow(0, 0, n_values).view(layer_shape)

        return nn.Parameter(kernel)


def evaluate_model(env_key, model, max_eval, render=False, fps=60):
    """Run one episode of ``model`` in a fresh environment and return its reward.

    The episode stops at the first non-zero reward, when the environment
    reports it is done, or after ``max_eval`` steps, whichever comes first.
    The greedy action is the argmax of the model output; action 3 is remapped
    to action 5 (temporary toggle remapping).

    Args:
        env_key: Gym environment id passed to ``gym.make``.
        model: Callable network; it is switched to eval mode and fed the
            ``'image'`` entry of each observation as a batch of one.
        max_eval: Maximum number of environment steps.
        render: When true, render every step and print the action and reward.
        fps: Rendering rate; only used when ``render`` is true.

    Returns:
        The sum of the rewards collected during the episode.
    """
    env = gym.make(env_key)
    try:
        # env = FlatObsWrapper(env)
        state = env.reset()

        model.eval()

        tot_reward = 0
        reward = 0
        is_done = False
        n_eval = 0
        # TMP
        action_freq = np.zeros([7])
        # Also stop on is_done so a finished episode is never stepped again.
        while reward == 0 and not is_done and n_eval < max_eval:
            state = state['image']

            # removed some scaffolding, check if something was needed
            values = model(Variable(torch.Tensor([state])))
            # values = env.step(env.action_space.sample())
            action = int(np.argmax(values.data.numpy()[:env.action_space.n]))

            # TMP remapping toggle action
            # Compare by value: `is` tests object identity and never matched
            # the integer returned by argmax.
            if action == 3:
                action = 5

            action_freq[action] += 1
            state, reward, is_done, _ = env.step(action)
            if render:
                print('hello')
                env.render('human')
                print('action=%s, reward=%.2f' % (action, reward))
                time.sleep(1/fps)

            tot_reward += reward
            n_eval += 1
    finally:
        # Release the environment even if the model or the env raised.
        env.close()

    if tot_reward > 0:
        print(f'action_freq: {action_freq/n_eval}\treward: {tot_reward}')
    return tot_reward


def chunks(l, n):
    """Split sequence ``l`` into consecutive slices of length ``n``.

    The final slice is shorter when ``len(l)`` is not a multiple of ``n``.

    Args:
        l: Any sliceable sequence with a length.
        n: Size of each slice.

    Returns:
        A list with the slices in their original order.
    """
    return [l[start:start + n] for start in range(0, len(l), n)]


In [0]:
import copy
import random
import pickle
import os



class GA:
    """Simple genetic algorithm over a population of ``Model`` instances.

    Each generation scores the whole population, re-scores the best
    candidates to pick elites, and refills the population with mutated copies
    of the best candidates.
    """

    def __init__(self, population, env_key, max_eval=100):
        """Create the initial random population.

        Args:
            population: Number of models kept in every generation.
            env_key: Gym environment id used to evaluate the models.
            max_eval: Maximum number of environment steps per evaluation.
        """
        self.population = population
        self.env_key = env_key
        self.max_eval = max_eval

        self.models = [Model() for _ in range(population)]

    def get_best_models(self, models=None, trials=1):
        """Score models and return them sorted from best to worst.

        Args:
            models: Models to score; defaults to the current population.
            trials: Number of evaluations averaged for each model.

        Returns:
            A list of ``(model, mean_score)`` tuples, highest score first.

        Raises:
            ValueError: If ``trials`` is smaller than 1.
        """
        if models is None:
            models = self.models
        if trials < 1:
            raise ValueError('trials must be a positive integer')

        # Evaluate every model `trials` times and average its own scores, so
        # each model is always paired with the results of its own episodes.
        scored_models = []
        for model in models:
            total = sum(
                evaluate_model(self.env_key, model, self.max_eval)
                for _ in range(trials)
            )
            scored_models.append((model, total / trials))

        scored_models.sort(key=lambda pair: pair[1], reverse=True)

        return scored_models

    def evolve_iter(self, sigma, truncation, trials, elite_trials, n_elites):
        """Advance the population by one generation.

        Args:
            sigma: Standard deviation of the mutation noise.
            truncation: Number of top-scoring models that may become parents
                and elite candidates.
            trials: Evaluations per model when scoring the whole population.
            elite_trials: Evaluations per candidate when choosing the elites.
            n_elites: Maximum number of unmodified models carried over.

        Returns:
            ``(median_score, mean_score, max_score, first_model)`` where the
            scores describe the generation that was just evaluated and
            ``first_model`` is the first member of the new population.
        """
        scored_models = self.get_best_models(trials=trials)
        models = [m for m, _ in scored_models]
        scores = [s for _, s in scored_models]
        median_score = np.median(scores)
        mean_score = np.mean(scores)
        max_score = scored_models[0][1]

        candidates = models[:truncation]
        scored_parents = self.get_best_models(candidates, elite_trials)
        # Elitism: only candidates that still score above zero are kept as-is.
        elites = [p for p, score in scored_parents if score > 0][:n_elites]

        # Truncation selection: offspring descend from the top candidates only.
        # Fall back to the whole population if the truncated pool is empty.
        parent_pool = candidates or models

        self.models = elites
        # Fill up to the full population size so it cannot shrink when fewer
        # than n_elites elites qualified.
        while len(self.models) < self.population:
            child = copy.deepcopy(random.choice(parent_pool))
            child.evolve(sigma)
            self.models.append(child)

        return median_score, mean_score, max_score, self.models[0]

    def optimize(self, n_generation, sigma, truncation, trials=1, elite_trials=1, n_elites=1):
        """Run the GA and log one pickled record per generation.

        Records ``(median, average, max, elite)`` are appended to
        ``Experiments/<env_key>/<population>_<n_generation>_<sigma>_<truncation>/process.pickle``
        below the current working directory.

        Args:
            n_generation: Number of generations to run.
            sigma: Standard deviation of the mutation noise.
            truncation: Number of top models used as parents/elite candidates.
            trials: Evaluations per model when scoring the population.
            elite_trials: Evaluations per candidate when choosing elites.
            n_elites: Maximum number of elites carried over unchanged.

        Raises:
            FileExistsError: If the run directory already exists.
        """
        print('start')
        path = os.path.join(
            os.getcwd(),
            'Experiments',
            self.env_key,
            f'{self.population}_{n_generation}_{sigma}_{truncation}')
        os.makedirs(path)

        # Join with the platform separator (a literal backslash only works on
        # Windows) and close the log even if a generation raises.
        with open(os.path.join(path, 'process.pickle'), 'ab') as fp:
            for g in range(n_generation):
                s_med, s_avg, s_max, elite = self.evolve_iter(sigma, truncation, trials, elite_trials, n_elites)

                print(f'Done with generation {g} [Median: {s_med}, average: {s_avg}, max: {s_max}]')
                pickle.dump((s_med, s_avg, s_max, elite), fp)
                # Make finished generations readable while the run continues.
                fp.flush()

        print('done')


In [0]:
import gym_minigrid

def main():
    """Run the default experiment: 500 models for 50 generations."""
    print('main')

    trainer = GA(population=500, env_key='MiniGrid-Empty-Noise-8x8-v0')

    trainer.optimize(
        n_generation=50,
        sigma=0.005,
        truncation=20,
        elite_trials=10,
        n_elites=1,
    )


# Start the experiment only when this code runs as the main module.
if __name__ == "__main__":
    main()


main




start
action_freq: [0.23076923 0.         0.76923077 0.         0.         0.
 0.        ]	reward: 0.954296875
action_freq: [0.         0.28571429 0.71428571 0.         0.         0.
 0.        ]	reward: 0.95078125
action_freq: [0.         0.16666667 0.83333333 0.         0.         0.
 0.        ]	reward: 0.97890625
action_freq: [0.         0.09090909 0.90909091 0.         0.         0.
 0.        ]	reward: 0.961328125
action_freq: [0.        0.2173913 0.7826087 0.        0.        0.        0.       ]	reward: 0.919140625
action_freq: [0.         0.11111111 0.88888889 0.         0.         0.
 0.        ]	reward: 0.968359375
action_freq: [0.         0.11111111 0.88888889 0.         0.         0.
 0.        ]	reward: 0.968359375
action_freq: [0.   0.25 0.75 0.   0.   0.   0.  ]	reward: 0.9296875
action_freq: [0.        0.2173913 0.7826087 0.        0.        0.        0.       ]	reward: 0.919140625
action_freq: [0.  0.2 0.8 0.  0.  0.  0. ]	reward: 0.982421875
action_freq: [0.    0.125