In [None]:
import cv2
import gym
from plangym.wrappers import FireResetEnv, FrameStack, MaxAndSkipEnv, NoopResetEnv

class ProcessFrame84(gym.ObservationWrapper):
    """Observation wrapper that turns RGB frames into 84x84x1 grayscale uint8 images.

    The wrapper uses the module-level names ``np`` and ``spaces``; they must be
    importable in the namespace by the time the wrapper is instantiated.
    """

    def __init__(self, env=None):
        """Wrap ``env`` and advertise the 84x84x1 observation space."""
        super(ProcessFrame84, self).__init__(env)
        # dtype is stated explicitly so the space agrees with the uint8 arrays
        # returned by ``process``.
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(84, 84, 1), dtype=np.uint8
        )

    def observation(self, obs):
        """Return the processed version of ``obs`` (see ``process``)."""
        return ProcessFrame84.process(obs)

    @staticmethod
    def process(frame):
        """Convert a raw frame to an 84x84x1 grayscale ``uint8`` array.

        Args:
            frame: Array holding exactly 210*160*3 or 250*160*3 elements.

        Returns:
            ``np.ndarray`` of shape ``(84, 84, 1)`` and dtype ``uint8``.

        Raises:
            AssertionError: If ``frame.size`` matches neither supported resolution.
        """
        for height, width in ((210, 160), (250, 160)):
            if frame.size == height * width * 3:
                img = np.reshape(frame, [height, width, 3]).astype(np.float32)
                break
        else:
            # Raised explicitly (instead of ``assert False``) so the check is
            # not stripped when Python runs with -O.
            raise AssertionError("Unknown resolution.")
        # Weighted sum of the three colour channels -> single grayscale channel.
        img = img[:, :, 0] * 0.299 + img[:, :, 1] * 0.587 + img[:, :, 2] * 0.114
        # cv2.resize takes (width, height): the result has 110 rows and 84 columns.
        resized_screen = cv2.resize(img, (84, 110), interpolation=cv2.INTER_AREA)
        # Keep rows 18..101 so the final image is 84 rows tall.
        x_t = resized_screen[18:102, :]
        x_t = np.reshape(x_t, [84, 84, 1])
        return x_t.astype(np.uint8)

def wrap(env):
    """Apply the notebook's preprocessing wrapper stack to ``env``.

    Order of application: ``NoopResetEnv`` (noop_max=15, override=True),
    ``FireResetEnv`` (only when the unwrapped env lists a 'FIRE' action),
    ``ProcessFrame84``, ``MaxAndSkipEnv`` (skip=3) and ``FrameStack`` (4).

    Returns:
        The outermost wrapper (the ``FrameStack`` instance).
    """
    wrapped = NoopResetEnv(env, noop_max=15, override=True)
    action_meanings = wrapped.unwrapped.get_action_meanings()
    if 'FIRE' in action_meanings:
        wrapped = FireResetEnv(wrapped)
    # Frame processing happens before frame skipping, then frames are stacked.
    stacked = FrameStack(MaxAndSkipEnv(ProcessFrame84(wrapped), skip=3), 4)
    return stacked

In [None]:
import numpy as np
from collections import deque
import gym
from gym import spaces
import cv2

from gym import logger as gymlogger
from gym.wrappers import Monitor
gymlogger.set_level(40) #error only

import random
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import math
import glob
import io
import base64
from IPython.display import HTML
from IPython import display as ipythondisplay



# Create and start a pyvirtualdisplay ``Display`` (visible=0, size 1400x900).
# NOTE(review): presumably required so rendering / video recording works on a
# machine without a physical screen — confirm.
# The object is bound to the name ``display``; IPython's display module is
# imported above under the separate name ``ipythondisplay``.
from pyvirtualdisplay import Display
display = Display(visible=0, size=(1400, 900))
display.start()


def show_video():
    """Embed the first ``video/*.mp4`` file in the notebook output.

    The file is read, base64-encoded and displayed inline as an HTML
    ``<video>`` element. When no mp4 file matches, ``"Could not find video"``
    is printed instead.

    Returns:
        None.
    """
    mp4list = glob.glob('video/*.mp4')
    if len(mp4list) > 0:
        mp4 = mp4list[0]
        # Read-only binary mode inside a context manager: the handle is closed
        # deterministically and no write permission is needed on the file.
        with open(mp4, 'rb') as video_file:
            video = video_file.read()
        encoded = base64.b64encode(video)
        ipythondisplay.display(HTML(data='''<video alt="test" autoplay 
                    loop controls style="height: 400px;">
                    <source src="data:video/mp4;base64,{0}" type="video/mp4" />
                 </video>'''.format(encoded.decode('ascii'))))
    else:
        print("Could not find video")

def wrap_env_video(env):
    """Return ``env`` wrapped in a ``Monitor`` targeting './video' with ``force=True``."""
    return Monitor(env, './video', force=True)

In [None]:
from fragile.learning.imitation_atari.network import ConvolutionalNeuralNetwork, ModelTrainer

In [None]:
import gym
import argparse
import numpy as np
import atari_py
from IPython.display import clear_output
import time
from plangym import AtariEnvironment

from fragile.core import DiscreteEnv, DiscreteUniform, GaussianDt
from fragile.core.tree import HistoryTree
from fragile.core.swarm import Swarm
from fragile.distributed import ParallelEnv

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# Last dimension of INPUT_SHAPE; same value as the stack size passed to
# ``FrameStack(env, 4)`` in ``wrap``.
FRAMES_IN_OBSERVATION = 4
# Side length of one frame; ``ProcessFrame84`` emits 84x84x1 images.
FRAME_SIZE = 84
# Passed as ``input_shape`` when a ``ModelTrainer`` is built.
INPUT_SHAPE = (FRAME_SIZE, FRAME_SIZE, FRAMES_IN_OBSERVATION)
# NOTE(review): not referenced anywhere in the visible cells — confirm whether it is still needed.
MEMORY_SIZE = 90000
# Number of explore-then-memorize iterations performed in each training run.
EXPLORE_MEMORY_STEPS = 1


class FragileRunner:
    """Build and run a fragile ``Swarm`` over a plangym ``AtariEnvironment``."""

    def __init__(self, game_name, n_walkers=64, max_epochs=400, score_limit=600, n_workers=8):
        """Create the environment and the swarm used to sample trajectories.

        Args:
            game_name: Name handed to ``AtariEnvironment``.
            n_walkers: Forwarded to ``Swarm``. A bigger number will increase
                the quality of the trajectories sampled.
            max_epochs: Forwarded to ``Swarm``. Increase to sample longer games.
            score_limit: Forwarded to ``Swarm`` as ``score_limit``.
            n_workers: Forwarded to ``ParallelEnv`` as ``n_workers``.
        """
        self.env = AtariEnvironment(
            name=game_name,
            clone_seeds=True,
            wrappers=[wrap],
        )

        self.game_name = game_name
        self.n_workers = n_workers
        self.env_callable = lambda: ParallelEnv(
            lambda: DiscreteEnv(env=self.env), n_workers=self.n_workers
        )
        # NOTE(review): the ``env`` argument supplied by the caller is ignored
        # and the model is built from ``self.env`` — confirm this is intended.
        self.model_callable = lambda env: DiscreteUniform(env=self.env)
        self.prune_tree = True
        self.n_walkers = n_walkers
        self.max_epochs = max_epochs
        self.score_limit = score_limit
        self.reward_scale = 2  # Rewards are more important than diversity.
        self.distance_scale = 1
        self.minimize = False  # We want to get the maximum score possible.
        self.swarm = Swarm(
            model=self.model_callable,
            env=self.env_callable,
            # The tree's pruning flag follows ``self.prune_tree`` so the two
            # settings cannot drift apart.
            tree=lambda: HistoryTree(
                names=["observs", "actions", "states"], prune=self.prune_tree
            ),
            n_walkers=self.n_walkers,
            max_epochs=self.max_epochs,
            prune_tree=self.prune_tree,
            reward_scale=self.reward_scale,
            distance_scale=self.distance_scale,
            minimize=self.minimize,
            score_limit=self.score_limit,
        )

    def run(self):
        """Run the swarm once and print its ``best_reward``."""
        print("Creating fractal replay memory...")
        _ = self.swarm.run()
        print("Max. fractal cum_rewards:", self.swarm.best_reward)
        




In [None]:
class FractalExplorationImitationLearning:
    """Explore a game with a ``FragileRunner`` and train a ``ModelTrainer`` on the result.

    Instantiating the class runs the whole pipeline: the constructor builds
    the environment, explorer and model, then enters the training loop.
    """

    def __init__(self):
        """Configure the experiment and immediately run the training loop."""
        # Game to play and number of training runs before stopping.
        game_name = "SpaceInvaders"
        total_run_limit = 1
        print("Selected game: " + str(game_name))
        print("Total run limit: " + str(total_run_limit))

        env_name = game_name + "Deterministic-v4"
        base_env = gym.make(env_name)
        env = wrap_env_video(wrap(base_env))
        explorer = FragileRunner(env_name)

        # The model's output size comes from the wrapped env's action space.
        n_actions = env.action_space.n
        game_model = ModelTrainer(input_shape=INPUT_SHAPE, n_actions=n_actions)

        self._main_loop(env_name, explorer, game_model, total_run_limit)

    def _main_loop(self, env_name, explorer, game_model, total_run_limit):
        """Alternate exploration, training and evaluation ``total_run_limit`` times.

        Args:
            env_name: Accepted but not used inside the loop.
            explorer: Object exposing ``run()`` and a ``swarm`` attribute.
            game_model: Object exposing ``memorize``, ``train``, ``evaluate``
                and an ``action_memory`` sequence.
            total_run_limit: Number of training runs to perform.
        """
        run = 0
        while run < total_run_limit:
            run += 1
            print("Training run:", run)

            # Fill the model's memory from EXPLORE_MEMORY_STEPS swarm runs.
            for _ in range(EXPLORE_MEMORY_STEPS):
                explorer.run()
                game_model.memorize(explorer.swarm)
                memory_count = len(game_model.action_memory)
                print("model has %s memories" % memory_count)

            # Fit the model on everything memorized so far.
            metrics = game_model.train(epochs=200, batch_size=32, verbose=0)

            # Replace the cell output with the evaluation report.
            clear_output(True)
            print("Model evaluation metrics:\n %s" % metrics)
            score = game_model.evaluate(explorer.swarm)

            print("Neural Network score:", score)
            show_video()

# Instantiating the class runs the full explore/train/evaluate pipeline,
# because its constructor calls ``_main_loop``. The guard is true whenever
# this code executes as the top-level module.
if __name__ == "__main__":
    FractalExplorationImitationLearning()

In [None]:
# Stand-alone setup cell: repeats the configuration performed inside
# ``FractalExplorationImitationLearning.__init__`` so that ``env`` is available
# at notebook level for the cells below.
game_name = "SpaceInvaders"

# Choose after how many runs we should stop
total_run_limit = 1
print("Selected game: " + str(game_name))        
print("Total run limit: " + str(total_run_limit))

# Build the gym env, apply the preprocessing stack, then the video Monitor.
env_name = game_name + "Deterministic-v4"
env = wrap_env_video(wrap(gym.make(env_name)))

In [None]:
# Build a trainer for manual inspection; uses the notebook-level ``env`` from
# the setup cell above for the number of actions.
mt = ModelTrainer(input_shape=INPUT_SHAPE, n_actions=env.action_space.n)

In [None]:
# Display the metric names exposed by the trainer's underlying model.
mt.model.metrics_names