In [1]:
# # Unity ML-Agents Toolkit
import logging
import argparse

from multiprocessing import Process, Queue
import os
import glob
import shutil
import numpy as np
import yaml

from typing import Any, Callable, Optional, List, NamedTuple


from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.exception import TrainerError
from mlagents.trainers.meta_curriculum import MetaCurriculum
from mlagents.trainers.trainer_util import load_config, TrainerFactory
from mlagents.envs.environment import UnityEnvironment
from mlagents.envs.sampler_class import SamplerManager
from mlagents.envs.exception import SamplerException
from mlagents.envs.base_unity_environment import BaseUnityEnvironment
from mlagents.envs.subprocess_env_manager import SubprocessEnvManager

from mlagents.trainers.learn import CommandLineOptions, parse_command_line, create_sampler_manager, try_create_meta_curriculum, prepare_for_docker_run, create_environment_factory

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [2]:

# Path to the Unity environment build. It is passed on as `env_path` in the
# default command-line options below. The commented-out assignments are
# alternatives kept for quick switching.
# env_name = "../../../../../../ml-agents-master/envs/first_try_conv/Unity Environment"
env_name = "../../../../../../ml-agents-master/envs/first_try_conv_multi/Unity Environment"
# With None, run_training falls back to port 5004 (the in-Editor training port).
# env_name = None

In [3]:
def create_initial_config():
    """Return the baseline PPO trainer configuration as a freshly parsed dict.

    The YAML below is parsed on every call, so each caller gets an
    independent dict it can mutate freely.

    Notes:
        * ``layers_specs`` lists two ``conv2D`` layers followed by two
          ``dense`` layers, in that order.
        * ``vis_encode_type`` is ``custom`` here; the original author noted
          ``simple`` as the alternative value.
        * ``max_steps`` is written with an explicitly signed exponent
          (``5.0e+5``). PyYAML follows the YAML 1.1 float syntax, which
          requires the sign; the unsigned form ``5.0e5`` is loaded as the
          *string* ``"5.0e5"`` rather than a number.

    Returns:
        dict: trainer hyperparameters, including the ``reward_signals``
        mapping and the ``layers_specs`` list.
    """
    return yaml.safe_load(
        """
        trainer: ppo
        batch_size: 64
        beta: 5.0e-3
        buffer_size: 10240
        epsilon: 0.2
        lambd: 0.95
        learning_rate: 3.0e-4
        learning_rate_schedule: linear
        vis_encode_type: custom
        max_steps: 5.0e+5
        normalize: false
        num_epoch: 3
        time_horizon: 64
        sequence_length: 4
        summary_freq: 1000
        use_recurrent: true
        memory_size: 256
        curiosity_strength: 0.01
        curiosity_enc_size: 1
        summary_path: test
        model_path: test
        hidden_units: 32
        num_layers: 2
        reward_signals:
          extrinsic:
            strength: 1.0
            gamma: 0.99
        layers_specs:
            - type: conv2D
              filters: 32
              activation: elu
              use_bias: True
              maxPool: False
              kernel_shape: [8, 8]
              strides: [4, 4]
              kernel_initializer: glorot_uniform
              bias_initializer: zeros
            - type: conv2D
              filters: 32
              activation: elu
              use_bias: True
              maxPool: False
              kernel_shape: [4, 4]
              strides: [2, 2]
              kernel_initializer: glorot_uniform
              bias_initializer: zeros
            - type: dense
              nodes: 32
              activation: default
              use_bias: True
              kernel_initializer: default
              bias_initializer: zeros
            - type: dense
              nodes: 32
              activation: default
              use_bias: True
              kernel_initializer: default
              bias_initializer: zeros
        """
    )

In [4]:
initial_config = create_initial_config()
mutation_deviation = 0.1


def _gene_spec(kind):
    """Build one mutation descriptor of the given type ("int" or "continuous").

    Every descriptor in this notebook shares the same deviation
    (`mutation_deviation`) and the same "min_deviation" of 0.01; a fresh dict
    is returned on each call so descriptors are never shared between keys.
    """
    return {
        "type": kind,
        "deviation": mutation_deviation,
        "min_deviation": 0.01,
    }


# Top-level hyperparameters that may be mutated, with the numeric type each
# one must keep. The order here is the insertion order of the resulting dict.
_TOP_LEVEL_GENES = (
    ("batch_size", "int"),
    ("beta", "continuous"),
    ("buffer_size", "int"),
    ("epsilon", "continuous"),
    ("lambd", "continuous"),
    ("learning_rate", "continuous"),
    ("sequence_length", "int"),
    ("memory_size", "int"),
    ("curiosity_strength", "continuous"),
    ("curiosity_enc_size", "int"),
)

initial_config["_genetic_parameters"] = {
    gene_name: _gene_spec(gene_kind) for gene_name, gene_kind in _TOP_LEVEL_GENES
}

# Per-layer genes, indexed by position in `layers_specs`: the first two
# layers expose their "filters" count, the last two their "nodes" count.
for i, layer_gene in enumerate(["filters", "filters", "nodes", "nodes"]):
    initial_config["layers_specs"][i]["_genetic_parameters"] = {
        layer_gene: _gene_spec("int")
    }

In [5]:
def strip_genetic_parameters(config):
    """Return a copy of ``config`` with every ``"_genetic_parameters"`` entry removed.

    The copy is shallow: values are carried over by reference. When the
    config has a ``"layers_specs"`` list, each layer is stripped recursively
    and the rebuilt list is stored as the last key of the result.

    Args:
        config: mapping of hyperparameters, optionally containing
            ``"_genetic_parameters"`` and/or ``"layers_specs"``.

    Returns:
        dict: the stripped configuration; ``config`` itself is not modified.
    """
    handled_separately = ("_genetic_parameters", "layers_specs")
    stripped = {
        name: config[name] for name in config if name not in handled_separately
    }
    if "layers_specs" in config:
        stripped["layers_specs"] = [
            strip_genetic_parameters(layer) for layer in config["layers_specs"]
        ]
    return stripped

In [6]:
def mutate_config(config):
    """Return a randomly perturbed copy of ``config`` without mutation metadata.

    Every key except ``"_genetic_parameters"`` and ``"layers_specs"`` is
    copied (shallowly) into the result. Each hyperparameter named in
    ``config["_genetic_parameters"]`` is then scaled by
    ``1 + deviation * z`` with ``z`` drawn from ``np.random.randn()``.
    Genes of type ``"int"`` are afterwards converted to an ``int`` by adding
    uniform noise in ``[-0.5, 0.5)`` and rounding. ``"memory_size"`` is
    finally snapped to the nearest multiple of 4. Each entry of
    ``config["layers_specs"]`` is mutated recursively with the same rules.

    Random numbers come from NumPy's global state, so seed it with
    ``np.random.seed`` for reproducible mutations.

    Args:
        config: mapping of hyperparameters. ``"_genetic_parameters"`` (if
            present) maps hyperparameter names to descriptors holding at
            least ``"type"`` and ``"deviation"``. A ``"min_deviation"`` entry
            may be present but is not used by this function.

    Returns:
        dict: the mutated configuration. It contains no
        ``"_genetic_parameters"`` key; ``config`` itself is left unchanged.

    Raises:
        KeyError: if a descriptor names a hyperparameter that is not a plain
            key of ``config``, or lacks ``"type"``/``"deviation"``.
    """
    mutated = {
        key: config[key]
        for key in config
        if key != "_genetic_parameters" and key != "layers_specs"
    }

    if "_genetic_parameters" in config:
        genes = config["_genetic_parameters"]
        for key in genes:
            if key not in mutated:
                raise KeyError(
                    "_genetic_parameters names {!r}, which is not a mutable "
                    "key of this config".format(key)
                )
            gene_type = genes[key]["type"]
            deviation = genes[key]["deviation"]
            # Rebind instead of `*=` so a mutable value shared with `config`
            # (e.g. an ndarray) is never modified in place.
            mutated[key] = mutated[key] * (1 + deviation * np.random.randn())
            if gene_type == "int":
                # Stochastic rounding: uniform jitter before round() keeps
                # small integers from getting stuck at their current value.
                # NOTE(review): nothing clamps the result, so small integer
                # hyperparameters can mutate to 0 - confirm the trainer
                # tolerates that or add bounds to the descriptors.
                mutated[key] = int(round(mutated[key] + np.random.rand() - 0.5))
            # Special rules
            if key == "memory_size":
                # Keep memory_size a multiple of 4.
                mutated[key] = int(round(mutated[key] / 4) * 4)

    if "layers_specs" in config:
        mutated["layers_specs"] = [
            mutate_config(layer_specs) for layer_specs in config["layers_specs"]
        ]
    return mutated

In [7]:
# Option values handed to run_training in place of parsed command-line
# arguments. `env_path` comes from the `env_name` cell above.
defaultCommandLineOptionsDict = dict(
    debug=False,
    num_runs=1,
    seed=-1,
    env_path=env_name,
    run_id="ppo_test",
    load_model=False,
    train_model=True,
    save_freq=50000,
    keep_checkpoints=5,
    base_port=5005,
    num_envs=1,
    curriculum_folder=None,
    lesson=0,
    slow=False,
    no_graphics=False,
    multi_gpu=False,
    # Placeholder: run_training in this notebook receives the trainer config
    # as a dict and never reads this path.
    trainer_config_path="boeit niet meer",
    sampler_file_path=None,
    docker_target_name=None,
    env_args=None,
    cpu=False,
)
defaultCommandLineOptions = CommandLineOptions(**defaultCommandLineOptionsDict)

In [8]:
def run_training(sub_id: int, run_seed: int, options: CommandLineOptions, process_queue: Queue, trainer_config):
    """Build the environment, trainers and controller for one run, then train.

    Args:
        sub_id: index of this run; it is appended to ``options.run_id`` for
            the model directory and the controller's run name, and offsets
            the environment port by ``sub_id * options.num_envs``.
        run_seed: seed forwarded to the environment factory, the sampler
            manager, the trainer factory and the trainer controller.
        options: run options (paths, ports, checkpoint settings, flags).
        process_queue: receives ``True`` once everything is constructed,
            immediately before training starts.
        trainer_config: trainer configuration passed straight to
            ``TrainerFactory``.
    """
    docker_root = options.docker_target_name
    curriculum_dir = options.curriculum_folder

    if docker_root:
        # A docker volume was given: root every path at /<docker_target_name>.
        if curriculum_dir is not None:
            curriculum_dir = "/{}/{}".format(docker_root, curriculum_dir)
        model_dir = "/{}/models/{}-{}".format(docker_root, options.run_id, sub_id)
        summary_dir = "/{}/summaries".format(docker_root)
    else:
        model_dir = "./models/{}-{}".format(options.run_id, sub_id)
        summary_dir = "./summaries"

    if options.env_path is None:
        env_port = 5004  # This is the in Editor Training Port
    else:
        # Give each sub-run its own block of `num_envs` consecutive ports.
        env_port = options.base_port + (sub_id * options.num_envs)

    make_env = create_environment_factory(
        options.env_path,
        options.docker_target_name,
        options.no_graphics,
        run_seed,
        env_port,
        options.env_args,
    )
    env_manager = SubprocessEnvManager(make_env, options.num_envs)

    meta_curriculum = try_create_meta_curriculum(
        curriculum_dir, env_manager, options.lesson
    )
    sampler_manager, resampling_interval = create_sampler_manager(
        options.sampler_file_path, env_manager.reset_parameters, run_seed
    )

    factory = TrainerFactory(
        trainer_config,
        summary_dir,
        options.run_id,
        model_dir,
        options.keep_checkpoints,
        options.train_model,
        options.load_model,
        run_seed,
        meta_curriculum,
        options.multi_gpu,
    )

    # Create controller and begin training.
    run_name = options.run_id + "-" + str(sub_id)
    controller = TrainerController(
        factory,
        model_dir,
        summary_dir,
        run_name,
        options.save_freq,
        meta_curriculum,
        options.train_model,
        run_seed,
        options.fast_simulation,
        sampler_manager,
        resampling_interval,
    )

    # Signal that environment has been launched.
    process_queue.put(True)

    # Begin training
    controller.start_learning(env_manager)

In [9]:
# Trainer-ready configuration: the initial hyperparameters with every
# "_genetic_parameters" entry removed (top level and per layer).
# The commented-out lines are the disabled mutation path (mutate, then strip).
# for i in range(100):
stripped_configs = strip_genetic_parameters(initial_config)
# mutated_config = mutate_config(initial_config)
# stripped_mutated_configs = strip_genetic_parameters(mutated_config)

In [10]:
# Echo the selected environment path as a sanity check before launching.
env_name

'../../../../../../ml-agents-master/envs/first_try_conv_multi/Unity Environment'

In [11]:
# Draw a fresh seed for this run from NumPy's global generator.
run_seed = np.random.randint(0, 10000)

# The stripped hyperparameters are registered under the "default" key
# (presumably the section applied to every brain - confirm against
# TrainerFactory).
trainer_config = dict(default=stripped_configs)

# run_training puts True on this queue just before training starts; nothing
# in this notebook reads it back.
launch_queue = Queue()

run_training(
    sub_id=0,
    run_seed=run_seed,
    options=defaultCommandLineOptions,
    process_queue=launch_queue,
    trainer_config=trainer_config,
)













{}




































Instructions for updating:
Use `tf.keras.layers.Conv2D` instead.


Instructions for updating:
Use `tf.keras.layers.Conv2D` instead.


Instructions for updating:
Please use `layer.__call__` method instead.


Instructions for updating:
Please use `layer.__call__` method instead.


Instructions for updating:
Use keras.layers.flatten instead.


Instructions for updating:
Use keras.layers.flatten instead.


Instructions for updating:
Use keras.layers.Dense instead.


Instructions for updating:
Use keras.layers.Dense instead.


Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.


Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.


Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API


Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API


Instructions for updating:
Please use `layer.add_weight` method instead.


Instructions for updating:
Please use `layer.add_weight` method instead.


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor








Instructions for updating:
Use `tf.random.categorical` instead.


Instructions for updating:
Use `tf.random.categorical` instead.




















Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where












INFO:mlagents.envs:Hyperparameters for the PPOTrainer of brain RollerBallVisualBrain: 
	trainer:	ppo
	batch_size:	64
	beta:	0.005
	buffer_size:	10240
	epsilon:	0.2
	lambd:	0.95
	learning_rate:	0.0003
	learning_rate_schedule:	linear
	vis_encode_type:	custom
	max_steps:	5.0e5
	normalize:	False
	num_epoch:	3
	time_horizon:	64
	sequence_length:	4
	summary_freq:	1000
	use_recurrent:	True
	memory_size:	256
	curiosity_strength:	0.01
	curiosity_enc_size:	1
	summary_path:	./summaries/ppo_test_RollerBallVisualBrain
	model_path:	./models/ppo_test-0/RollerBallVisualBrain
	hidden_units:	32
	num_layers:	2
	reward_signals:	
	  extrinsic:	
	    strength:	1.0
	    gamma:	0.99
	layers_specs:	[{'type': 'conv2D', 'filters': 32, 'activation': 'elu', 'use_bias': True, 'maxPool': False, 'kernel_shape': [8, 8], 'strides': [4, 4], 'kernel_initializer': 'glorot_uniform', 'bias_initializer': 'zeros'}, {'type': 'conv2D', 'filters': 32, 'activation': 'elu', 'use_bias': True, 'maxPool': False, 'kernel_shape': [4, 







{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{

INFO:mlagents.trainers: ppo_test: RollerBallVisualBrain: Step: 1000. Time Elapsed: 46.481 s Mean Reward: -0.770. Std of Reward: 0.484. Training.


{}
{}
{}






{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{

INFO:mlagents.envs:Learning was interrupted. Please wait while the graph is generated.


{}
{}
{}




INFO:mlagents.envs:Saved Model
INFO:mlagents.trainers:List of nodes to export for brain :RollerBallVisualBrain
INFO:mlagents.trainers:	is_continuous_control
INFO:mlagents.trainers:	version_number
INFO:mlagents.trainers:	memory_size
INFO:mlagents.trainers:	action_output_shape
INFO:mlagents.trainers:	recurrent_out
INFO:mlagents.trainers:	action


Instructions for updating:
Use `tf.compat.v1.graph_util.convert_variables_to_constants`


Instructions for updating:
Use `tf.compat.v1.graph_util.convert_variables_to_constants`


Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`


Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`


INFO:tensorflow:Froze 15 variables.


INFO:tensorflow:Froze 15 variables.


INFO:tensorflow:Converted 15 variables to const ops.


INFO:tensorflow:Converted 15 variables to const ops.


Converting ./models/ppo_test-0/RollerBallVisualBrain/frozen_graph_def.pb to ./models/ppo_test-0/RollerBallVisualBrain.nn






Sorting model, may take a while............... Done!
GLOBALS:

INFO:mlagents.trainers:Exported ./models/ppo_test-0/RollerBallVisualBrain.nn file


 'is_continuous_control', 'version_number', 'memory_size', 'action_output_shape'
IN: 'visual_observation_0': [-1, 84, 84, 3] => 'main_graph_0_encoder0/conv_0/BiasAdd'
IN: 'prev_action': [-1, 1, 1, 1] => 'strided_slice'
IN: 'action_masks': [-1, 1, 1, 4] => 'strided_slice_3'
MEM: 'recurrent_in_c' => 'recurrent_out_c'
MEM: 'recurrent_in_h' => 'recurrent_out_h'
OUT: 'concat/concat', 'concat_1/concat', 'action_probs/action_probs', 'concat_6/concat', 'action'
DONE: wrote ./models/ppo_test-0/RollerBallVisualBrain.nn file.
