In [1]:
from typing import Sequence

import torch
import numpy as np
from models import MLPModel, FancyMLPModel
from agents import Agent
from collectors import Memory, CrowdCollector
from environments import UnityCrowdEnv, UnitySimpleCrowdEnv
from policy_optimization import CrowdPPOptimizer
from trainers import PPOCrowdTrainer

from tqdm import tqdm, trange
from mlagents_envs.environment import UnityEnvironment

from utils import transpose_batch, concat_batches, concat_crowd_batch, tanh_norm, atanh_unnorm

from mlagents_envs.side_channel.engine_configuration_channel import EngineConfigurationChannel

import seaborn as sns
import matplotlib.pyplot as plt

from utils import discount_rewards_to_go, get_episode_lens

In [2]:

# Instantiate the crowd environment from the build named in `file_name`.
# The two commented-out lines below are alternative UnityCrowdEnv constructions
# kept for reference; the first one refers to a `channel` object that no
# visible cell defines.
# NOTE(review): the build path is relative and names a macOS .app bundle, so
# this presumably only works from the project root on a Mac -- confirm.
# env = UnityCrowdEnv(file_name="Test.app", side_channels=[channel], no_graphics=False)
# env = UnityCrowdEnv(file_name=None)
env = UnitySimpleCrowdEnv(file_name="builds/9x9-90deg-1-mac.app")

# Pair of 2-element float tensors later passed to Agent as `action_range`.
# NOTE(review): presumably (lower, upper) bounds per action dimension --
# confirm against the Agent implementation.
action_range = tuple(
    torch.tensor(bounds) for bounds in ([-0.3, -1.0], [1.0, 1.0])
)

# channel.set_configuration_parameters(time_scale=1, width=1000, height=1000, quality_level=0)#, target_frame_rate=600)

This means that some features may not work unless you upgrade the package with the lower version.Please find the versions that work best together from our release page.
https://github.com/Unity-Technologies/ml-agents/releases


In [3]:
# Push engine settings through the environment's engine channel: time_scale
# 100, width and height 1000, quality_level 0. The target_frame_rate keyword is
# commented out, so it is not passed.
# NOTE(review): `env.engine_channel` is presumably an ML-Agents
# EngineConfigurationChannel (imported in the first cell) -- confirm in
# environments.py.
env.engine_channel.set_configuration_parameters(time_scale=100, width=1000, height=1000, quality_level=0)#, target_frame_rate=600)


In [4]:
# agent = Agent.load_agent('/Users/redtachyon/tb_logs/new_metrics_2020-11-24_13-27-36/',
#                          action_range=action_range,
#                          weight_idx=1)


In [5]:
# Build the network from a config that sets only "input_size".
# NOTE(review): 94 presumably matches the environment's observation size --
# confirm against the Unity build in use.
model = MLPModel(dict(input_size=94))

# Wrap the network in an Agent, handing it the action_range defined earlier.
agent = Agent(model, action_range=action_range)

In [6]:
# Trainer settings, grouped into top-level options and a nested PPO section.
# Written with dict() keyword syntax; the result is a plain nested dict.
trainer_config = dict(
    steps=500,  # number of steps we want in one PPO step
    # Tensorboard settings
    tensorboard_name="jupyter-test",  # str, set explicitly
    # PPO
    ppo_config=dict(
        # Gradient-descent settings
        optimizer="adam",
        optimizer_kwargs=dict(
            lr=1e-4,
            betas=(0.9, 0.999),
            eps=1e-8,
            weight_decay=0,
            amsgrad=False,
        ),
        gamma=0.95,  # discount factor
        # PPO settings
        ppo_steps=10,  # maximum number of gradient updates in one iteration
        eps=0.1,  # PPO clip parameter
        target_kl=0.01,  # KL divergence limit
        value_loss_coeff=0.1,
        entropy_coeff=0.1,
        max_grad_norm=0.5,
        # Backpropagation settings
        use_gpu=False,
    ),
)

# Tie the agent, the environment and the trainer_config dict into a PPO trainer.
trainer = PPOCrowdTrainer(agent, env, trainer_config)


In [7]:
# Start training with an argument of 1000 (the saved progress bar counts 0/1000).
# NOTE(review): the output stored with this cell ends in
# "TypeError: cannot pickle '_thread.lock' object", so this run did not complete.
trainer.train(1000)

  0%|          | 0/1000 [00:00<?, ?it/s]


Begin training, logged in /Users/redtachyon/tb_logs/jupyter-test_2020-12-08_15-24-55


TypeError: cannot pickle '_thread.lock' object

In [7]:
# Leftover debugging: opens the post-mortem debugger on the most recent
# uncaught exception (the traceback stored in sys.last_traceback).
# NOTE(review): remove before sharing -- the cell waits for interactive input
# and cannot run unattended.
import pdb; pdb.pm()

> [0;32m/Users/redtachyon/projects/CrowdAI/training/collectors.py[0m(212)[0;36mcollect_data[0;34m()[0m
[0;32m    210 [0;31m[0;34m[0m[0m
[0m[0;32m    211 [0;31m            [0;31m# Collect the metrics passed by the environment[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 212 [0;31m            [0mmean_distance[0m[0;34m,[0m [0mmean_speed[0m[0;34m,[0m [0mmean_finish[0m [0;34m=[0m [0minfo_dict[0m[0;34m[[0m[0;34m"metrics"[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    213 [0;31m            [0mmetrics[0m[0;34m[[0m[0;34m"mean_distance"[0m[0;34m][0m[0;34m.[0m[0mappend[0m[0;34m([0m[0mmean_distance[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    214 [0;31m            [0mmetrics[0m[0;34m[[0m[0;34m"mean_speed"[0m[0;34m][0m[0;34m.[0m[0mappend[0m[0;34m([0m[0mmean_speed[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m
{'has_decision': True, 'metrics': array([16.457783  ,  0.6141771 ,  0.        , 16.46251   ,  0

In [15]:

# NOTE(review): `time` is imported in this cell rather than in the notebook's
# import cell; it is kept here so the cell stays runnable on its own.
import time

# Lower the engine time scale to 2 (the earlier configuration cell used 100);
# width, height and quality_level are unchanged. target_frame_rate is not passed.
env.engine_channel.set_configuration_parameters(time_scale=2, width=1000, height=1000, quality_level=0)

# NOTE(review): fixed 5 s pause, presumably to let the new engine settings take
# effect before resetting -- confirm whether it is actually needed.
time.sleep(5)

env.reset()

# Another cell in this notebook unpacks collect_data's result as
# (data, metrics), and the inspection cells index `data` and `metrics` like
# dicts. Unpack here as well so `data` is not bound to the whole return value.
data, metrics = trainer.collector.collect_data(num_steps=500, disable_tqdm=False)

100%|██████████| 500/500 [00:24<00:00, 20.27it/s]


In [16]:
# Close the environment.
# NOTE(review): read top to bottom, a later cell still calls
# trainer.collector.collect_data after this close; presumably that fails on a
# closed environment -- confirm, and move this cell to the end if so.
env.close()


In [6]:
# Collect a batch with an argument of 500 and unpack the result into the
# collected data and the metrics; both are indexed like dicts in later cells.
data, metrics = trainer.collector.collect_data(500)


In [13]:

# Inspect the "mean_finish" series from the collected metrics.
# NOTE(review): this displays the full array (all zeros in the saved output);
# a slice or summary would keep the notebook output short.
metrics['mean_finish']

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.     

In [12]:
# Inspect the done flags stored under the key 'Person1?team=0&id=0'.
# NOTE(review): this displays the whole tensor (all False in the visible part
# of the saved output); a summary would be easier to read.
data['dones']['Person1?team=0&id=0']

tensor([False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, 

In [None]:
# for _ in trange(1000):
#     action_dict = {key: np.array([1., np.random.rand() - 0.5]) for key in obs_dict if key.startswith("Person")}
#     env.step(action_dict)

In [None]:
# for _ in trange(100):
#     action_dict = {key: np.array([0., 0.]) for key in obs_dict if key.startswith("Person")}
#     env.step(action_dict)

In [None]:
# for _ in trange(50):
#     env.unity.set_actions("Person1?team=0", np.repeat([[1, -1]], 9, 0))
#
#     env.unity.step()
#
#
# for _ in trange(50):
#     env.unity.set_actions("Person1?team=0", np.repeat([[1, 1]], 9, 0))
#
#     env.unity.step()
#