# MAPPO Integration

This notebook trains the integrated MAPPO algorithm on the `patrolling_zoo` patrolling environment.

In [75]:
%reload_ext autoreload
%autoreload 2
import onpolicy.runner.shared.patrolling_runner as patrolling_runner
from onpolicy.scripts.train.train_patrolling import *

In [76]:
def parse_args(args, parser):
    """Register the environment-specific options on ``parser`` and parse ``args``.

    Args:
        args: Sequence of command-line tokens to parse (may be empty).
        parser: Parser object exposing ``add_argument`` and
            ``parse_known_args``; the options are added to it in place.

    Returns:
        The namespace produced by ``parser.parse_known_args(args)``. Tokens
        the parser does not recognise are dropped instead of raising.
    """
    # (flag, keyword arguments) pairs, registered in the order listed.
    option_table = (
        ("--graph_name", dict(
            type=str, default="cumberland",
            help="which graph to run on.")),
        ("--num_agents", dict(
            type=int, default=3,
            help="number of controlled players.")),
        ("--representation", dict(
            type=str, default="simple115v2",
            choices=["simple115v2", "extracted", "pixels_gray", "pixels"],
            help="representation used to build the observation.")),
        ("--rewards", dict(
            type=str, default="scoring",
            help="comma separated list of rewards to be added.")),
        ("--smm_width", dict(
            type=int, default=96,
            help="width of super minimap.")),
        ("--smm_height", dict(
            type=int, default=72,
            help="height of super minimap.")),
        ("--remove_redundancy", dict(
            action="store_true", default=False,
            help="by default False. If True, remove redundancy features")),
        ("--zero_feature", dict(
            action="store_true", default=False,
            help="by default False. If True, replace -1 by 0")),
        # Passing either of the next two flags switches the value to False.
        ("--eval_deterministic", dict(
            action="store_false", default=True,
            help="by default True. If False, sample action according to probability")),
        ("--share_reward", dict(
            action="store_false", default=True,
            help="by default true. If false, use different reward for each agent.")),
        ("--save_videos", dict(
            action="store_true", default=False,
            help="by default, do not save render video. If set, save video.")),
        ("--video_dir", dict(
            type=str, default="",
            help="directory to save videos.")),
    )
    for flag, spec in option_table:
        parser.add_argument(flag, **spec)

    # Only the recognised options are of interest; leftovers are ignored.
    namespace, _leftover = parser.parse_known_args(args)
    return namespace

In [77]:
# Build the configuration. An empty argument list is parsed, so every option
# starts at its default and is then overridden explicitly below.
parser = get_config()
all_args = parse_args([], parser)

all_args.env_name = "Patrolling"
all_args.graph_file = f"patrolling_zoo/env/{all_args.graph_name}.graph"

# Select the "rmappo" algorithm with the recurrent-policy and centralized-V
# flags enabled.
all_args.algorithm_name = "rmappo"
all_args.use_recurrent_policy = True
all_args.use_naive_recurrent_policy = False
all_args.use_centralized_V = True

all_args.n_rollout_threads = 1

all_args.use_wandb = False

# cuda
# The thread count is the same for both devices, so it is set once up front.
torch.set_num_threads(all_args.n_training_threads)
if all_args.cuda and torch.cuda.is_available():
    print("choose to use gpu...")
    device = torch.device("cuda:0")
    if all_args.cuda_deterministic:
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
else:
    print("choose to use cpu...")
    device = torch.device("cpu")

# run dir
run_dir = Path(os.path.join(".", "results", all_args.env_name, all_args.graph_name, all_args.algorithm_name, all_args.experiment_name))
# exist_ok avoids the check-then-create race of exists() + makedirs().
run_dir.mkdir(parents=True, exist_ok=True)

# wandb
if all_args.use_wandb:
    run = wandb.init(config=all_args,
                     project=all_args.env_name,
                     entity=all_args.user_name,
                     notes=socket.gethostname(),
                     name="-".join([
                         all_args.algorithm_name,
                         all_args.experiment_name,
                         "seed" + str(all_args.seed)
                     ]),
                     group=all_args.graph_name,
                     dir=str(run_dir),
                     job_type="training",
                     reinit=True)
else:
    # run_dir always exists at this point, so only its "run<N>" children
    # decide the next run number. Entries that merely start with "run"
    # (e.g. "runs", "run_old") are skipped instead of crashing int().
    exst_run_nums = [
        int(suffix)
        for suffix in (
            folder.name[len("run"):]
            for folder in run_dir.iterdir()
            if folder.name.startswith("run")
        )
        if suffix.isdecimal()
    ]
    # With no numbered runs yet, default=0 makes this "run1".
    curr_run = "run%i" % (max(exst_run_nums, default=0) + 1)
    run_dir = run_dir / curr_run
    run_dir.mkdir(parents=True, exist_ok=True)

setproctitle.setproctitle("-".join([
    all_args.env_name,
    all_args.graph_name,
    all_args.algorithm_name,
    all_args.experiment_name
]) + "@" + all_args.user_name)

# seed
torch.manual_seed(all_args.seed)
torch.cuda.manual_seed_all(all_args.seed)
np.random.seed(all_args.seed)

# env init
envs = make_train_env(all_args)
eval_envs = None
try:
    if all_args.use_eval:
        eval_envs = make_eval_env(all_args)
    num_agents = all_args.num_agents

    config = {
        "all_args": all_args,
        "envs": envs,
        "eval_envs": eval_envs,
        "num_agents": num_agents,
        "device": device,
        "run_dir": run_dir
    }

    # run experiments
    if all_args.share_policy:
        from onpolicy.runner.shared.patrolling_runner import PatrollingRunner as Runner
    else:
        # Only the shared-policy runner is wired up in this notebook.
        raise NotImplementedError(
            "separated-policy training is not implemented for Patrolling")

    runner = Runner(config)
    runner.run()
finally:
    # post process
    # Close the environments even when setup or training raises, so a failed
    # cell does not leave them open in the notebook kernel.
    envs.close()
    if eval_envs is not None and eval_envs is not envs:
        eval_envs.close()

if all_args.use_wandb:
    run.finish()
else:
    # NOTE: "writter" is the attribute name as used on the runner object.
    runner.writter.export_scalars_to_json(str(runner.log_dir + '/summary.json'))
    runner.writter.close()

choose to use gpu...


TypeError: DiagGaussian.forward() takes 2 positional arguments but 3 were given