In [1]:
import pickle
import os
import sys
import itertools as it
import numpy as np
import matplotlib.pyplot as plt
from insect_rl.mdp.utils import grid_math
from insect_rl.mdp.mrl import MyGridWorld, IAVICallback

from mushroom_rl.core import Core
import mushroom_rl.utils.dataset as mrl_dataset
from mushroom_rl.utils.callbacks import CollectDataset

# Make the workflow's helper scripts (../scripts, relative to the notebook's
# working directory) importable. The membership test uses the exact absolute
# path that gets inserted, so re-running this cell never adds a duplicate
# entry to sys.path.
SCRIPTS_DIR = os.path.abspath("../scripts/")
if SCRIPTS_DIR not in sys.path:
    sys.path.insert(0, SCRIPTS_DIR)
import exp_funs

#CONDITIONS = ["NOTRAP","TRAP"]
# Reward and trap penalty; both are placed into `sim_settings` and are also
# encoded in the name of the results directory.
REWARD = 100
TRAP_COST = -100

# Number of agents instantiated when the initial `agents` list is built.
NUM_AGENTS = 1


# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load pickles that were produced by this project's own workflow.
# `agent` is later called as `agent(mdp.info)`, so this pickle presumably holds
# an agent constructor/factory rather than an agent instance — TODO confirm.
with open("../../temp/agent.pickle", 'rb') as agent_i:
    agent = pickle.load(agent_i)
# Environment configuration; presumably a dict with at least "goals", "width"
# and "height" (those keys are read when the environment is built) — verify.
with open("../../Wystrach2020/env.pickle", 'rb') as env_i:
    env_settings = pickle.load(env_i)


# Simulation parameters that are handed to the grid world in addition to the
# pickled environment settings.
sim_settings = {
    'reward': REWARD,
    'trap_cost': TRAP_COST,
    'actions': grid_math.INTERCARDINALS,
    'gamma': .99
}
# Results root; the directory name encodes the parameters of this run.
res_dir = f"../../RESULTS/FULLEXPERIMENT/r{sim_settings['reward']}tc{sim_settings['trap_cost']}aINTERg{sim_settings['gamma']}"
try:
    # makedirs (unlike mkdir) also creates missing parent directories, so the
    # cell works on a fresh checkout; it still raises FileExistsError when the
    # target directory itself is already there.
    os.makedirs(res_dir)
    print("Directory " , res_dir ,  " Created ") 
except FileExistsError:
    print("Directory " , res_dir ,  " already exists")


Directory  ../../RESULTS/FULLEXPERIMENT/r100tc-100aINTERg0.99  Created 


## Creating the environment

In [2]:
def centroid(data):
    """Return the integer-rounded centroid of a collection of 2-D points.

    Args:
        data: Iterable of ``(x, y)`` pairs. Generators are accepted.

    Returns:
        Tuple ``(cx, cy)`` of ints: the mean of each coordinate, rounded with
        the built-in ``round`` (ties go to the nearest even integer).

    Raises:
        ValueError: If ``data`` is empty, or if the points do not unzip into
            exactly two coordinate sequences.
    """
    # Materialise once so that one-shot iterables work and emptiness can be
    # reported with a clear message instead of an opaque unpacking error.
    points = list(data)
    if not points:
        raise ValueError("centroid() requires at least one point; got empty data")
    xs, ys = zip(*points)
    n_points = len(points)
    return int(round(sum(xs) / n_points)), int(round(sum(ys) / n_points))

def environment(configs, sim_settings):
    """Build the grid-world MDP from environment and simulation settings.

    The function works on a shallow copy of ``configs``, so the caller's
    mapping is left untouched and the call can be repeated (for example when
    the notebook cell is re-run) with the same result.

    Args:
        configs: Mapping with at least the keys ``"goals"``, ``"width"`` and
            ``"height"``. All entries except ``"goals"`` are forwarded to
            ``MyGridWorld`` as keyword arguments.
        sim_settings: Mapping of additional keyword arguments for
            ``MyGridWorld``; on a key clash these win over ``configs``.

    Returns:
        The constructed ``MyGridWorld`` instance.

    Raises:
        KeyError: If ``configs`` lacks ``"goals"``, ``"width"`` or ``"height"``.
    """
    # Copy first: popping "goals" and swapping the dimensions must not leak
    # into the dict owned by the caller.
    configs = dict(configs)
    goals = configs.pop("goals")

    # Width and height are passed swapped relative to how the settings store
    # them. TODO: find out why MyGridWorld / mushroom_rl needs this.
    configs["width"], configs["height"] = configs["height"], configs["width"]

    # Only a single goal is passed on, so the goal cells are collapsed into
    # their centroid. TODO: support several goals.
    configs["goal"] = centroid(goals)

    return MyGridWorld(**(configs | sim_settings))


# Build the grid world from the unpickled environment settings and the
# simulation parameters defined in the setup cell.
mdp = environment(env_settings, sim_settings)

## Creating the agents

In [3]:
# Instantiate NUM_AGENTS agents, each constructed from the MDP's info object.
agents = [agent(mdp.info) for _ in range(NUM_AGENTS)]

In [9]:
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load pickles that were produced by this project's own workflow.
with open("../../temp/agent.pickle", 'rb') as agent_i:
    agent_cons = pickle.load(agent_i)
# All ten agents are built from the freshly loaded constructor, so the list
# does not depend on whatever the global `agent` happens to be bound to when
# this cell runs (the constructor on a fresh kernel, a trained agent after an
# experiment loop has reused the name). Index 0 is kept as a slot because the
# experiment cells iterate over agents[1:], i.e. nine agents numbered 1-9.
agents = [agent_cons(mdp.info) for _ in range(10)]
agents

[<mushroom_rl.algorithms.value.td.sarsa.SARSA at 0x7f3cf2b422c0>,
 <mushroom_rl.algorithms.value.td.sarsa.SARSA at 0x7f3bf556a710>,
 <mushroom_rl.algorithms.value.td.sarsa.SARSA at 0x7f3c100a1ba0>,
 <mushroom_rl.algorithms.value.td.sarsa.SARSA at 0x7f3ce0fcb070>,
 <mushroom_rl.algorithms.value.td.sarsa.SARSA at 0x7f3ce0fca6b0>,
 <mushroom_rl.algorithms.value.td.sarsa.SARSA at 0x7f3ce0fcb400>,
 <mushroom_rl.algorithms.value.td.sarsa.SARSA at 0x7f3ce0fcbd00>,
 <mushroom_rl.algorithms.value.td.sarsa.SARSA at 0x7f3ce0fcb910>,
 <mushroom_rl.algorithms.value.td.sarsa.SARSA at 0x7f3ce0fca050>,
 <mushroom_rl.algorithms.value.td.sarsa.SARSA at 0x7f3ce0fcb730>]

# Experiments

In [4]:
def run_experiment(mdp, condition, agent, i_agent, iterations=None):
    """Train one agent on ``mdp`` and write every artefact of the run to disk.

    Training is done in batches of at most 10 episodes. After each batch the
    collected transitions, the metrics from ``exp_funs.compute_metrics`` and
    the episode lengths are recorded, and the callback's ``cum_td`` /
    ``cum_td_ns`` tables are added to running per-state totals.

    Files written to ``{res_dir}/{condition}/a{i_agent}`` (``res_dir`` is the
    notebook-level results root; existing files are overwritten):
        J_{i}.pickle, episode_lens_{i}.pickle, data_{i}.pickle,
        value_fun_{i}.npy, tds_{i}.npy, tds_ns_{i}.npy (each reshaped to
        ``(mdp._height, mdp._width)`` and rotated with ``np.rot90``),
        q_{i}.npy, df_{i}.csv and the saved agent ``agent_{i}``.

    Args:
        mdp: Environment the agent is trained on.
        condition: Name of the condition sub-directory below ``res_dir``.
        agent: Agent to train; it is updated in place by the training.
        i_agent: Agent number used in the directory and file names.
        iterations: Total number of training episodes. Defaults to the
            notebook-level ``ITERATIONS`` when omitted.

    Raises:
        ValueError: If the number of iterations is smaller than 1.
    """
    if iterations is None:
        # Backward compatible: fall back to the global set by the calling cell.
        iterations = ITERATIONS
    if iterations < 1:
        raise ValueError(f"iterations must be >= 1, got {iterations}")

    result_dir = f"{res_dir}/{condition}/a{i_agent}"
    # exist_ok so that a re-run (or a run after an interrupted attempt) does
    # not crash on the directory left behind; parents are created as needed.
    os.makedirs(result_dir, exist_ok=True)

    collect_dataset = CollectDataset()
    iavi_dataconverter = IAVICallback(mdp, agent, statement="fit")

    core = Core(agent, mdp)
    core.callbacks_fit = [collect_dataset, iavi_dataconverter]

    len_batch = min(iterations, 10)

    data = []
    Js = []
    episode_lens = []

    n_states = iavi_dataconverter.cum_td.shape[0]
    tds = np.zeros(iavi_dataconverter.cum_td.shape)
    tds_ns = np.zeros(iavi_dataconverter.cum_td.shape)
    for batch_start in range(0, iterations, len_batch):
        # The last batch is shortened so that exactly `iterations` episodes
        # are run even when the total is not a multiple of the batch size.
        n_episodes = min(len_batch, iterations - batch_start)
        core.learn(n_episodes=n_episodes, n_steps_per_fit=1, quiet=False)
        training_dataset = collect_dataset.get()
        data.extend(training_dataset)
        collect_dataset.clean()
        Js.append(exp_funs.compute_metrics(training_dataset, mdp.info.gamma))
        episode_lens.extend(mrl_dataset.episodes_length(training_dataset))

        # NOTE(review): whether cum_td / cum_td_ns are reset between batches
        # is not visible here; if they are cumulative over the whole run,
        # adding them after every batch counts early batches repeatedly.
        for state in range(n_states):
            # TODO maybe not sum but average?
            tds[state] += iavi_dataconverter.cum_td[np.array([state])]
            tds_ns[state] += iavi_dataconverter.cum_td_ns[np.array([state])]

    # compute_metrics results are collected per batch; flatten them into one list.
    Js = list(it.chain.from_iterable(Js))
    with open(f"{result_dir}/J_{i_agent}.pickle", 'wb') as o:
        pickle.dump(Js, o)

    with open(f"{result_dir}/episode_lens_{i_agent}.pickle", 'wb') as o:
        pickle.dump(episode_lens, o)

    # Copy the callback's value table into a plain array, one state at a time.
    shape = iavi_dataconverter.V.shape
    v = np.zeros(shape)
    for state in range(shape[0]):
        v[state] = iavi_dataconverter.V[np.array([state])]

    grid_shape = (mdp._height, mdp._width)
    np.save(f"{result_dir}/value_fun_{i_agent}.npy", np.rot90(v.reshape(grid_shape)))
    np.save(f"{result_dir}/tds_{i_agent}.npy", np.rot90(tds.reshape(grid_shape)))
    np.save(f"{result_dir}/tds_ns_{i_agent}.npy", np.rot90(tds_ns.reshape(grid_shape)))

    # Dump the agent's Q function as a dense (state, action) array.
    shape = agent.Q.shape
    q = np.zeros(shape)
    for i_state in range(shape[0]):
        for i_action in range(shape[1]):
            state = np.array([i_state])
            action = np.array([i_action])
            q[i_state, i_action] = agent.Q.predict(state, action)
    np.save(f"{result_dir}/q_{i_agent}.npy", q)

    with open(f"{result_dir}/data_{i_agent}.pickle", 'wb') as o:
        pickle.dump(data, o)
    df = exp_funs.convert_trajectories(data, mdp)
    df.to_csv(f"{result_dir}/df_{i_agent}.csv")
    agent.save(f"{result_dir}/agent_{i_agent}", full_save=True)

# Experiment 1 (with the trap closed)

In [10]:
condition = "0-no-trap"
# Total number of training episodes per agent; read by run_experiment.
ITERATIONS = 1000

dir_ = f"{res_dir}/{condition}"
try:
    # Create target Directory
    os.mkdir(dir_)
    print("Directory " , dir_ ,  " Created ") 
except FileExistsError:
    print("Directory " , dir_ ,  " already exists")

# agents[0] is skipped; start=1 keeps the agent numbers (1, 2, ...) that are
# used in the result paths. A dedicated loop variable is used so that the
# notebook-level name `agent` (the unpickled agent constructor) is not
# overwritten by an agent instance.
for i_agent, experiment_agent in enumerate(agents[1:], start=1):
    print(i_agent)
    run_experiment(mdp, condition, experiment_agent, i_agent)

Directory  ../../RESULTS/FULLEXPERIMENT/r100tc-100aINTERg0.99/0-no-trap  already exists
1


  result = getattr(asarray(obj), method)(*args, **kwds)
                                               

: 

: 

In [None]:
condition = "1-trap"
# Total number of training episodes per agent; read by run_experiment.
ITERATIONS = 1000

dir_ = f"{res_dir}/{condition}"
try:
    # Create target Directory
    os.mkdir(dir_)
    print("Directory " , dir_ ,  " Created ") 
except FileExistsError:
    print("Directory " , dir_ ,  " already exists")

# Switch the environment to the trap condition before training continues.
# NOTE(review): this mutates `mdp`; whether calling it twice is harmless is
# not visible here, so avoid re-running this cell without rebuilding `mdp`.
mdp.open_trap()

# Same agents[1:] selection and 1-based numbering as the no-trap cell. A
# dedicated loop variable is used so that the notebook-level name `agent`
# (the unpickled agent constructor) is not overwritten by an agent instance.
for i_agent, experiment_agent in enumerate(agents[1:], start=1):
    run_experiment(mdp, condition, experiment_agent, i_agent)

Directory  ../../RESULTS/FULLEXPERIMENT/r100tc-100aINTERg0.99/1-trap  Created 


  result = getattr(asarray(obj), method)(*args, **kwds)
                                               