In [1]:
from Utils.Sampler import trajectory_sampler
from Utils.LoadModel import read_config, load_weights
from FeatureImportance.FI import FeatureImportance
import time

#these are the trees tested in this study
from DecisionTrees.Lavender_DT.DecisionTree import WeightedDecisionTrees
from DecisionTrees.Feature_specific_trees.DecisionTree import FeatureSpecifcTrees
from DecisionTrees.viper.viper import VIPER_reSampled, VIPER_weighted
from DecisionTrees.ScikitlearnDT.scikitlearnDT import SKLTree

#our own tree methods need some extra configuration:
#leaf_creator: 'STD' for continuous action spaces, 'single_class' for categorical ones
#splitting_function: 'normal', 'ImportanceWeighing', or 'ImportanceMinimization'
#weighing_method: 'Var_Weighted', 'Max_Avg', 'Max_All', 'Double_Avg', or 'Class'
#object_names: optional names for the axes, used for explanation purposes
def create_tree_config(config):
    """Attach the tree settings to ``config`` under the "Tree" key (in place).

    The criterion and leaf creator are chosen from ``config["env"]["discrete"]``:
    a falsy value selects "MSE" / "STD", a truthy value selects
    "entropy" / "single_class". The remaining settings are the same for both.
    Nothing is returned; ``config`` itself is modified.
    """
    #only these two settings depend on the kind of action space
    if config["env"]["discrete"]:
        criterion, leaf_creator = "entropy", "single_class"
    else:
        criterion, leaf_creator = "MSE", "STD"

    config["Tree"] = {
        "criterion": criterion,
        "leaf_creator": leaf_creator,
        "splitting_function": "ImportanceWeighing",
        "weighing_method": "Var_Weighted",
        "object_names": None,
    }

#load environment
#possible domains: bipedal, cartpole, acrobot, lunar_lander, half_cheetah
DOMAIN = "bipedal"
config = read_config(DOMAIN)
model = load_weights(config)
#adds the "Tree" settings to config in place (see create_tree_config above)
create_tree_config(config)

#sample trajectory
#each stage below is timed with time.time() and the elapsed seconds are printed
#X and Y are the sampler's two return values; presumably the visited states and
#the model's actions -- TODO confirm against Utils.Sampler
start = time.time()
X,Y = trajectory_sampler(config, model, n = 1)
print("SAMPLING:", time.time() - start)

#get Feature Importance
#possible FI types: Finite Differences (FD), Layerwise Relevance Propagation (LRP)
start = time.time()
FI_calculator = FeatureImportance("FD", model.network)
out, FI = FI_calculator.Relevence(X)
print("FI:", time.time() - start)

#fit tree
#the tree object is constructed before the timer starts, so FITTING covers fit() only
tree_creator = FeatureSpecifcTrees(config)
start = time.time()
tree_creator.fit(X,Y,FI,out)
print("FITTING:", time.time() - start)


device:  cpu


  model.baseline_network.load_state_dict(torch.load(PATH_critic,map_location=torch.device('cpu')))
  model.policy.load_state_dict((torch.load(PATH_actor,map_location=torch.device('cpu'))))


SAMPLING: 0.7388718128204346
FI: 5.443315267562866
FITTING: 1.154679298400879


After building the tree, we run the following metrics:

In [4]:
from Utils.Metrics import Uniform, Trajectory, EpisodeDivergence
#run the three metrics on the fitted tree and the model, printing each result as returned
#relies on config, tree_creator and model defined in the tree-fitting cell
print(Uniform(config, tree_creator, model, n=100))
print(Trajectory(config, tree_creator, model, trajectory_number=1))
#EpisodeDivergence is given a list of trees rather than a single tree
print(EpisodeDivergence(config, [tree_creator], model, n=1))

{'MSE': 3.4993092988779937, 'MAE': 1.3566447029231203, 'MED': 3.1857501286778867}
{'MSE': 3.472539422511418, 'MAE': 1.3541899683167187, 'MED': 3.1819301403046185}
[{'StateEucDifferences': array([0.       , 0.       , 0.       , 2.5272815, 1.4607552, 1.8835047,
       2.754642 , 3.406379 , 3.5853634, 1.5877451, 2.2643123, 1.9277934,
       2.5117493, 3.229769 , 1.9122998, 2.2083094, 2.9502401, 2.7823462,
       2.7857673, 3.0012987, 2.5928826, 2.2160356, 2.5684695, 2.1913846,
       2.5408435, 1.5243863, 3.939281 , 2.005973 , 2.8832793, 1.619947 ,
       1.8493226, 2.7408326, 3.4757266, 2.73834  , 2.0329888, 3.1441176,
       3.2546687, 2.7378914, 1.4842484, 3.679435 , 2.2526321, 2.3417609,
       2.6126168, 3.1340673, 2.5750208, 2.593642 , 3.3275764, 2.9725673,
       2.64768  , 2.662199 , 2.3311555, 3.9889812, 2.2168503, 2.9370162],
      dtype=float32), 'ActionError': array([3.23075189e-03, 3.84232836e-03, 7.01807629e+00, 5.29287392e+00,
       4.48930634e+00, 7.12344158e+00, 4.70557

Below is how we obtained the test sets:

In [1]:
#uniform/global policy
from Utils.Sampler import uniform_sample_policy
from Utils.LoadModel import read_config, load_weights
import os
import numpy as np
def save_samples(path, X, Y, activations = None):
    """Save sampled arrays as .npy files, using ``path`` as a string prefix.

    Files are written to ``path + "X.npy"``, ``path + "Y.npy"`` and, when
    ``activations`` is not None, ``path + "Activations.npy"``. End ``path``
    with "/" to place the files inside a directory. Any missing directories
    in the directory part of ``path`` are created first.

    Args:
        path: prefix for the output files (may include directories).
        X: array saved as "X.npy".
        Y: array saved as "Y.npy".
        activations: optional array saved as "Activations.npy".
    """
    directory = os.path.dirname(path)
    #os.makedirs("") raises FileNotFoundError, so skip it when the prefix
    #has no directory component (files then go to the working directory)
    if directory:
        os.makedirs(directory, exist_ok=True)

    np.save(path + "X.npy", X)
    np.save(path + "Y.npy", Y)

    if activations is not None:
        np.save(path + "Activations.npy", activations)
#build and store the uniform test set for every domain
for DOMAIN in ["bipedal", "cartpole", "acrobot", "lunar_lander", "half_cheetah"]:
    config = read_config(DOMAIN)
    model = load_weights(config)

    samples = uniform_sample_policy(config, model, n=10000)
    path = "Test_Sets/Uniform_Samples/" + DOMAIN + "/"

    #a third entry is saved as activations only when the sampler returns exactly three items
    save_samples(
        path,
        samples[0],
        samples[1],
        samples[2] if len(samples) == 3 else None,
    )


device:  cpu
device:  cpu


  model.baseline_network.load_state_dict(torch.load(PATH_critic,map_location=torch.device('cpu')))
  model.policy.load_state_dict((torch.load(PATH_actor,map_location=torch.device('cpu'))))


device:  cpu
device:  cpu
device:  cpu


In [None]:
#trajectory
from Utils.Sampler import trajectory_sampler
from Utils.LoadModel import read_config, load_weights
import os
import numpy as np
def save_samples(path, X, Y, activations = None):
    """Save sampled arrays as .npy files, using ``path`` as a string prefix.

    Files are written to ``path + "X.npy"``, ``path + "Y.npy"`` and, when
    ``activations`` is not None, ``path + "Activations.npy"``. End ``path``
    with "/" to place the files inside a directory. Any missing directories
    in the directory part of ``path`` are created first.

    Args:
        path: prefix for the output files (may include directories).
        X: array saved as "X.npy".
        Y: array saved as "Y.npy".
        activations: optional array saved as "Activations.npy".
    """
    directory = os.path.dirname(path)
    #os.makedirs("") raises FileNotFoundError, so skip it when the prefix
    #has no directory component (files then go to the working directory)
    if directory:
        os.makedirs(directory, exist_ok=True)

    np.save(path + "X.npy", X)
    np.save(path + "Y.npy", Y)

    if activations is not None:
        np.save(path + "Activations.npy", activations)
#build and store the trajectory test set for every domain
for DOMAIN in ["bipedal", "cartpole", "acrobot", "lunar_lander", "half_cheetah"]:
    config = read_config(DOMAIN)
    model = load_weights(config)

    samples = trajectory_sampler(config, model, n=100, use_seed = True)
    path = "Test_Sets/Trajectory_Samples/" + DOMAIN + "/"

    #a third entry is saved as activations only when the sampler returns exactly three items
    save_samples(
        path,
        samples[0],
        samples[1],
        samples[2] if len(samples) == 3 else None,
    )

device:  cpu


  model.baseline_network.load_state_dict(torch.load(PATH_critic,map_location=torch.device('cpu')))
  model.policy.load_state_dict((torch.load(PATH_actor,map_location=torch.device('cpu'))))
