In [1]:
import argparse
import os
import sys
import numpy as np
import torch
import gym
import matplotlib

matplotlib.use("agg")
import matplotlib.pyplot as plt
import unittest
from DeepLearning_Models.utils.general import join, plot_combined
from DeepLearning_Models.ActorCritic.policy_gradient import PolicyGradient
from Explanations_Models.LIME import LimeModel
from Explanations_Models.sampling_methods import UniformSampler
import random
import yaml
yaml.add_constructor("!join", join)
parser = argparse.ArgumentParser()
def weight_dict_corrector(loaded_state_dict):
    new_state_dict = {}
    for key, value in loaded_state_dict.items():
        new_key = key.replace("network.", "")  # Remove the 'network.' prefix
        new_state_dict[new_key] = value
    return new_state_dict

In [2]:
config_file = open("config_envs/{}.yml".format("cartpole"))
config = yaml.load(config_file, Loader=yaml.FullLoader)
env = gym.make(config["env"]["env_name"], render_mode="rgb_array")
seed = config["env"]["seed"][0]
model = PolicyGradient(env, config, seed)

In [3]:
model = PolicyGradient(env, config, seed)

model.network.load_state_dict(weight_dict_corrector(torch.load(config["output"]["actor_output"].format(seed))))
model.baseline_network.load_state_dict(torch.load(config["output"]["critic_output"].format(seed)))

<All keys matched successfully>

In [4]:
for param in model.network.parameters():
    print(param)

Parameter containing:
tensor([[ 0.2524,  0.4509,  0.2846,  0.5535],
        [-0.1488,  0.0636, -0.0924,  0.2430],
        [-0.5338,  0.3087,  1.2088,  1.0531],
        [ 0.0801, -0.3858,  1.3917,  0.8502],
        [-0.1463, -0.2219,  1.0615,  1.0172],
        [-0.0216, -0.3818, -1.0799, -1.0177],
        [-0.1755,  0.2484, -1.2146, -0.8090],
        [ 0.1426, -0.0144, -0.7964, -0.7438],
        [ 0.1256, -0.2272,  0.1714,  0.0702],
        [-0.2521, -0.0091, -1.1237, -0.8325],
        [ 0.4108,  0.6206,  0.5909,  0.3908],
        [ 0.0653, -0.3332, -0.1642, -0.2828],
        [ 0.3342,  0.5736,  1.0463,  0.7243],
        [-0.2706, -0.0567, -1.1215, -0.7693],
        [ 0.0642,  0.4346,  1.2469,  1.1546],
        [ 0.1709,  0.5816,  0.6188,  1.0997],
        [ 0.1192, -0.6881, -0.8278, -1.0528],
        [-0.1938, -0.1345,  0.6705,  1.0187],
        [ 0.0609,  0.7742,  0.6144,  0.3797],
        [-0.0837, -0.2195, -0.7932, -0.7146],
        [ 0.3726,  0.0875, -0.2311, -0.7188],
        [-0.

In [6]:
random_tensor = torch.rand(4)
print(random_tensor)
print(model.network.forward(random_tensor))

tensor([0.1023, 0.5899, 0.3439, 0.7094])
tensor([-12.9038,  13.1691], grad_fn=<ViewBackward0>)


Begin LIME Explanations

In [4]:
config_file = open("config_explanations/{}.yml".format("CartPole_Uniform_All"))
config = yaml.load(config_file, Loader=yaml.FullLoader)

In [6]:
arr = np.load("state_list.npy")
print(arr)

[[ 4.89055961e-02  1.53269107e-03  3.86248380e-02  9.09861265e-05]
 [ 4.89362516e-02  1.96080029e-01  3.86266597e-02 -2.80159503e-01]
 [ 5.28578497e-02  4.28972737e-04  3.30234692e-02  2.44516004e-02]
 [ 5.28664291e-02  1.95062160e-01  3.35124992e-02 -2.57631868e-01]
 [ 5.67676723e-02  3.89690042e-01  2.83598639e-02 -5.39558947e-01]
 [ 6.45614713e-02  1.94181129e-01  1.75686851e-02 -2.38076821e-01]
 [ 6.84450939e-02  3.89047742e-01  1.28071485e-02 -5.25166690e-01]
 [ 7.62260482e-02  1.93747938e-01  2.30381428e-03 -2.28475809e-01]
 [ 8.01010132e-02  3.88836890e-01 -2.26570177e-03 -5.20431161e-01]
 [ 8.78777504e-02  1.93746895e-01 -1.26743242e-02 -2.28463024e-01]
 [ 9.17526856e-02  3.89047652e-01 -1.72435846e-02 -5.25116801e-01]
 [ 9.95336398e-02  1.94172561e-01 -2.77459212e-02 -2.37916961e-01]
 [ 1.03417091e-01  3.89679670e-01 -3.25042605e-02 -5.39221048e-01]
 [ 1.11210681e-01  1.95029378e-01 -4.32886817e-02 -2.56954372e-01]
 [ 1.15111269e-01  5.51342382e-04 -4.84277681e-02  2.17665136e

In [7]:
tmparr = []
with torch.no_grad():
    model.network.to("cuda")
    tens = model.network.forward(torch.tensor(arr, device="cuda", dtype=torch.float32))

tens = tens.cpu().numpy()


In [8]:
np.save("output_samples",tens)

In [6]:
samps = LM.sample(config["sampling"])

In [7]:
print(LM.sample_points)

tensor([[-4.6308e-02, -5.4058e-01, -7.4285e-02, -7.6221e-02],
        [ 8.0101e-02,  3.8884e-01, -2.2657e-03, -5.2043e-01],
        [-1.9511e+00, -7.2992e-01,  8.0346e-03,  8.9311e-02],
        [-1.1075e+00, -5.5158e-01, -3.3658e-02,  1.6683e-01],
        [-1.3980e+00, -7.3418e-01, -2.3957e-02,  1.8327e-01],
        [-9.8267e-01, -3.5970e-01,  8.6403e-03, -5.4407e-02],
        [-2.2153e-01, -9.1895e-01, -5.1955e-02,  2.4785e-01],
        [-7.1273e-01, -5.3608e-01,  5.1613e-02, -1.7540e-01],
        [-1.4636e+00, -5.3753e-01, -1.8727e-02, -1.4334e-01],
        [ 1.3137e-01,  2.0330e-01, -8.0144e-02, -4.4042e-01],
        [ 1.1909e-01,  1.9778e-01, -5.3881e-02, -3.1776e-01],
        [ 3.0756e-02, -5.4787e-01, -9.3567e-02,  8.5180e-02],
        [-1.0220e+00, -3.6005e-01, -3.2413e-03, -4.6626e-02],
        [ 4.8936e-02,  1.9608e-01,  3.8627e-02, -2.8016e-01],
        [-2.6217e-01, -9.1754e-01, -3.6524e-02,  2.1659e-01],
        [ 1.1121e-01,  1.9503e-01, -4.3289e-02, -2.5695e-01],
        

In [8]:
Y[:,0]

tensor([ 24.8620,  -2.0449,  16.1394,  -0.7607,  -0.9194,  -1.8511,   3.2358,
        -20.7592, -11.7461,  12.2112,  -0.3942,   7.2018, -10.6108,  -9.5756,
         10.3003,   2.9351,  -8.0528, -13.8004,   1.2682,  -7.1154, -26.7905,
          5.9061,   6.9093,  -2.9693, -19.2307, -16.6755, -20.3268, -14.5343,
        -25.0185,  -2.1161,  14.5308,   3.5906,  10.6796,  10.9008,  -9.1718,
         15.7786,  -0.1640,   5.7855,   6.9645,   6.3669,  -8.8867,   2.4980,
         -7.0730, -18.5327,  -5.6470,   1.9107,   6.2599,  12.3816,  -5.4595,
          0.6563, -11.1298,   5.4386, -11.0380,  15.5845,  12.6373,  11.0331,
          2.4659, -18.2304,   1.3799, -15.2239,  -5.4493, -18.8083, -15.9532,
        -15.1834, -19.2790, -24.7451,  13.5840,  -9.1278,  -6.1397, -21.7683,
        -30.8783, -13.0459,  -6.3169,  -4.2963,  13.3112, -17.6113,   6.6117,
         -9.0245,  -6.6658,  13.0228, -10.4914, -10.6203,  -6.5549,   1.1602,
         14.7766,   9.5811, -22.4211,   1.1953, -15.9179,  -2.17

In [10]:
new_folder_path = config["explanation_output"]["save_path"]

# Create the new folder if it doesn't exist
os.makedirs(new_folder_path, exist_ok=True)
LM.runner()

In [11]:
for i in LM.interpretable_models:
    for param in i.parameters():
        print(param)

Parameter containing:
tensor([[-0.6503, -2.6930, -9.2145, -9.1385]], device='cuda:0',
       requires_grad=True)
Parameter containing:
tensor([[0.6417, 2.6528, 9.6215, 9.5044]], device='cuda:0', requires_grad=True)


In [11]:
t

tensor([0, 0, 0])

In [12]:
t.unsqueeze(0)

tensor([[0, 0, 0]])

In [1]:
import argparse
import os
import sys
import numpy as np
import torch
import gym
import matplotlib

matplotlib.use("agg")
import matplotlib.pyplot as plt
import unittest
from DeepLearning_Models.utils.general import join, plot_combined
from DeepLearning_Models.ActorCritic.policy_gradient import PolicyGradient

import random
import yaml
yaml.add_constructor("!join", join)
parser = argparse.ArgumentParser()

def select_action(state, actor_model):
    # Assuming your actor model directly outputs the action to take
    # You might need to process the output depending on your model architecture
    with torch.no_grad():
        state = torch.from_numpy(state).float()
        action = actor_model(state).max(0)[1].view(1, 1)
    return action.item()

def weight_dict_corrector(loaded_state_dict):
    new_state_dict = {}
    for key, value in loaded_state_dict.items():
        new_key = key.replace("network.", "")  # Remove the 'network.' prefix
        new_state_dict[new_key] = value
    return new_state_dict


parser.add_argument("--config_filename", required=False, type=str)
parser.add_argument("--plot_config_filename", required=False, type=str)
parser.add_argument("--run_basic_tests", required=False, type=bool)

args = parser.parse_args()
config_file = open("config_envs/{}.yml".format(args.config_filename))
config = yaml.load(config_file, Loader=yaml.FullLoader)

env = gym.make(config["env"]["env_name"], render_mode="rgb_array")
seed = config["env"]["seed"][0]
model = PolicyGradient(env, config, seed)

state, info = env.reset()
print(state)

usage: ipykernel_launcher.py [-h] [--config_filename CONFIG_FILENAME]
                             [--plot_config_filename PLOT_CONFIG_FILENAME]
                             [--run_basic_tests RUN_BASIC_TESTS]
ipykernel_launcher.py: error: unrecognized arguments: --f="c:\Users\Bryan Lavender\AppData\Roaming\jupyter\runtime\kernel-v2-245846YkmEx5Pi5ur.json"


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
printa