In [1]:
import glob
import os
import time
import gzip
import platform
from datetime import datetime

# Use MuJoCo's EGL rendering backend on every platform whose platform
# string does not contain 'mac'.
if 'mac' not in platform.platform():
    os.environ['MUJOCO_GL'] = 'egl'
    # When SLURM exposes the GPUs of this job step, hand the same value to EGL.
    if 'SLURM_STEP_GPUS' in os.environ:
        os.environ['EGL_DEVICE_ID'] = os.environ['SLURM_STEP_GPUS']

from absl import app, flags
from functools import partial
import numpy as np
import jax
import jax.numpy as jnp
import flax
from jaxrl_m.dataset import Dataset

import tqdm

from src import d4rl_utils, d4rl_ant, ant_diagnostics, viz_utils
from src.agents import TempDATA as learner
from src.dataset_utils import GCDataset

from jaxrl_m.wandb import setup_wandb, default_wandb_config
import wandb
from jaxrl_m.evaluation import evaluate_with_trajectories, EpisodeMonitor, supply_rng

from ml_collections import config_flags
import pickle

from src.utils import record_video, CsvLogger

  from distutils.dep_util import newer, newer_group
No module named 'mjrl'
No module named 'flow'
No module named 'carla'
pybullet build time: May 20 2022 19:44:17
  import imp


In [2]:
import matplotlib

matplotlib.use('Agg')
from matplotlib import patches

import matplotlib.pyplot as plt
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from functools import partial
from mpl_toolkits.axes_grid1 import make_axes_locatable

import gym
import d4rl
import numpy as np
import functools as ft
import math
from jaxrl_m.dataset import Dataset
import matplotlib.gridspec as gridspec


def get_canvas_image(canvas):
    """Render ``canvas`` and return its pixels as a ``(height, width, 3)`` uint8 array.

    The canvas must provide ``draw()``, ``tostring_rgb()`` and
    ``get_width_height()``.
    """
    canvas.draw()
    rgb_bytes = canvas.tostring_rgb()
    # get_width_height() reports (width, height); the image is row-major.
    width, height = canvas.get_width_height()
    flat_pixels = np.frombuffer(rgb_bytes, dtype='uint8')
    return flat_pixels.reshape((height, width, 3))


def valid_goal_sampler(self, np_random):
    """Sample a goal position from any non-wall cell of the maze.

    Intended to be bound to a maze environment (``self``) with
    ``functools.partial`` and installed as its ``goal_sampler``.

    Args:
        self: object exposing ``_maze_map`` (2-D grid of cell codes),
            ``_rowcol_to_xy(cell, add_random_noise=...)`` and
            ``_maze_size_scaling``.
        np_random: random generator providing ``choice`` and ``uniform``.
            All randomness drawn directly in this function comes from this
            generator, so identically seeded generators give identical
            draws here.

    Returns:
        ``(x, y)`` tuple, each coordinate clipped below at 0.
    """
    # Cells coded 0, 'r' or 'g' are the ones a goal may be placed in.
    candidate_cells = [
        (row, col)
        for row in range(len(self._maze_map))
        for col in range(len(self._maze_map[0]))
        if self._maze_map[row][col] in [0, 'r', 'g']
    ]

    cell = candidate_cells[np_random.choice(len(candidate_cells))]
    xy = self._rowcol_to_xy(cell, add_random_noise=True)

    # Extra positive jitter of up to 0.125 * cell size per axis. Drawn from
    # np_random (not the global numpy RNG) so seeding is respected.
    random_x = np_random.uniform(low=0, high=0.5) * 0.25 * self._maze_size_scaling
    random_y = np_random.uniform(low=0, high=0.5) * 0.25 * self._maze_size_scaling

    return (max(xy[0] + random_x, 0), max(xy[1] + random_y, 0))


class GoalReachingAnt(gym.Wrapper):
    """Wrapper that turns an ant-maze env into a goal-conditioned one.

    Observations become dicts with keys ``'observation'`` and ``'goal'``; the
    goal is a copy of the observation whose first two entries are replaced by
    the target xy. The maze's goal sampler is replaced by
    ``valid_goal_sampler``.

    NOTE(review): ``self.get_xy()`` and ``self.target_goal`` are not defined
    in this class; presumably they resolve on the wrapped env through
    gym.Wrapper attribute forwarding — confirm against the installed gym.
    """

    def __init__(self, env_name):
        """Create the env named ``env_name`` and install the custom goal sampler."""
        # NOTE(review): gym.Wrapper.__init__ is not called; self.env is set directly.
        self.env = gym.make(env_name)
        # Reaches three wrapper levels down to the object holding `_wrapped_env`.
        self.env.env.env._wrapped_env.goal_sampler = ft.partial(valid_goal_sampler, self.env.env.env._wrapped_env)
        # Both dict entries reuse the wrapped env's observation space.
        self.observation_space = gym.spaces.Dict({
            'observation': self.env.observation_space,
            'goal': self.env.observation_space,
        })
        self.action_space = self.env.action_space

    def step(self, action):
        """Step the wrapped env and add goal-reaching fields to ``info``.

        Returns ``(obs_dict, r, done, info)`` where ``done`` is True only
        when ``info`` contains ``'TimeLimit.truncated'`` (the wrapped env's
        own ``done`` is discarded) and ``info['success']`` is 1.0 when the
        xy distance to the target is below 0.5.
        """
        next_obs, r, done, info = self.env.step(action)

        achieved = self.get_xy()
        desired = self.target_goal
        distance = np.linalg.norm(achieved - desired)
        info['x'], info['y'] = achieved
        info['achieved_goal'] = np.array(achieved)
        info['desired_goal'] = np.copy(desired)
        info['success'] = float(distance < 0.5)
        # Overrides the wrapped env's termination signal.
        done = 'TimeLimit.truncated' in info

        return self.get_obs(next_obs), r, done, info

    def get_obs(self, obs):
        """Return ``dict(observation=obs, goal=g)`` where ``g`` is ``obs`` with its first two entries set to the target goal."""
        target_goal = obs.copy()
        target_goal[:2] = self.target_goal
        return dict(observation=obs, goal=target_goal)

    def reset(self):
        """Reset the wrapped env and return the goal-conditioned observation dict."""
        obs = self.env.reset()
        return self.get_obs(obs)

    def get_starting_boundary(self):
        """Return ``((x_lo, y_lo), (x_hi, y_hi))`` computed from the maze size, cell scaling and initial torso offset."""
        # `self` is rebound to the inner env for the rest of the method.
        self = self.env.env.env
        torso_x, torso_y = self._init_torso_x, self._init_torso_y
        S = self._maze_size_scaling
        return (0 - S / 2 + S - torso_x, 0 - S / 2 + S - torso_y), (
        len(self._maze_map[0]) * S - torso_x - S / 2 - S, len(self._maze_map) * S - torso_y - S / 2 - S)

    def XY(self, n=20):
        """Return an ``(n * n, 2)`` array of xy points on a regular grid inset 4% from each side of the starting boundary."""
        bl, tr = self.get_starting_boundary()
        X = np.linspace(bl[0] + 0.04 * (tr[0] - bl[0]), tr[0] - 0.04 * (tr[0] - bl[0]), n)
        Y = np.linspace(bl[1] + 0.04 * (tr[1] - bl[1]), tr[1] - 0.04 * (tr[1] - bl[1]), n)

        X, Y = np.meshgrid(X, Y)
        states = np.array([X.flatten(), Y.flatten()]).T
        return states

    def four_goals(self):
        """Return four non-wall cell positions, the extremes toward (-x,-y), (+x,-y), (+x,+y) and (-x,+y), in that order."""
        # `self` is rebound to the inner env for the rest of the method.
        self = self.env.env.env

        valid_cells = []
        goal_cells = []  # never used

        for i in range(len(self._maze_map)):
            for j in range(len(self._maze_map[0])):
                if self._maze_map[i][j] in [0, 'r', 'g']:
                    valid_cells.append(self._rowcol_to_xy((i, j), add_random_noise=False))

        goals = []
        goals.append(max(valid_cells, key=lambda x: -x[0] - x[1]))
        goals.append(max(valid_cells, key=lambda x: x[0] - x[1]))
        goals.append(max(valid_cells, key=lambda x: x[0] + x[1]))
        goals.append(max(valid_cells, key=lambda x: -x[0] + x[1]))
        return goals

    def draw(self, ax=None):
        """Draw every maze cell equal to 1 as a grey square on ``ax`` (current axes if omitted), set the axis limits and hide the axis."""
        if not ax: ax = plt.gca()
        # `self` is rebound to the inner env for the rest of the method.
        self = self.env.env.env
        torso_x, torso_y = self._init_torso_x, self._init_torso_y
        S = self._maze_size_scaling
        for i in range(len(self._maze_map)):
            for j in range(len(self._maze_map[0])):
                struct = self._maze_map[i][j]
                if struct == 1:
                    # Column index maps to x and row index to y; each square is S wide.
                    rect = patches.Rectangle((j * S - torso_x - S / 2, i * S - torso_y - S / 2), S, S,
                                             linewidth=1, edgecolor='none', facecolor='grey', alpha=1.0)

                    ax.add_patch(rect)
        # Limits trim 0.6 * S from each side of the full maze extent.
        ax.set_xlim(0 - S / 2 + 0.6 * S - torso_x, len(self._maze_map[0]) * S - torso_x - S / 2 - S * 0.6)
        ax.set_ylim(0 - S / 2 + 0.6 * S - torso_y, len(self._maze_map) * S - torso_y - S / 2 - S * 0.6)
        ax.axis('off')


def get_env_and_dataset(env_name):
    """Build a ``GoalReachingAnt`` env and its D4RL Q-learning dataset.

    Two fields are added before wrapping the data in ``Dataset``:
    ``masks`` (1 - terminals) and ``dones_float``, which is 1 wherever the
    next stored observation differs from this row's ``next_observations``.

    Returns:
        ``(env, dataset)``.
    """
    env = GoalReachingAnt(env_name)
    raw = d4rl.qlearning_dataset(env)

    raw['masks'] = 1.0 - raw['terminals']

    # Row t continues into row t+1 iff next_observations[t] ~= observations[t+1].
    following_obs = np.roll(raw['observations'], -1, axis=0)
    continues = np.isclose(following_obs, raw['next_observations']).all(-1)
    raw['dones_float'] = 1.0 - continues

    return env, Dataset.create(**raw)


def plot_value(env, dataset, value_fn, fig, ax, N=20, random=False, title=None):
    """Draw ``value_fn`` as a colour mesh over an ``N x N`` grid of xy positions.

    Args:
        env: object providing ``XY(n=...)`` and ``draw(ax)``.
        dataset: source of template observations (``dataset['observations']``;
            ``dataset.size`` is read when ``random`` is true).
        value_fn: callable mapping a batch of observations to ``N * N`` values.
        fig, ax: matplotlib figure and axes to draw on.
        N: grid resolution per axis.
        random: if true, each grid point uses a randomly chosen dataset
            observation as template; otherwise the first observation is used.
        title: optional axes title.
    """
    grid_xy = env.XY(n=N)

    if random:
        picks = np.random.choice(dataset.size, len(grid_xy))
        base_observations = np.copy(dataset['observations'][picks])
    else:
        template = np.copy(dataset['observations'][0])
        base_observations = np.tile(template, (grid_xy.shape[0], 1))

    # Only the first two entries (xy) vary across the grid.
    base_observations[:, :2] = grid_xy

    values = value_fn(base_observations)

    grid_x = grid_xy[:, 0].reshape(N, N)
    grid_y = grid_xy[:, 1].reshape(N, N)
    mesh = ax.pcolormesh(grid_x, grid_y, values.reshape(N, N), cmap='viridis')
    env.draw(ax)

    # Colour bar in a narrow axes attached to the right of the plot.
    colorbar_ax = make_axes_locatable(ax).append_axes('right', size='5%', pad=0.05)
    fig.colorbar(mesh, cax=colorbar_ax, orientation='vertical')

    if title:
        ax.set_title(title)


def plot_policy(env, dataset, policy_fn, fig, ax, N=20, random=False, title=None):
    """Draw the first two action components of ``policy_fn`` as a quiver plot.

    Args:
        env: object providing ``XY(n=...)`` and ``draw(ax)``.
        dataset: source of template observations (``dataset['observations']``;
            ``dataset.size`` is read when ``random`` is true).
        policy_fn: callable mapping a batch of observations to actions with
            at least two columns.
        fig: unused; kept for a uniform plotting signature.
        ax: matplotlib axes to draw on.
        N: grid resolution per axis.
        random: if true, each grid point uses a randomly chosen dataset
            observation as template; otherwise the first observation is used.
        title: optional axes title.
    """
    grid_xy = env.XY(n=N)

    if random:
        picks = np.random.choice(dataset.size, len(grid_xy))
        base_observations = np.copy(dataset['observations'][picks])
    else:
        template = np.copy(dataset['observations'][0])
        base_observations = np.tile(template, (grid_xy.shape[0], 1))

    # Only the first two entries (xy) vary across the grid.
    base_observations[:, :2] = grid_xy

    actions = policy_fn(base_observations)

    grid_x = grid_xy[:, 0].reshape(N, N)
    grid_y = grid_xy[:, 1].reshape(N, N)
    arrow_u = actions[:, 0].reshape(N, N)
    arrow_v = actions[:, 1].reshape(N, N)

    ax.quiver(grid_x, grid_y, arrow_u, arrow_v)
    env.draw(ax)
    if title:
        ax.set_title(title)


def plot_trajectories(env, dataset, trajectories, fig, ax, color_list=None):
    """Scatter the xy path of each trajectory and mark its last point with a star.

    Args:
        env: object providing ``draw(ax)``.
        dataset, fig: unused; kept for a uniform plotting signature.
        trajectories: iterable of mappings whose ``'observation'`` entry is a
            sequence of observations (first two columns are plotted as x, y).
        ax: matplotlib axes to draw on.
        color_list: iterable of colours paired with trajectories; defaults to
            cycling matplotlib's property-cycle colours.
    """
    if color_list is None:
        from itertools import cycle
        color_list = cycle(plt.rcParams['axes.prop_cycle'].by_key()['color'])

    for color, trajectory in zip(color_list, trajectories):
        path = np.array(trajectory['observation'])
        xs, ys = path[:, 0], path[:, 1]
        # Faint dots for the path, a larger star at the final position.
        ax.scatter(xs, ys, s=5, c=color, alpha=0.02)
        ax.scatter(xs[-1], ys[-1], s=50, c=color, marker='*', alpha=0.3)

    env.draw(ax)


def plot_line_trajectories(env, dataset, trajectories, fig, ax, color_list=None):
    """Draw the xy path of each trajectory as a thin line, then draw the maze.

    Args:
        env: object providing ``draw(ax)``.
        dataset, fig: unused; kept for a uniform plotting signature.
        trajectories: iterable of mappings whose ``'observation'`` entry is a
            sequence of observations (first two columns are plotted as x, y).
        ax: matplotlib axes to draw on.
        color_list: iterable of colours paired with trajectories; defaults to
            cycling matplotlib's property-cycle colours.
    """
    if color_list is None:
        from itertools import cycle
        color_list = cycle(plt.rcParams['axes.prop_cycle'].by_key()['color'])

    for color, trajectory in zip(color_list, trajectories):
        path = np.array(trajectory['observation'])
        ax.plot(path[:, 0], path[:, 1], color=color, linewidth=0.7)

    env.draw(ax)


def gc_sampling_adaptor(policy_fn):
    """Adapt a ``policy_fn(observation, goal, ...)`` to accept one observation dict.

    The returned callable takes a mapping with ``'observation'`` and
    ``'goal'`` entries and forwards any extra positional/keyword arguments.
    """
    def adapted(observations, *args, **kwargs):
        obs, goal = observations['observation'], observations['goal']
        return policy_fn(obs, goal, *args, **kwargs)

    return adapted


def trajectory_image(env, dataset, trajectories, **kwargs):
    """Render trajectories as lines over the maze and return the figure as an RGB array.

    Extra keyword arguments are forwarded to ``plot_line_trajectories``.
    The figure is closed before returning.
    """
    figure = plt.figure(tight_layout=True)
    agg_canvas = FigureCanvas(figure)

    axes = plt.gca()
    plot_line_trajectories(env, dataset, trajectories, figure, axes, **kwargs)

    plt.tight_layout()
    rendered = get_canvas_image(agg_canvas)
    plt.close(figure)
    return rendered


def value_image(env, dataset, value_fn):
    """Render ``value_fn`` with ``plot_value`` and return the figure as an RGB array.

    The figure is closed before returning.
    """
    figure = plt.figure(tight_layout=True)
    agg_canvas = FigureCanvas(figure)
    axes = plt.gca()
    plot_value(env, dataset, value_fn, figure, axes)
    rendered = get_canvas_image(agg_canvas)
    plt.close(figure)
    return rendered


def most_squarelike(n):
    """Pick a ``(c, ceil(n / c))`` grid shape for ``n`` items, with ``c <= sqrt(n)``.

    Searches ``c`` downward from ``int(sqrt(n))`` and accepts the first value
    for which the last row is either full (``n % c == 0``) or one short of
    ``c`` (``n % c == c - 1``). Returns ``None`` when ``n`` is 0.
    """
    for c in range(int(n ** 0.5), 0, -1):
        remainder = n % c
        if remainder == 0 or remainder == c - 1:
            return (c, int(math.ceil(n / c)))


def make_visual(env, dataset, methods):
    """Lay out one subplot per entry of ``methods`` and return the figure as an RGB array.

    Each method is called as ``method(env, dataset, fig=fig, ax=ax)``.
    The grid shape comes from ``most_squarelike(len(methods))`` and panels
    are filled row by row. The figure is closed before returning.
    """
    n_rows, n_cols = most_squarelike(len(methods))
    grid = gridspec.GridSpec(n_rows, n_cols)

    figure = plt.figure(tight_layout=True)
    agg_canvas = FigureCanvas(figure)

    for index, method in enumerate(methods):
        row, col = divmod(index, n_cols)
        panel = figure.add_subplot(grid[row, col])
        method(env, dataset, fig=figure, ax=panel)

    plt.tight_layout()
    rendered = get_canvas_image(agg_canvas)
    plt.close(figure)
    return rendered


def gcvalue_image(env, dataset, value_fn):
    """Render a 2x2 panel of value maps, one per goal, and return it as an RGB array.

    Goals come from ``env.four_goals()``, except that the third one is
    replaced by the fixed point ``(32.75, 24.75)``. For each goal, a goal
    observation is built from the first dataset observation with its first
    two entries set to the goal, and ``value_fn`` is partially applied to it
    before being handed to ``plot_value``.
    """
    template_obs = dataset['observations'][0]

    first, second, _, fourth = env.four_goals()
    # NOTE(review): hard-coded override of the third goal; presumably tuned
    # for one specific maze — confirm before using with other environments.
    goal_points = [first, second, (32.75, 24.75), fourth]

    figure = plt.figure(tight_layout=True)
    agg_canvas = FigureCanvas(figure)

    for panel_index, goal_xy in enumerate(map(np.array, goal_points), start=1):
        panel = figure.add_subplot(2, 2, panel_index)

        goal_observation = template_obs.copy()
        goal_observation[:2] = goal_xy

        plot_value(env, dataset, partial(value_fn, goal_observation), figure, panel)

        panel.set_title('Goal: ({:.2f}, {:.2f})'.format(goal_xy[0], goal_xy[1]))
        panel.scatter(goal_xy[0], goal_xy[1], s=50, c='red', marker='*')

    rendered = get_canvas_image(agg_canvas)
    plt.close(figure)
    return rendered


In [3]:

# --- Experiment identity -----------------------------------------------------
agent_name = 'TempDATA'
env_name = 'antmaze-large-play-v2'
run_group = 'Debug'
seed = 0
algo_name = None  # Not used, only for logging

# --- Checkpointing / output ---------------------------------------------------
save_dir = 'exp/'
restore_path = restore_epoch = None

# --- Schedule -------------------------------------------------------------------
train_steps = 1_000_000
log_interval = 10_000
eval_interval = 100_000
save_interval = 1_000_000
eval_episodes = 50
num_video_episodes = 2

# --- Batching / rollouts --------------------------------------------------------
batch_size = 512
rollout_batch_size = 256 * 30
rollout_length = 3
rollout_percent = 0.2

# --- Networks and optimisation --------------------------------------------------
lr = 3e-4
value_hidden_dim = 512
value_num_layers = 3
actor_hidden_dim = 512
actor_num_layers = 3
discount = 0.99
tau = 0.005
expectile = 0.95
use_layer_norm = 1

# --- Skill settings ---------------------------------------------------------------
skill_dim = 32
skill_expectile = 0.9
skill_temperature = 1
skill_discount = 0.99

smoothing_coef = 0.01
# Multipliers; lp_ae scales the number of representation updates in the
# training loop.
lp_ae = lp_mb = lp_rl = 1

# --- Goal sampling probabilities passed to GCDataset ------------------------------
p_currgoal = 0.0
p_trajgoal = 0.625
p_randomgoal = 0.375

# --- Planning ---------------------------------------------------------------------
planning_num_recursions = 0
planning_num_states = 50_000
planning_num_knns = 50

# --- Optional components ----------------------------------------------------------
screenshot = 0
encoder = decoder = p_aug = None

# config_flags.DEFINE_config_dict('wandb', default_wandb_config(), lock_config=False)

In [4]:
aux_env = {}
goal_info = {}
if 'antmaze' in env_name:
    # 'ultra' mazes are built via gym (with d4rl_ext imported); every other
    # antmaze goes through the project helper.
    if 'ultra' in env_name:
        import d4rl_ext
        import gym
        env = EpisodeMonitor(gym.make(env_name))
    else:
        env = d4rl_utils.make_env(env_name)

    dataset = d4rl_utils.get_dataset(env, env_name, goal_conditioned=True)
    # Shift every reward down by 1.
    dataset = dataset.copy({'rewards': dataset['rewards'] - 1.0})

    # Render once before touching env.viewer (presumably this is what creates
    # the viewer — confirm).
    env.render(mode='rgb_array', width=200, height=200)

    # Top-down camera preset per maze size: (lookat x, lookat y, distance).
    # The first matching key wins, in this order.
    for _maze_key, (_look_x, _look_y, _cam_distance) in (
        ('ultra', (26, 18, 70)),
        ('umaze', (6, 3, 40)),
        ('medium', (10, 8, 50)),
        ('large', (18, 12, 65)),
    ):
        if _maze_key in env_name:
            env.viewer.cam.lookat[0] = _look_x
            env.viewer.cam.lookat[1] = _look_y
            env.viewer.cam.distance = _cam_distance
            env.viewer.cam.elevation = -90
            break

    # Separate env/dataset pair used only for diagnostics and plotting.
    viz_env, viz_dataset = d4rl_ant.get_env_and_dataset(env_name)
    viz = ant_diagnostics.Visualizer(env_name, viz_env, viz_dataset, discount=discount)
    init_state = np.copy(viz_dataset['observations'][0])
    init_state[:2] = (12.5, 8)
    aux_env = dict(viz_env=viz_env, viz_dataset=viz_dataset, viz=viz)

  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


Target Goal:  (32.97065962507391, 25.122146318993995)


load datafile: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:01<00:00,  6.83it/s]


Target Goal:  (33.35972718454154, 24.228173652309895)


load datafile: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:01<00:00,  6.84it/s]


Attempting to load from:  /home/bmil12/PycharmProjects/TempDATA/antmaze_aux/antmaze-large-play-v2-aux.npz


In [5]:
# Template observation: the first entry of every leaf of dataset['observations'].
# NOTE(review): jax.tree_map is deprecated in recent JAX releases in favour of
# jax.tree_util.tree_map — confirm against the installed version.
base_observation = jax.tree_map(lambda arr: arr[0], dataset['observations'])
env.reset()
discrete = False

# Goal-conditioned dataset wrapper; the p_* goal probabilities, discount and
# p_aug come from the configuration cell.
train_dataset = GCDataset(
    dataset,
    p_currgoal=p_currgoal, p_trajgoal=p_trajgoal, p_randomgoal=p_randomgoal,
    discount=discount, p_aug=p_aug,
)

total_steps = train_steps
# A single sampled transition, used below as example input for the learner.
example_batch = dataset.sample(1)

# Only procgen environments use the discrete path; example_action is then the
# maximum over all dataset actions, kept as an array via keepdims.
if 'procgen' in env_name:
    discrete = True
    example_action = np.max(dataset['actions'], keepdims=True)
    print('===============================================')
    print(example_action)
    print(example_action.shape)
    print('===============================================')

# Build the agent; hidden-layer tuples repeat the configured width
# `*_num_layers` times.
agent = learner.create_learner(
    seed,
    example_batch['observations'],
    example_batch['actions'] if not discrete else example_action,
    lr=lr,
    value_hidden_dims=(value_hidden_dim,) * value_num_layers,
    actor_hidden_dims=(actor_hidden_dim,) * actor_num_layers,
    discount=discount,
    tau=tau,
    expectile=expectile,
    use_layer_norm=use_layer_norm,
    skill_dim=skill_dim,
    skill_expectile=skill_expectile,
    skill_temperature=skill_temperature,
    skill_discount=skill_discount,
    rollout_length=rollout_length,
    smoothing_coef=smoothing_coef,
    encoder=encoder,
    decoder=decoder,
    discrete=discrete
)

# Rollout sampler wrapped by supply_rng. NOTE(review): it captures the agent
# as created here, not the one produced by later training or restoring — confirm
# this is intended.
rollout_fn = supply_rng(agent.sample_rollout)

# Optionally restore agent parameters from a checkpoint directory.
# `restore_path` is a glob pattern that must match exactly one directory.
if restore_path is not None:
    candidates = glob.glob(restore_path)
    if not candidates:
        raise Exception(f'Path does not exist: {restore_path}')
    if len(candidates) > 1:
        raise Exception(f'Multiple matching paths exist for: {restore_path}')

    # Without an epoch the default file is used, otherwise the epoch-tagged one.
    checkpoint_file = '/params.pkl' if restore_epoch is None else f'/params_{restore_epoch}.pkl'
    restore_path = candidates[0] + checkpoint_file

    # SECURITY: pickle.load can execute arbitrary code; only restore from
    # checkpoint files you trust.
    with open(restore_path, "rb") as f:
        load_dict = pickle.load(f)
    agent = flax.serialization.from_state_dict(agent, load_dict['agent'])
    print(f'Restored from {restore_path}')

# Pick a fixed block of 50 consecutive dataset indices as an example
# trajectory; the offset depends on the environment family.
if 'antmaze' in env_name:
    example_trajectory = train_dataset.sample(50, indx=np.arange(1000, 1050), evaluation=True)
elif 'kitchen' in env_name or 'calvin' in env_name:
    example_trajectory = train_dataset.sample(50, indx=np.arange(0, 50))
elif 'procgen-500' in env_name or 'procgen-1000' in env_name:
    example_trajectory = train_dataset.sample(50, indx=np.arange(5000, 5050))
else:
    raise NotImplementedError

# CSV loggers and wall-clock markers (none of them is written to or read in
# the loop below).
train_logger = CsvLogger(os.path.join(save_dir, 'train.csv'))
eval_logger = CsvLogger(os.path.join(save_dir, 'eval.csv'))
first_time = time.time()
last_time = time.time()

# Representation training: steps run from 1 to total_steps * lp_ae inclusive.
agent_list = []
num_repr_steps = int(total_steps * lp_ae + 1)
progress = tqdm.tqdm(range(1, num_repr_steps), smoothing=0.1, dynamic_ncols=True)
for i in progress:
    batch = train_dataset.sample(batch_size)
    agent, update_info = agent.repr_update(batch)

    # Keep a snapshot of the agent every 250000 steps for later comparison.
    if not i % 250000:
        agent_list.append(agent)

Target Goal:  (32.45636577796535, 25.353937391760372)
Extra kwargs: {}


  deprecation(
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000000/1000000 [39:01<00:00, 427.05it/s]


In [6]:
# Switch matplotlib from the 'Agg' backend selected in the helper cell to
# 'TkAgg' before the plt.show() calls further down.
# NOTE(review): presumably this needs a display and Tk to be available — confirm
# when running on a headless machine.
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt

In [7]:
# Render one 200x200 RGB frame of the training env (cur_frame is not read by
# the cells shown below).
cur_frame = env.render(mode='rgb_array', width=200, height=200)
# Pull the visualisation env, dataset and visualizer back out of aux_env.
viz_env, viz_dataset, viz = aux_env['viz_env'], aux_env['viz_dataset'], aux_env['viz']

In [8]:
# Reset the visualisation env. `state` is indexed with ['observation'] below,
# and its entries from index 2 onward are reused as the fixed part of every
# synthetic state.
state = viz_env.reset()

Target Goal:  (4.927227463529692, 8.960417542889838)


# t-SNE plot

In [9]:
# New figure with an Agg canvas attached; `ax` is its current axes and is
# drawn on by the next cell.
fig = plt.figure(tight_layout=True)
canvas = FigureCanvas(fig)
ax = plt.gca()

In [49]:
obs = state['observation']
all_x = []
all_y = []
all_z = []

viz_env.draw(ax)

# Per maze size: upper bounds of the sampling grid and the axis-aligned boxes
# (x_lo, x_hi, y_lo, y_hi; bounds inclusive) whose points are skipped.
# NOTE(review): the boxes presumably cover the interior wall blocks of each
# maze — verify against the maze map.
if 'medium' in env_name:
    grid_upper = (22.1, 22.1)
    excluded_boxes = (
        (-2, 2, 6, 10),
        (6, 14, -2, 2),
        (6, 10, 2, 6),
        (14, 22, 6, 10),
        (6, 10, 10, 14),
        (2, 6, 14, 18),
        (14, 18, 14, 18),
        (10, 14, 18, 22),
    )
elif 'large' in env_name:
    grid_upper = (38.1, 26.1)
    excluded_boxes = (
        (-2, 2, 18, 22),
        (2, 10, 2, 6),
        (2, 18, 10, 14),
        (6, 10, 14, 26),
        (14, 18, 14, 22),
        (14, 18, -2, 6),
        (22, 26, 2, 14),
        (26, 34, 10, 14),
        (30, 34, 2, 6),
        (30, 38, 18, 22),
        (22, 26, 18, 26),
    )
else:
    grid_upper = None
    excluded_boxes = ()

if grid_upper is not None:
    # Grid starts at -1.9 on both axes with a step of 0.5.
    for i in np.arange(-1.9, grid_upper[0], 0.5):
        for j in np.arange(-1.9, grid_upper[1], 0.5):
            inside_excluded = any(
                x_lo <= i <= x_hi and y_lo <= j <= y_hi
                for x_lo, x_hi, y_lo, y_hi in excluded_boxes
            )
            if not inside_excluded:
                all_x.append(i)
                all_y.append(j)
                # x + y is used only as the colour value of the point.
                all_z.append(i + j)

ax.scatter(all_x, all_y, c=all_z, s = 5, marker='o', cmap='magma')

<matplotlib.collections.PathCollection at 0x7fd50727cfa0>

In [50]:
# Save the current figure as '<env_name>.pdf' in the working directory, then
# show it.
plt.savefig(f'{env_name}.pdf', format='pdf')
plt.show()

In [51]:
import scipy.stats as st
from MulticoreTSNE import MulticoreTSNE as TSNE
import seaborn as sns

In [52]:
# One synthetic state per sampled grid point: the point's (x, y) followed by
# the entries of the reset observation from index 2 onward, repeated per row.
stack = np.stack([all_x, all_y], axis=1)
fixed_tail = state['observation'][2:].reshape(1, -1)
repeated_tail = np.repeat(fixed_tail, stack.shape[0], axis=0)
states = np.concatenate([stack, repeated_tail], axis=1)

In [53]:
# 2-D t-SNE embedding of the raw synthetic states.
# NOTE: the variable name is misspelled ("staes"); it is kept because the
# plotting cell below reads it under this name.
# TODO(review): no random seed is passed, so the embedding may differ between
# runs — confirm whether a fixed seed is wanted.
TSNE_staes = TSNE(n_jobs=4, n_components=2, n_iter=5000).fit_transform(states)

In [54]:
# `date` is imported here and is also needed by the value-map save call at
# the end of the notebook.
from datetime import date
# Scatter the state embedding, coloured by the same x + y value as the grid plot.
plt.scatter(TSNE_staes[:, 0], TSNE_staes[:, 1], c=all_z, s=5, marker='o', cmap='magma')
plt.savefig(f'{date.today()}-{env_name}-state-TSNE.pdf', format='pdf')
plt.show()

In [55]:
# Index into agent_list, which holds one snapshot per 250000 training steps;
# index 2 is therefore the third snapshot.
agent_id = 2
# Learned representation of every synthetic state from that snapshot.
representation = agent_list[agent_id].get_phi(states)

In [None]:
# t-SNE of the learned representation, coloured like the state plots.
# NOTE: n_iter is also read by the value-map save call at the end of the
# notebook, so this cell must run before it.
n_iter = 10000
TSNE_repr = TSNE(n_jobs=4, n_components=2, n_iter=n_iter).fit_transform(representation)
plt.scatter(TSNE_repr[:, 0], TSNE_repr[:, 1], c=all_z, s=5, marker='o', cmap='magma')
# plt.savefig(f'{env_name}-representation-TSNE-{n_iter}-{smoothing_coef}-{date.today()}.pdf', format='pdf')
plt.show()

## Value Map

In [20]:
# Coarser (step 1) grid for the value map on the large maze.
if 'large' in env_name:
    # Start from empty lists. Without this, the points appended here would be
    # added on top of the 0.5-step grid left in all_x / all_y / all_z by the
    # t-SNE section, and the value map would mix both grids.
    all_x, all_y, all_z = [], [], []

    # Axis-aligned boxes (x_lo, x_hi, y_lo, y_hi; bounds inclusive) whose
    # points are skipped.
    # NOTE(review): presumably the interior wall blocks of the large maze —
    # verify against the maze map.
    value_map_excluded_boxes = (
        (-2, 2, 18, 22),
        (2, 10, 2, 6),
        (2, 18, 10, 14),
        (6, 10, 14, 26),
        (14, 18, 14, 22),
        (14, 18, -2, 6),
        (22, 26, 2, 14),
        (26, 34, 10, 14),
        (30, 34, 2, 6),
        (30, 38, 18, 22),
        (22, 26, 18, 26),
    )

    for i in np.arange(-1.5, 38.1, 1):
        for j in np.arange(-1.5, 26.1, 1):
            if any(
                x_lo <= i <= x_hi and y_lo <= j <= y_hi
                for x_lo, x_hi, y_lo, y_hi in value_map_excluded_boxes
            ):
                continue
            all_x.append(i)
            all_y.append(j)
            all_z.append(i + j)

In [21]:
# One synthetic state per grid point: the point's (x, y) followed by the
# entries of the reset observation from index 2 onward, repeated per row.
stack = np.stack([all_x, all_y], axis=1)
fixed_tail = state['observation'][2:].reshape(1, -1)
repeated_tail = np.repeat(fixed_tail, stack.shape[0], axis=0)
states = np.concatenate([stack, repeated_tail], axis=1)

In [38]:
# Goal for the value map: a single-row state whose first two entries are
# (goal_x, goal_y) and whose remaining entries come from the reset observation.
goal_x = goal_y = 2
goal_stack = np.stack([[goal_x], [goal_y]], axis=1)
goal_tail = state['observation'][2:].reshape(1, -1)
goal_state = np.concatenate([goal_stack, goal_tail], axis=1)

In [39]:
# Evaluate both value heads of the third snapshot for every grid state against
# the single goal state, average them, and min-max normalise the result.
v1, v2 = agent_list[2].network(states, goal_state, method='value')
v = (v1 + v2) / 2
v_lowest, v_highest = np.min(v), np.max(v)
normalized_v = (v - v_lowest) / (v_highest - v_lowest)

In [40]:
# Value map: one square marker per grid point, coloured by normalised value.
fig = plt.figure(tight_layout=True)
canvas = FigureCanvas(fig)
ax = plt.gca()
ax.scatter(all_x, all_y, c=normalized_v, s = 19, marker='s', cmap='viridis')
# NOTE: the file name embeds `n_iter` (the t-SNE iteration count) and needs
# `date`; both are defined in the t-SNE cells above, so those must run first.
plt.savefig(f'{env_name}-valuemap-{goal_x}-{goal_y}-{n_iter}-{smoothing_coef}-{date.today()}.pdf', format='pdf')
plt.show()