# Methods for loading a trained agent provided by Markel

In [1]:
import json
import os

from typing import Any

from gymnasium.spaces import Box
import numpy as np

import torch

from omnisafe.common import Normalizer
from omnisafe.envs.wrapper import ActionRepeat, ActionScale, ObsNormalize, TimeLimit
from omnisafe.models.actor_critic.constraint_actor_q_critic import ConstraintActorQCritic
from omnisafe.utils.config import Config
from omnisafe.envs.core import CMDP, make
from omnisafe.algorithms.model_based.base.ensemble import EnsembleDynamicsModel
from omnisafe.models.actor import ActorBuilder
from typing import Dict, Tuple, Any


def _load_model_and_env(
    save_dir: str,
    model_name: str,
    cfgs: Config,
    env_kwargs: Dict[str, Any],
) -> None:
    """Load the model from the save directory.

    Args:
        save_dir (str): Directory where the model is saved.
        model_name (str): Name of the model.
        env_kwargs (dict[str, Any]): Keyword arguments for the environment.

    Raises:
        FileNotFoundError: If the model is not found.
    """
    # load the saved model
    model_path = os.path.join(save_dir, 'torch_save', model_name)
    try:
        model_params = torch.load(model_path)
    except FileNotFoundError as error:
        raise FileNotFoundError('The model is not found in the save directory.') from error

    # load the environment
    env = make(**env_kwargs)

    observation_space = env.observation_space
    action_space = env.action_space
    if 'Saute' in cfgs['algo'] or 'Simmer' in cfgs['algo']:
        safety_budget = (
            cfgs.algo_cfgs.safety_budget
            * (1 - cfgs.algo_cfgs.saute_gamma**cfgs.algo_cfgs.max_ep_len)
            / (1 - cfgs.algo_cfgs.saute_gamma)
            / cfgs.algo_cfgs.max_ep_len
            * torch.ones(1)
        )
    assert isinstance(observation_space, Box), 'The observation space must be Box.'
    assert isinstance(action_space, Box), 'The action space must be Box.'

    if cfgs['algo_cfgs']['obs_normalize']:
        obs_normalizer = Normalizer(shape=observation_space.shape, clip=5)
        obs_normalizer.load_state_dict(model_params['obs_normalizer'])
        env = ObsNormalize(env, device=torch.device('cpu'), norm=obs_normalizer)
    if env.need_time_limit_wrapper:
        env = TimeLimit(env, device=torch.device('cpu'), time_limit=1000)
    env = ActionScale(env, device=torch.device('cpu'), low=-1.0, high=1.0)

    if hasattr(cfgs['algo_cfgs'], 'action_repeat'):
        env = ActionRepeat(
            env,
            device=torch.device('cpu'),
            times=cfgs['algo_cfgs']['action_repeat'],
        )
    if hasattr(cfgs, 'algo') and cfgs['algo'] in [
        'LOOP',
        'SafeLOOP',
        'PETS',
        'CAPPETS',
        'RCEPETS',
        'CCEPETS',
    ]:
        dynamics_state_space = (
            env.coordinate_observation_space
            if env.coordinate_observation_space is not None
            else env.observation_space
        )
        assert env.action_space is not None and isinstance(
            env.action_space.shape,
            tuple,
        )
        if isinstance(env.action_space, Box):
            action_space = env.action_space
        else:
            raise NotImplementedError
        if cfgs['algo'] in ['LOOP', 'SafeLOOP']:
            actor_critic = ConstraintActorQCritic(
                obs_space=dynamics_state_space,
                act_space=action_space,
                model_cfgs=cfgs.model_cfgs,
                epochs=1,
            )
        if actor_critic is not None:
            actor_critic.load_state_dict(model_params['actor_critic'])
            actor_critic.to('cpu')
        dynamics = EnsembleDynamicsModel(
            model_cfgs=cfgs.dynamics_cfgs,
            device=torch.device('cpu'),
            state_shape=dynamics_state_space.shape,
            action_shape=action_space.shape,
            actor_critic=actor_critic,
            rew_func=None,
            cost_func=env.get_cost_from_obs_tensor,
            terminal_func=None,
        )
        dynamics.ensemble_model.load_state_dict(model_params['dynamics'])
        dynamics.ensemble_model.to('cpu')
        if cfgs['algo'] in ['CCEPETS', 'RCEPETS', 'SafeLOOP']:
            algo_to_planner = {
                'CCEPETS': (
                    'CCEPlanner',
                    {'cost_limit': cfgs['algo_cfgs']['cost_limit']},
                ),
                'RCEPETS': (
                    'RCEPlanner',
                    {'cost_limit': cfgs['algo_cfgs']['cost_limit']},
                ),
                'SafeLOOP': (
                    'SafeARCPlanner',
                    {
                        'cost_limit': cfgs['algo_cfgs']['cost_limit'],
                        'actor_critic': actor_critic,
                    },
                ),
            }
        elif cfgs['algo'] in ['PETS', 'LOOP']:
            algo_to_planner = {
                'PETS': ('CEMPlanner', {}),
                'LOOP': ('ARCPlanner', {'actor_critic': actor_critic}),
            }
        elif cfgs['algo'] in ['CAPPETS']:
            lagrange: torch.nn.Parameter = torch.nn.Parameter(
                model_params['lagrangian_multiplier'].to('cpu'),
                requires_grad=False,
            )
            algo_to_planner = {
                'CAPPETS': (
                    'CAPPlanner',
                    {
                        'cost_limit': cfgs['lagrange_cfgs']['cost_limit'],
                        'lagrange': lagrange,
                    },
                ),
            }
        planner_name = algo_to_planner[cfgs['algo']][0]
        planner_special_cfgs = algo_to_planner[cfgs['algo']][1]
        planner_cls = globals()[f'{planner_name}']
        planner = planner_cls(
            dynamics=dynamics,
            planner_cfgs=cfgs.planner_cfgs,
            gamma=float(cfgs.algo_cfgs.gamma),
            cost_gamma=float(cfgs.algo_cfgs.cost_gamma),
            dynamics_state_shape=dynamics_state_space.shape,
            action_shape=action_space.shape,
            action_max=1.0,
            action_min=-1.0,
            device='cpu',
            **planner_special_cfgs,
        )

    else:
        if 'Saute' in cfgs['algo'] or 'Simmer' in cfgs['algo']:
            observation_space = Box(
                low=np.hstack((observation_space.low, -np.inf)),
                high=np.hstack((observation_space.high, np.inf)),
                shape=(observation_space.shape[0] + 1,),
            )
        actor_type = cfgs['model_cfgs']['actor_type']
        pi_cfg = cfgs['model_cfgs']['actor']
        weight_initialization_mode = cfgs['model_cfgs']['weight_initialization_mode']
        actor_builder = ActorBuilder(
            obs_space=observation_space,
            act_space=action_space,
            hidden_sizes=pi_cfg['hidden_sizes'],
            activation=pi_cfg['activation'],
            weight_initialization_mode=weight_initialization_mode,
        )
        actor = actor_builder.build_actor(actor_type)
        actor.load_state_dict(model_params['pi'])

    return env, actor


def _load_cfgs(save_dir):
    cfg_path = os.path.join(save_dir, 'config.json')
    try:
        with open(cfg_path, encoding='utf-8') as file:
            kwargs = json.load(file)
    except FileNotFoundError as error:
        raise FileNotFoundError(
            f'The config file is not found in the save directory{save_dir}.',
        ) from error
    return Config.dict2config(kwargs)


# LOG_DIR should contain two things:
# 1. config.json
# 2. torch_save/{model_name}
#
# model_name usually looks like 'epoch-500.pt'
def load_guide(save_dir, model_name) -> Tuple[CMDP, ConstraintActorQCritic]:
    cfgs = _load_cfgs(save_dir)

    env_kwargs = {
        'env_id': cfgs['env_id'],
        'num_envs': 1,
    }

    env, actor = _load_model_and_env(save_dir, model_name, cfgs, env_kwargs)
    return env, actor


# Generating a dataset

In [2]:
from omnisafe.models.actor import GaussianLearningActor
import safety_gymnasium
import torch

env = safety_gymnasium.make('SafetyPointGoal1-v0', max_episode_steps=1000)

def create_random_agent(env, hidden_layers=[255,255,255,255], activation='relu', weight_initialization_mode='orthogonal'):
    obs_space = env.observation_space
    act_space = env.action_space
    return GaussianLearningActor(obs_space, act_space, hidden_layers, activation=activation, weight_initialization_mode=weight_initialization_mode)

In [3]:
env.obs_space_dict.keys()

KeysView(Dict('accelerometer': Box(-inf, inf, (3,), float64), 'velocimeter': Box(-inf, inf, (3,), float64), 'gyro': Box(-inf, inf, (3,), float64), 'magnetometer': Box(-inf, inf, (3,), float64), 'goal_lidar': Box(0.0, 1.0, (16,), float64), 'hazards_lidar': Box(0.0, 1.0, (16,), float64), 'vases_lidar': Box(0.0, 1.0, (16,), float64)))

In [4]:
env.action_space

Box(-1.0, 1.0, (2,), float64)

In [None]:
import numpy as np

def run_trajectory(env, agent, safe_agent, min_rand_steps=100, max_rand_steps=400, cost_window=200, deterministic=True):
    observation, info = env.reset()
    episode_over = False
    is_sampling = False
    sampled_cost = 0
    sampling_step = 0
    agent_instance_for_pos = env.unwrapped.__getattribute__("task").agent
    data = []
    labels = []
    # get a random number for the amount of steps the random agent should take before a sample is created
    num_steps = np.random.randint(min_rand_steps, max_rand_steps)
    # gather data
    for i in range(num_steps):
        # Discard trajectory if the agent moves out of the checkered 7x7 space, coordinates were tested manually
        if abs(agent_instance_for_pos.pos[0]) >= 3.5 or abs(agent_instance_for_pos.pos[1]) >= 3.5: 
            break
        obs_tensor = torch.from_numpy(observation).float()
        action = agent.predict(obs_tensor, deterministic=deterministic).detach().numpy()
        if i == num_steps - 1:
            data.append(np.append(observation, action)) 
        observation, reward, cost, terminated, truncated, info = env.step(action)
        episode_over = terminated or truncated
        if episode_over:
            break
    if not episode_over:
        # sample with the pre trained agent
        for i in range(cost_window):
            obs_tensor = torch.from_numpy(observation).float()
            action = safe_agent.predict(obs_tensor, deterministic=deterministic).detach().numpy()
            observation, reward, cost, terminated, truncated, info = env.step(action)
            episode_over = terminated or truncated
            sampled_cost += cost
            sampling_step += 1
            if episode_over or i == cost_window - 1:
                labels.append(sampled_cost)
                break
    if len(labels) == 0:
        # If episode ended before sampling could happen, return an empty data and labels array
        # More of a safety measure, probably obsolete
        data = []
    if len(data) == 0:
        labels = []
    env.close()
    # assertion to make sure every data point has a label
    print(len(data), len(labels))
    assert len(data) == len(labels)
    return np.array(data), np.array(labels)

In [37]:
# load my saved trained agent
safe_agent = load_guide("../runs/PPOLag-{SafetyPointGoal1-v0}/seed-000-2025-05-13-17-51-08", "epoch-50.pt")[1]

  model_params = torch.load(model_path)


In [38]:
data, labels = run_trajectory(env, create_random_agent(env), safe_agent)
labels.shape

(1,)

In [39]:
print(labels)

[0.]


In [40]:
data.shape

(1, 62)

In [41]:
len(data)

1

In [42]:
from tqdm import tqdm

def generate_dataset(env, safe_agent, amount=1000):
    data = []
    labels = []
    for _ in tqdm(range(amount), desc="Generating data points", unit=" sample "):
        data_i = []
        while len(data_i) == 0: # Ensure that a sample is generated
            data_i, labels_i = run_trajectory(env, create_random_agent(env), safe_agent)
        data.append(data_i)
        labels.append(labels_i)
    return np.concatenate(data, axis=0)[:amount], np.concatenate(labels, axis=0)[:amount]

In [51]:
data, labels = generate_dataset(env, safe_agent, amount=10000)

Generating data points:   0%|          | 0/10000 [00:00<?, ? sample /s]

0 1





AssertionError: 

In [None]:
data

array([[  2.85013843,   0.09889716,   9.81      , ...,   0.        ,
          0.51997632,   1.10723567],
       [  2.18100397,  11.70111903,   9.81      , ...,   0.        ,
          0.49840754,  -1.83509505],
       [ -4.60043508,  -9.89569702,   9.81      , ...,   0.        ,
         -0.20032974,   0.43169472],
       ...,
       [ -3.24858488, -11.52092151,   9.81      , ...,   0.        ,
         -0.18175644,   0.15351345],
       [  0.36310884,   0.23373699,   9.81      , ...,   0.        ,
          0.82411647,   1.38031697],
       [  2.24830268,   0.29231609,   9.81      , ...,   0.        ,
          0.50413424,   0.66061491]])

In [114]:
data.shape

(10000, 62)

In [113]:
labels

array([0., 0., 0., ..., 1., 1., 1.])

In [115]:
labels.shape

(10000,)

# Preprocessing the dataset for better NN performance

In [80]:
%pip install scikit-learn scipy pandas

Note: you may need to restart the kernel to use updated packages.


In [116]:
obs_column_names = ['accelerometer0', 'accelerometer1', 'accelerometer2', 'velocimeter0', 'velocimeter1', 'velocimeter2', 'gyro0', 'gyro1', 'gyro2', 'magnetometer0', 'magnetometer1', 'magnetometer2']
for key in ['goal_lidar', 'hazards_lidar', 'vases_lidar']:
    for i in range(16):
        obs_column_names.append(key+str(i))
obs_column_names.append("action0")
obs_column_names.append("action1")
len(obs_column_names)

62

In [117]:
import pandas as pd
df = pd.DataFrame(data=data, columns=obs_column_names)
df['exp_cost'] = labels
df.head()

Unnamed: 0,accelerometer0,accelerometer1,accelerometer2,velocimeter0,velocimeter1,velocimeter2,gyro0,gyro1,gyro2,magnetometer0,...,vases_lidar9,vases_lidar10,vases_lidar11,vases_lidar12,vases_lidar13,vases_lidar14,vases_lidar15,action0,action1,exp_cost
0,-2.859485,-6.547374,9.81,-0.002183,-0.022451,0.0,0.0,0.0,1.187039,0.439323,...,0.0,0.0,0.048732,0.58323,0.534497,0.0,0.0,-0.308633,0.632612,0.0
1,-3.285026,15.194056,9.81,0.221309,0.20702,0.0,0.0,0.0,1.295281,-0.168696,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.787477,-1.150739,0.0
2,1.982068,-0.473029,9.81,0.552082,-0.056404,0.0,0.0,0.0,2.837879,-0.003965,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.468978,0.762941,0.0
3,1.987091,-3.81506,9.81,0.504868,-0.097781,0.0,0.0,0.0,1.895468,0.423378,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.366803,-0.409778,0.0
4,1.688365,0.27868,9.81,0.707879,0.046088,0.0,-0.0,0.0,-2.897179,0.450792,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.873569,-0.257621,0.0


In [118]:
# save the create dataset in a pickle file
df.to_pickle("SafetyPointGoal1Dataset0.pkl")

In [119]:
df.describe()

Unnamed: 0,accelerometer0,accelerometer1,accelerometer2,velocimeter0,velocimeter1,velocimeter2,gyro0,gyro1,gyro2,magnetometer0,...,vases_lidar9,vases_lidar10,vases_lidar11,vases_lidar12,vases_lidar13,vases_lidar14,vases_lidar15,action0,action1,exp_cost
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,...,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,0.215973,0.208202,9.81,0.381894,-0.124796,0.0,0.0,0.0,0.688522,0.033,...,0.03088,0.0276,0.023616,0.019791,0.018726,0.017807,0.016469,0.24606,0.208537,0.7369
std,2.663695,7.699414,1.494856e-15,0.249333,0.164913,0.0,0.0,0.0,1.96119,0.354208,...,0.117286,0.114021,0.106218,0.094793,0.093052,0.090702,0.08285,0.62143,0.84501,5.49528
min,-5.319843,-18.154482,9.81,-1.264663,-0.975777,0.0,0.0,0.0,-2.998426,-0.5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.362725,-1.99562,0.0
25%,-3.135914,-5.502001,9.81,0.233487,-0.172205,0.0,0.0,0.0,-0.411241,-0.32005,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.382356,-0.0223,0.0
50%,1.815366,0.35943,9.81,0.387127,-0.10018,0.0,0.0,0.0,1.23927,0.072442,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.464822,0.445041,0.0
75%,2.266183,1.782016,9.81,0.543069,-0.038952,0.0,0.0,0.0,2.161658,0.384359,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.750187,0.709381,0.0
max,5.499027,18.905644,9.81,1.464998,0.733211,0.0,0.0,0.0,3.024529,0.5,...,0.931661,0.932327,0.932784,0.932326,0.922131,0.92273,0.914572,2.151439,2.642204,127.0


In [120]:
# Seperate the dataset into data and label again
X = df.drop(columns=["exp_cost"])
y = df.exp_cost

In [121]:
# Scaling the data with MinMaxScaler
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler((0,1))
scaled = scaler.fit_transform(X)
scaled_X = pd.DataFrame(scaled, columns=obs_column_names)
scaled_X.head()

Unnamed: 0,accelerometer0,accelerometer1,accelerometer2,velocimeter0,velocimeter1,velocimeter2,gyro0,gyro1,gyro2,magnetometer0,...,vases_lidar8,vases_lidar9,vases_lidar10,vases_lidar11,vases_lidar12,vases_lidar13,vases_lidar14,vases_lidar15,action0,action1
0,0.227414,0.313197,1.0,0.462505,0.557831,0.0,0.0,0.0,0.694919,0.939323,...,0.0,0.0,0.0,0.052244,0.625564,0.579633,0.0,0.0,0.299955,0.566695
1,0.18808,0.89985,1.0,0.54438,0.692104,0.0,0.0,0.0,0.71289,0.331304,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.611867,0.182172
2,0.674924,0.477102,0.5,0.665557,0.537963,0.0,0.0,0.0,0.96901,0.496035,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.805797,0.594796
3,0.675388,0.386923,1.0,0.64826,0.513752,0.0,0.0,0.0,0.81254,0.923378,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.492159,0.341937
4,0.647776,0.497385,0.25,0.722633,0.597936,0.0,0.0,0.0,0.01681,0.950792,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.636366,0.374744


In [122]:
# Divide data into train, test and validation set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(scaled_X, y, test_size=0.2)
X_train, X_validate, y_train, y_validate = train_test_split(X_train, y_train, test_size=0.2)

In [123]:
X_train.shape

(6400, 62)

In [124]:
X_validate.shape

(1600, 62)

In [125]:
X_test.shape

(2000, 62)

In [126]:
from torch.utils.data import TensorDataset

# Convert dataset into a TensorDataset
train_dataset = TensorDataset(torch.from_numpy(X_train.values.astype(np.float32)), torch.from_numpy(y_train.values.astype(np.float32)))
validation_dataset = TensorDataset(torch.from_numpy(X_validate.values.astype(np.float32)), torch.from_numpy(y_validate.values.astype(np.float32)))
test_dataset = TensorDataset(torch.from_numpy(X_test.values.astype(np.float32)), torch.from_numpy(y_test.values.astype(np.float32)))


In [170]:
from torch.utils.data import DataLoader

# Putting data into dataloaders for PyTorch
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Building a model 

In [128]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ExpectedCostNN(nn.Module):
    def __init__(self):
        super(ExpectedCostNN, self).__init__()
        self.fc1 = nn.Linear(62, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
    
    
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        return x

In [129]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} device")

Using cuda device


In [130]:
model = ExpectedCostNN().to(device)
print(model)

ExpectedCostNN(
  (fc1): Linear(in_features=62, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=1, bias=True)
)


# Training a neural network on the dataset

In [None]:
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [None]:
# code from https://docs.pytorch.org/tutorials/beginner/basics/quickstart_tutorial.html
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X).squeeze()
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 10 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [167]:
# code adapted to regression problem from original code https://docs.pytorch.org/tutorials/beginner/basics/quickstart_tutorial.html
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss= 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X).squeeze()
            test_loss += loss_fn(pred, y).item()

    test_loss /= num_batches
    print(f"Avg loss: {test_loss:>8f} \n")

In [171]:
# code from https://docs.pytorch.org/tutorials/beginner/basics/quickstart_tutorial.html
epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_loader, model, loss_fn, optimizer)
    test(test_loader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 0.031250  [   64/ 6400]
loss: 151.093750  [  704/ 6400]
loss: 1.593750  [ 1344/ 6400]
loss: 45.562500  [ 1984/ 6400]
loss: 0.062500  [ 2624/ 6400]
loss: 1.296875  [ 3264/ 6400]
loss: 30.078125  [ 3904/ 6400]
loss: 25.765625  [ 4544/ 6400]
loss: 16.000000  [ 5184/ 6400]
loss: 48.421875  [ 5824/ 6400]
Avg loss: 29.622070 

Epoch 2
-------------------------------
loss: 2.500000  [   64/ 6400]
loss: 1.468750  [  704/ 6400]
loss: 22.390625  [ 1344/ 6400]
loss: 29.328125  [ 1984/ 6400]
loss: 8.984375  [ 2624/ 6400]
loss: 18.656250  [ 3264/ 6400]
loss: 111.781250  [ 3904/ 6400]
loss: 0.015625  [ 4544/ 6400]
loss: 0.046875  [ 5184/ 6400]
loss: 4.828125  [ 5824/ 6400]
Avg loss: 29.622070 

Epoch 3
-------------------------------
loss: 0.015625  [   64/ 6400]
loss: 26.546875  [  704/ 6400]
loss: 8.296875  [ 1344/ 6400]
loss: 0.031250  [ 1984/ 6400]
loss: 15.031250  [ 2624/ 6400]
loss: 0.015625  [ 3264/ 6400]
loss: 1.593750  [ 3904/ 6400]
loss: 13.343

# Evaluation of the model / Hyperparameter tuning