**Importy**

In [None]:
# Imports
import gymnasium as gym
import numpy as np
import stable_baselines3
import torch
from stable_baselines3 import SAC,TD3 # Algorithms
from stable_baselines3.common.evaluation import evaluate_policy # Model evaluation
from stable_baselines3.common.logger import configure # Logger for TensorBoard
from stable_baselines3.common.monitor import Monitor # Per-episode reward/length statistics
from stable_baselines3.common.noise import NormalActionNoise
from stable_baselines3.common.vec_env import SubprocVecEnv

# Report whether PyTorch can see a CUDA device
cuda_available = torch.cuda.is_available()
print("GPU Available:", cuda_available)

**Setup**

In [None]:
# Gymnasium environment id used for both training and testing
benchmark = "BipedalWalkerHardcore-v3"
model_ = TD3  # Algorithm class to train (author's note: A2C -> train on CPU)
max_stepov_na_epizodu = 1000 # Max steps per episode, passed to gym.make as max_episode_steps (author's note: target for the model)
num_envs = 6  # Number of parallel environments



# Custom wrapper meant to penalize standing still
class CustomBipedalWalker(gym.Wrapper):
    """Placeholder wrapper around the walker environment.

    NOTE(review): the comment above announces a stand-still penalty, but this
    class defines no ``step``/``reward`` override, so it adds nothing on top
    of ``gym.Wrapper`` yet.
    """

    def __init__(self, env):
        # Only forwards the wrapped environment to gym.Wrapper.
        super().__init__(env)

# Create vectorized environments
def _make_training_env():
    """Build one training environment; SubprocVecEnv calls this once per worker.

    The raw environment is wrapped in ``Monitor`` so that finished episodes
    report their reward and length. Stable-Baselines3 only adds ``Monitor``
    automatically when it has to vectorize a plain environment itself; a
    ready-made VecEnv is used as-is, which would leave the episode statistics
    out of the training logs and make ``evaluate_policy`` emit a warning.
    """
    base_env = gym.make(benchmark, max_episode_steps=max_stepov_na_epizodu)
    # Monitor sits directly on the base env; the custom wrapper goes on top.
    return CustomBipedalWalker(Monitor(base_env))


vec_env = SubprocVecEnv([_make_training_env for _ in range(num_envs)])

# Gaussian exploration noise: zero mean, 0.2 standard deviation per action dimension
action_shape = vec_env.action_space.shape
noise_mean = np.zeros(action_shape)
noise_sigma = np.full(action_shape, 0.2)
action_noise = NormalActionNoise(mean=noise_mean, sigma=noise_sigma)

# Model definition
# The update schedule is pinned explicitly instead of relying on library
# defaults: Stable-Baselines3 rejects episode-based train_freq when more than
# one environment is used, and TD3/DDPG defaulted to (1, "episode") before
# SB3 2.3.0. train_freq=1 / gradient_steps=1 match the current defaults, i.e.
# one gradient step after every vectorized environment step.
# (gradient_steps=-1 would instead do one update per collected transition.)
model = model_(
    "MlpPolicy",
    vec_env,
    verbose=1,
    device="cuda",
    action_noise=action_noise,
    tensorboard_log=f"./log/{model_.__name__}_{benchmark}",
    batch_size=256,
    learning_rate=0.001,
    buffer_size=1000000,
    gamma=0.99,
    learning_starts=1000,
    train_freq=1,
    gradient_steps=1,
)

**Tréning**

In [None]:
# Train model
model.learn(total_timesteps=1000000, log_interval=1, progress_bar=True)

# Save model first, so a failure during evaluation cannot lose the trained weights
model.save(model_.__name__ + "_" + benchmark + "_model")

# Evaluate policy over several episodes: with a single episode the reported
# standard deviation is always 0, which makes the "± std" output meaningless.
train_eval_episodes = 10

# Evaluation with deterministic=False
mean_reward, std_reward = evaluate_policy(model, vec_env, n_eval_episodes=train_eval_episodes, deterministic=False)
print(f"Priemerná odmena: {mean_reward} ± {std_reward}")

# Evaluation with deterministic=True
mean_reward, std_reward = evaluate_policy(model, vec_env, n_eval_episodes=train_eval_episodes, deterministic=True)
print(f"Priemerná deterministic odmena: {mean_reward} ± {std_reward}")

**Testovanie**

In [None]:
# Load the model saved by the training cell
model = model_.load(model_.__name__ + "_" + benchmark + "_model")
# Single environment with on-screen rendering for visual inspection
env = gym.make(benchmark, render_mode="human", max_episode_steps=max_stepov_na_epizodu)

# Run the evaluation; the environment is closed even if evaluation raises,
# so the render window and its resources are always released.
try:
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10, deterministic=True)
    print(mean_reward, std_reward)
finally:
    env.close()