In [19]:
from pathlib import Path
import sys
import numpy as np
import gymnasium as gym
from stable_baselines3.ppo import PPO
from stable_baselines3.sac import SAC

# The project root sits two directory levels above the notebook's working directory.
PROJECT_ROOT_DIR = Path.cwd().parent.parent

# Put the root on sys.path (once) so the project-local packages below can be imported.
_root_entry = str(PROJECT_ROOT_DIR)
if _root_entry not in sys.path:
    sys.path.append(_root_entry)

from envs.custom_pendulum import CustomPendulumEnv
from utils.collect_samples import collect_samples

# Echo the resolved project root as this cell's output.
PROJECT_ROOT_DIR

PosixPath('/home/ucav/pythonprojects/rl_sys_id')

In [20]:
# Parameters g, m, l of the "real" pendulum to sample from
# (presumably gravity, mass and length -- confirm against CustomPendulumEnv).
env_config_real = dict(g=9.5, m=0.9, l=1.2)

# Build the environment through the project's config-based factory.
# Alternative kept for reference: gym.make("CustomPendulum-v0", **env_config_real)
env_real = CustomPendulumEnv.get_env_from_config(config=env_config_real)

In [21]:
# Show the environment object's repr as the cell output.
env_real

<envs.custom_pendulum.CustomPendulumEnv at 0x7b91dc44cd10>

In [22]:
# Read g, m, l back from the unwrapped environment so they can be compared
# with the values passed in env_config_real.
unwrapped_env = env_real.unwrapped
(unwrapped_env.g, unwrapped_env.m, unwrapped_env.l)

(9.5, 0.9, 1.2)

In [23]:
# Load a pre-trained SAC policy and attach it to the "real" environment.
# NOTE(review): the checkpoint directory name suggests the policy was trained with
# g=10.0, m=1.0, l=1.0, i.e. different parameters than env_real -- confirm this is intended.
policy_class = SAC
policy_path = PROJECT_ROOT_DIR / "checkpoints/custom_pendulum/sac/g_10_0_m_1_0_l_1_0/seed_1/best_model.zip"
algo = policy_class.load(policy_path, env=env_real)

# Total number of transitions to gather from the real environment.
collect_real_sample_num = 10000

# Roll the loaded policy out in env_real; returns observations, actions and
# next observations as three arrays.
obs_real, act_real, next_obs_real = collect_samples(
    policy=algo.policy,
    env=env_real,
    num_samples=collect_real_sample_num,
    per_episode_samples=100,
    deterministic=True,
)

print(f"length of obs_real: {len(obs_real)}, length of act_real: {len(act_real)}, length of next_obs_real: {len(next_obs_real)}")
print(type(obs_real), type(act_real), type(next_obs_real))
print(obs_real)
print(act_real)
print(next_obs_real)

# Persist the three arrays next to each other. The output directory is created
# first so np.save does not fail with FileNotFoundError on a fresh checkout.
real_data_dir = PROJECT_ROOT_DIR / "data/custom_pendulum"
real_data_dir.mkdir(parents=True, exist_ok=True)
for array_name, array in (
    ("obs_real", obs_real),
    ("act_real", act_real),
    ("next_obs_real", next_obs_real),
):
    # allow_pickle=False: fail loudly instead of silently pickling an object
    # array, so the saved files can always be read back without unpickling.
    np.save(real_data_dir / f"{array_name}.npy", array, allow_pickle=False)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
reset num: 100
length of obs_real: 10000, length of act_real: 10000, length of next_obs_real: 10000
<class 'numpy.ndarray'> <class 'numpy.ndarray'> <class 'numpy.ndarray'>
[[ 0.6520163   0.758205   -0.46042657]
 [ 0.6454508   0.7638019   0.17254764]
 [ 0.6141476   0.7891912   0.80615777]
 ...
 [ 0.9994345  -0.03362619  0.11655765]
 [ 0.99960446 -0.02812359  0.11010458]
 [ 0.99973625 -0.02296495  0.10320653]]
[[1.5793056 ]
 [1.5560877 ]
 [0.7052851 ]
 ...
 [0.11674786]
 [0.08467484]
 [0.05107832]]
[[ 0.6454508   0.7638019   0.17254764]
 [ 0.6141476   0.7891912   0.80615777]
 [ 0.559255    0.82899565  1.3563702 ]
 ...
 [ 0.99960446 -0.02812359  0.11010458]
 [ 0.99973625 -0.02296495  0.10320653]
 [ 0.99983454 -0.01819182  0.09548293]]


In [24]:
# Reload the saved observations to check that the file can be read back.
# The file holds a plain float32 array, so np.load's default (allow_pickle=False)
# is sufficient; enabling pickle would allow arbitrary code execution if the
# file were ever replaced by an untrusted one.
a = np.load(PROJECT_ROOT_DIR / "data/custom_pendulum/obs_real.npy")
a

array([[ 0.6520163 ,  0.758205  , -0.46042657],
       [ 0.6454508 ,  0.7638019 ,  0.17254764],
       [ 0.6141476 ,  0.7891912 ,  0.80615777],
       ...,
       [ 0.9994345 , -0.03362619,  0.11655765],
       [ 0.99960446, -0.02812359,  0.11010458],
       [ 0.99973625, -0.02296495,  0.10320653]], dtype=float32)

In [25]:
from gymnasium.utils import seeding

# Draw from a gymnasium-seeded generator to inspect the sequence produced by a fixed seed.
# The second return value is bound to a named variable instead of `_`: assigning
# to `_` in IPython stops the kernel from updating `_` with the last cell output.
rng, rng_seed = seeding.np_random(10)
for draw_idx in range(10):
    # Five draws from range(10), sampled with replacement.
    print(rng.choice(10, size=5, replace=True))

[7 9 2 2 7]
[8 5 1 8 5]
[1 1 4 6 4]
[8 0 4 5 9]
[2 8 0 3 7]
[5 9 7 9 8]
[1 9 8 1 9]
[7 3 1 3 9]
[7 2 4 8 6]
[3 1 9 2 5]
