In [None]:
from agent import TelosAgent
from environment import TelosTaskEnv, make_env
from task import TelosTask
from time import sleep
import pybullet as p

from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3 import SAC, PPO


In [2]:
from standing_task.standing_task import StandingTelosTask
from standing_task.standing_environment import StandingTelosTaskEnv

In [3]:
import RMP
import utils.stability_functions as sf
import utils.target_functions as tfun
from utils.PyBullet import PyBullet
from utils.helper import load_yaml

In [None]:
# Build the standing-task stack around one shared PyBullet handle: the agent,
# the task and the environment are all given the same `pb` object.
pb = PyBullet(render_mode="rgb_array", renderer="Tiny")
telos_agent = TelosAgent(pb)
telos_task = StandingTelosTask(agent=telos_agent, sim_engine=pb)
telos_env = StandingTelosTaskEnv(
    task=telos_task,
    agent=telos_agent,
    sim_engine=pb,
)

In [None]:
# Display the robot's (pitch, roll, yaw) as reported by the task's simulation handle.
(
    telos_task.sim.get_pitch_angle(telos_task.agent.robot_agent),
    telos_task.sim.get_roll_angle(telos_task.agent.robot_agent),
    telos_task.sim.get_yaw_angle(telos_task.agent.robot_agent),
)

In [6]:
# Load a previously saved policy from the "tqc_standing_task" checkpoint.
# NOTE(review): the file name suggests a TQC model, yet it is loaded through
# SAC.load — confirm the checkpoint was actually saved by SAC.
test_model = SAC.load("tqc_standing_task")

In [7]:
# Reset the environment; keep the first returned element (the observation) and
# discard the second.
obs,_ = telos_env.reset()

In [None]:
# Inspect the "agent" entry of the observation returned by the reset above.
obs["agent"]

In [None]:
# Roll out the loaded policy for two episodes, rendering after every step.
for i in range(2):
    obs, _ = telos_env.reset()
    done = False
    while not done:
        action, _ = test_model.predict(obs, deterministic=True)
        obs, rewards, terminated, truncated, info = telos_env.step(action)
        # The 5-tuple step API reports two separate end-of-episode flags.
        # Ignoring `truncated` would keep stepping an episode that has already
        # ended (e.g. on a time limit) and could loop forever.
        done = terminated or truncated
        if done:
            # Hold the final frame for a while before the next reset.
            sleep(8)
        telos_env.render()
        sleep(1/240)
    print(f"Episode {i} done")

In [5]:
# Wrap the single standing environment as a one-env vectorized environment and
# add running normalization of observations and rewards (observations clipped
# to +/-10).
env = VecNormalize(
    DummyVecEnv([lambda: telos_env]),
    norm_obs=True,
    norm_reward=True,
    clip_obs=10.0,
)

In [7]:
import time
from tqdm import tqdm

In [54]:
# Start from a fresh episode and an empty buffer; the sampling loop below
# appends to `angular_velocities`.
telos_env.reset()
angular_velocities = []

In [None]:
# Drive the environment with random actions for 1,000 steps and record the
# angular-velocity slice of every "agent" observation.
for _ in tqdm(range(1_000)):
    action = telos_env.action_space.sample()
    obs, _, terminated, truncated, _ = telos_env.step(action)
    obs = obs["agent"]
    # first 7 values are the joint positions, next 12 values are angular velocities, the shape is 34
    angular_velocities.extend(obs[7:19].flatten().tolist())
    # Stepping an environment after its episode has ended is undefined in the
    # Gymnasium API, so start a new episode as soon as either flag is raised.
    if terminated or truncated:
        telos_env.reset()

In [56]:
import numpy as np

In [None]:
# Show the number of collected samples and a short preview instead of dumping
# the whole list: the sampling loop appends the obs[7:19] slice on each of its
# 1,000 steps, which is far too much to render as cell output.
len(angular_velocities), angular_velocities[:12]

In [None]:
# Summary of the sampled angular velocities as a (max, min, mean) tuple.
(
    max(angular_velocities),
    min(angular_velocities),
    np.mean(angular_velocities),
)

In [None]:
# `env` is a vectorized environment at this point, so `step` expects one action
# per sub-environment stacked along a leading axis. Passing a single unbatched
# sample would make the vec env hand only its first element to the wrapped env.
act = np.stack([env.action_space.sample() for _ in range(env.num_envs)])
env.step(act)

In [None]:
# Step the normalizing vec env 1,000 times with random actions.
# Actions are batched (one per sub-environment) because a vectorized `step`
# indexes the action array by environment; an unbatched sample would be sliced
# element-wise instead of being passed through whole.
for _ in range(1000):
    env.step(np.stack([env.action_space.sample() for _ in range(env.num_envs)]))

In [7]:
# Persist the current normalization statistics to disk ...
env.save("vec_normalize_stats.pkl")

# ... then rebuild the wrapper around a fresh one-env vec env and restore the
# saved statistics into it.
# NOTE(review): the ".pkl" name suggests a pickle file — only load files you
# created yourself.
env = VecNormalize.load(
    "vec_normalize_stats.pkl",
    DummyVecEnv([lambda: telos_env]),
)

In [None]:
# Reset the normalizing vec env and print the observation it returns.
obs = env.reset()
print(f"Initial normalized observation: {obs}")


In [None]:
import numpy as np

# Collect the "agent" observation of the first sub-environment over many
# resets, then compute per-feature statistics across those samples.
reset_samples = []
for _ in range(10000):
    obs = env.reset()['agent']
    reset_samples.append(obs[0])

reset_samples = np.asarray(reset_samples)

# Both statistics are taken over the sample axis of the raw samples. Computing
# the variance from the already-averaged vector would collapse it to a single
# number describing spread across features, not across resets.
obs_mean = np.mean(reset_samples, axis=0)
obs_var = np.var(reset_samples, axis=0)

print(f"Mean of observations: {obs_mean}")
print(f"Variance of observations: {obs_var}")


In [6]:
# Call step_simulation() directly on the environment's sim handle, without
# going through telos_env.step (presumably advances the physics by one tick —
# confirm against the PyBullet wrapper).
telos_env.sim.step_simulation()

In [None]:
# Take one step with a randomly sampled action and display what step() returns.
telos_env.step(telos_env.action_space.sample())

In [None]:
# Load the configuration from "pybullet_config.yaml" (path is relative to the
# current working directory).
_config = load_yaml("pybullet_config.yaml")

In [None]:
# Display the task's configured fall threshold.
_config["task"]["fall_threshold"]

In [None]:

# Gains read from the "stability_policy" section of the config.
Kp, Kd = (_config["stability_policy"][gain] for gain in ("Kp", "Kd"))

# Weights read from the "stability_metric" section of the config.
z_position_emphasis, pitch_emphasis = (
    _config["stability_metric"][weight]
    for weight in ("z_position_emphasis", "pitch_emphasis")
)

In [None]:
# Both RMPs are constructed with the same keyword arguments, so collect them once.
shared_rmp_kwargs = dict(
    Kp=Kp,
    Kd=Kd,
    z_position_emphasis=z_position_emphasis,
    pitch_emphasis=pitch_emphasis,
)

# RMP built from the stability task map, policy and metric.
stability_rmp = RMP.RMP(
    sf.get_stability_task_map_from_obs,
    sf.stability_policy,
    sf.stability_metric,
    **shared_rmp_kwargs,
)

# RMP built from the target policy and metric; the task map reads the current
# goal from the environment each time it is called.
# NOTE(review): the task map comes from `sf` (stability_functions) while the
# policy and metric come from `tfun` (target_functions) — confirm that
# `sf.target_task_map` is the intended function.
target_rmp = RMP.RMP(
    lambda obs: sf.target_task_map(obs, telos_env.task.goal),
    tfun.target_policy,
    tfun.target_metric,
    **shared_rmp_kwargs,
)

# Combine the two RMPs into one global policy.
global_rmp_policy = RMP.GlobalRMPPolicy([stability_rmp, target_rmp])

In [None]:
# Wrap the standing environment with the RMP reward wrapper, passing it the
# global RMP policy. This rebinds `env`, which previously held the
# VecNormalize-wrapped environment.
env = RMP.RMPRewardWrapper(telos_env, global_rmp_policy)

In [None]:
# Short PPO run on the standing environment.
# NOTE(review): this trains on the bare `telos_env`, not on the RMP-wrapped
# `env` created just above — confirm which one is intended.
model = PPO(policy='MultiInputPolicy', env=telos_env, verbose=1)
model.learn(total_timesteps=50)

In [None]:
# Step the RMP-wrapped environment once with an action sampled from the
# underlying environment's action space, and display the result.
env.step(telos_env.action_space.sample())

---

In [None]:
# Alternative setup using the generic TelosTask / TelosTaskEnv classes; this
# rebinds telos_agent, telos_task and telos_env.
# NOTE(review): TelosAgent is called here with render_mode/renderer keywords,
# whereas earlier in this notebook it is constructed from a PyBullet instance
# (TelosAgent(pb)) — confirm which constructor signature is current.
telos_agent = TelosAgent(render_mode="rgb_array",renderer="Tiny")
telos_task = TelosTask(agent=telos_agent)
telos_env = TelosTaskEnv(task=telos_task, agent=telos_agent)

In [None]:
# Four-environment vec env built through make_env, with running normalization
# of observations and rewards (observations clipped to +/-10).
# NOTE(review): all four sub-environments receive the same telos_task and
# telos_agent objects through env_kwargs — confirm make_env does not need an
# independent task/agent per environment.
env = VecNormalize(
    make_vec_env(
        make_env,
        n_envs=4,
        env_kwargs={
            "task": telos_task,
            "agent": telos_agent,
            "render_mode": "rgb_array",
        },
    ),
    norm_obs=True,
    norm_reward=True,
    clip_obs=10.0,
)

In [None]:
# Network architecture passed to SAC as policy_kwargs: three hidden layers of
# 512 units for both the "pi" and the "qf" entries.
policy_sac = {
    "net_arch": {
        "pi": [512, 512, 512],
        "qf": [512, 512, 512],
    },
}

In [None]:
# SAC agent on the normalized vec env, using the custom network architecture.
model_sac = SAC(
    policy="MultiInputPolicy",
    env=env,
    policy_kwargs=policy_sac,
    # Optimisation settings.
    learning_rate=3.5e-4,
    batch_size=2048,
    gamma=0.99,
    tau=0.005,
    ent_coef="auto",
    # Replay buffer.
    buffer_size=int(1e6),
    optimize_memory_usage=False,
    # Update schedule: one gradient step and one target update per env step.
    train_freq=1,
    gradient_steps=1,
    target_update_interval=1,
    # No extra action noise.
    action_noise=None,
    # Logging.
    verbose=1,
    tensorboard_log="./tensorboard/sacTelos/",
)

In [None]:
# Train the SAC agent for 500,000 timesteps (long-running cell).
model_sac.learn(total_timesteps=500_000)

In [None]:
# Step the agent's simulation 1,000 times, pausing 1/60 s between steps.
for _ in range(1000):
    telos_agent.step_simulation()
    sleep(1 / 60)