In [3]:
# SPDX-FileCopyrightText: Copyright (c) 2022 Guillaume Bellegarda. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Copyright (c) 2022 EPFL, Guillaume Bellegarda

import os, sys
import gym
import numpy as np
import time
import matplotlib
import matplotlib.pyplot as plt
from sys import platform
# may be helpful depending on your system
# if platform =="darwin": # mac
#   import PyQt5
#   matplotlib.use("Qt5Agg")
# else: # linux
#   matplotlib.use('TkAgg')

# stable-baselines3
from stable_baselines3.common.monitor import load_results 
from stable_baselines3.common.vec_env import VecNormalize
from stable_baselines3 import PPO, SAC
# from stable_baselines3.common.cmd_util import make_vec_env
from stable_baselines3.common.env_util import make_vec_env # fix for newer versions of stable-baselines3

from env.quadruped_gym_env import QuadrupedGymEnv
# utils
from utils.utils import plot_results
from utils.file_utils import get_latest_model, load_all_results


# Algorithm whose saved run is inspected in this notebook.
LEARNING_ALG = "SAC"
# Root folder of the intermediate checkpoints; each run lives in its own
# sub-folder, i.e. interm_dir + '102824115106'.
interm_dir = "./logs/intermediate_models/"
log_dir = f"{interm_dir}112624141434"

# Environment options (render at test time).
# Check ideal conditions, as well as robustness to UNSEEN noise during training.
env_config = dict(render=True, record_video=False, add_noise=False)
# env_config['competition_env'] = True

# Location of the saved normalization statistics for this run.
stats_path = os.path.join(log_dir, "vec_normalize.pkl")


In [4]:
# Sanity check: display the root folder that holds the intermediate models.
interm_dir

'./logs/intermediate_models/'

In [5]:
# Sanity check: display the run folder (interm_dir plus the run identifier).
log_dir

'./logs/intermediate_models/112624141434'

In [6]:
# Alias the run folder under the name `folder`, which the following
# load_results(folder) cell reads.
folder = log_dir

In [8]:
# Load the monitor results stored under `folder` into a DataFrame and display it.
# The frame has the columns index, r, l and t; ts2xy in this notebook reads
# r as the y values, cumsum(l) as the timestep axis and t / 3600 as wall time in hours.
timesteps = load_results(folder)
timesteps

Unnamed: 0,index,r,l,t
0,0,0.446016,1001,6.490527
1,1,0.892443,1001,14.642202
2,2,0.359624,963,21.161319
3,3,0.453611,1001,27.823334
4,4,0.415433,837,33.453901
...,...,...,...,...
120,120,0.113442,336,1990.249951
121,121,0.419759,144,1998.143374
122,122,0.942416,141,2006.465006
123,123,0.061000,319,2022.741612


In [10]:
# Step budget: keep only the rows whose cumulative `l` does not exceed it.
num_timesteps = 10e10
timesteps = timesteps.loc[timesteps["l"].cumsum() <= num_timesteps]
timesteps

Unnamed: 0,index,r,l,t
0,0,0.446016,1001,6.490527
1,1,0.892443,1001,14.642202
2,2,0.359624,963,21.161319
3,3,0.453611,1001,27.823334
4,4,0.415433,837,33.453901
...,...,...,...,...
120,120,0.113442,336,1990.249951
121,121,0.419759,144,1998.143374
122,122,0.942416,141,2006.465006
123,123,0.061000,319,2022.741612


In [11]:
# One DataFrame per run; only a single run is inspected here.
tslist = [timesteps]

In [24]:
# Supported x-axis selectors understood by ts2xy.
X_TIMESTEPS = 'timesteps'
X_EPISODES = 'episodes'
X_WALLTIME = 'walltime_hrs'
# Sentinel passed as `yaxis` to ts2xy to select the `l` column for the y values.
Y_EPLEN = True
# Colours assigned to the curves, one per run.
# 'lightpurple' is not a Matplotlib named colour and would raise a ValueError
# as soon as that entry is used, so the xkcd-survey name is used instead.
COLORS = ['blue', 'green', 'red', 'cyan', 'magenta', 'yellow', 'black', 'purple', 'pink',
          'brown', 'orange', 'teal', 'coral', 'lightblue', 'lime', 'lavender', 'turquoise',
          'darkgreen', 'tan', 'salmon', 'gold', 'xkcd:light purple', 'darkred', 'darkblue']
# Convert every loaded run into an (x, y) pair with timesteps on the x axis.
xy_list = [ts2xy(run_frame, X_TIMESTEPS) for run_frame in tslist]

In [20]:
def plot_curves(xy_list, xaxis, title):
    """
    Scatter-plot every (x, y) series and overlay a rolling mean on long ones.

    :param xy_list: ([(np.ndarray, np.ndarray)]) the x and y coordinates to plot
    :param xaxis: (str) text used as the x-axis label
        (can be X_TIMESTEPS='timesteps', X_EPISODES='episodes' or X_WALLTIME='walltime_hrs')
    :param title: (str) the title of the plot
    """
    # Neither name is defined or imported elsewhere in this notebook, so they are
    # pulled in here to keep the function usable on a fresh kernel.
    from stable_baselines3.common.results_plotter import EPISODES_WINDOW, window_func

    plt.figure(figsize=(8, 2))
    # Ignore empty series when looking for the right-hand x limit, and fall back
    # to 0 when there is nothing to plot at all.
    maxx = max((xy[0][-1] for xy in xy_list if len(xy[0]) > 0), default=0)
    minx = 0
    for (i, (x, y)) in enumerate(xy_list):
        # Wrap around instead of raising IndexError when there are more curves than colours.
        color = COLORS[i % len(COLORS)]
        plt.scatter(x, y, s=2)
        # Do not plot the smoothed curve at all if the timeseries is shorter than window size.
        if x.shape[0] >= EPISODES_WINDOW:
            # Compute and plot rolling mean with window of size EPISODES_WINDOW
            x, y_mean = window_func(x, y, EPISODES_WINDOW, np.mean)
            plt.plot(x, y_mean, color=color)
    # Identical limits would only trigger a Matplotlib warning; keep autoscaling then.
    if maxx > minx:
        plt.xlim(minx, maxx)
    plt.title(title)
    plt.xlabel(xaxis)
    plt.ylabel("Episode Rewards")
    plt.tight_layout()

In [None]:
# Draw the reward curve of the loaded run against the number of timesteps.
plot_curves(xy_list, 'timesteps', f"{LEARNING_ALG} Rewards")

In [None]:
# Inlined reward / episode-length plotting for the run in log_dir.
# The values below mirror the arguments of the plot_results call made
# elsewhere in this notebook: plot_results([log_dir], 10e10, 'timesteps', LEARNING_ALG + ' ').
dirs = [log_dir]
num_timesteps = 10e10
xaxis = X_TIMESTEPS
task_name = LEARNING_ALG + ' '

# Load every run and truncate it to the step budget.
tslist = []
for folder in dirs:
    timesteps = load_results(folder)
    if num_timesteps is not None:
        timesteps = timesteps[timesteps.l.cumsum() <= num_timesteps]
    tslist.append(timesteps)

# Figure 1: rewards (the `r` column) per run.
xy_list = [ts2xy(timesteps_item, xaxis) for timesteps_item in tslist]
plot_curves(xy_list, xaxis, task_name+'Rewards')
plt.ylabel("Episode Rewards")

# Figure 2: the `l` column per run; plot_curves always labels the y axis
# "Episode Rewards", so the label is overridden afterwards.
xy_list = [ts2xy(timesteps_item, xaxis, Y_EPLEN) for timesteps_item in tslist]
plot_curves(xy_list, xaxis, task_name+'Ep Len')
plt.ylabel("Episode Length")

In [13]:
def ts2xy(timesteps, xaxis,yaxis=None):
    """
    Split a monitor DataFrame into the x and y arrays used for plotting.

    :param timesteps: (Pandas DataFrame) the input data; the columns `r`, `l`
        and `t` are read
    :param xaxis: (str) selects the x output: X_TIMESTEPS='timesteps' gives the
        cumulative sum of `l`, X_EPISODES='episodes' gives the row number and
        X_WALLTIME='walltime_hrs' gives `t` divided by 3600
    :param yaxis: when this is Y_EPLEN the y output is the `l` column,
        otherwise it is the `r` column
    :return: (np.ndarray, np.ndarray) the x and y output
    :raises NotImplementedError: if `xaxis` is none of the three selectors
    """
    if xaxis == X_TIMESTEPS:
        horizontal = np.cumsum(timesteps.l.values)
    elif xaxis == X_EPISODES:
        horizontal = np.arange(len(timesteps))
    elif xaxis == X_WALLTIME:
        horizontal = timesteps.t.values / 3600.
    else:
        raise NotImplementedError

    # The reward column is always read, even when it ends up being replaced.
    vertical = timesteps.r.values
    if yaxis is Y_EPLEN:
        vertical = timesteps.l.values
    return horizontal, vertical

In [None]:


# Load the monitor log in this cell: monitor_results is otherwise only assigned
# in a later cell, so on a fresh kernel the print below would raise a NameError.
monitor_results = load_results(log_dir)
print(monitor_results)
# Plot the training curves of this run with timesteps on the x axis.
plot_results([log_dir] , 10e10, 'timesteps', LEARNING_ALG + ' ')
plt.show()

     index         r     l            t
0        0  0.446016  1001     6.490527
1        1  0.892443  1001    14.642202
2        2  0.359624   963    21.161319
3        3  0.453611  1001    27.823334
4        4  0.415433   837    33.453901
..     ...       ...   ...          ...
120    120  0.113442   336  1990.249951
121    121  0.419759   144  1998.143374
122    122  0.942416   141  2006.465006
123    123  0.061000   319  2022.741612
124    124  0.083135    49  2025.908978

[125 rows x 4 columns]


In [None]:
# SPDX-FileCopyrightText: Copyright (c) 2022 Guillaume Bellegarda. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Copyright (c) 2022 EPFL, Guillaume Bellegarda

import os, sys
import gym
import numpy as np
import time
import matplotlib
import matplotlib.pyplot as plt
from sys import platform
# may be helpful depending on your system
# if platform =="darwin": # mac
#   import PyQt5
#   matplotlib.use("Qt5Agg")
# else: # linux
#   matplotlib.use('TkAgg')

# stable-baselines3
from stable_baselines3.common.monitor import load_results 
from stable_baselines3.common.vec_env import VecNormalize
from stable_baselines3 import PPO, SAC
# from stable_baselines3.common.cmd_util import make_vec_env
from stable_baselines3.common.env_util import make_vec_env # fix for newer versions of stable-baselines3

from env.quadruped_gym_env import QuadrupedGymEnv
# utils
from utils.utils import plot_results
from utils.file_utils import get_latest_model, load_all_results


LEARNING_ALG = "SAC"
interm_dir = "./logs/intermediate_models/"
# path to saved models, i.e. interm_dir + '102824115106'
log_dir = interm_dir + '112624141434'

# initialize env configs (render at test time)
# check ideal conditions, as well as robustness to UNSEEN noise during training
env_config = {}
env_config['render'] = True
env_config['record_video'] = False
env_config['add_noise'] = False
# env_config['competition_env'] = True

# get latest model and normalization stats, and print the monitor log
stats_path = os.path.join(log_dir, "vec_normalize.pkl")
model_name = get_latest_model(log_dir)
monitor_results = load_results(log_dir)
print(monitor_results)
#plot_results([log_dir] , 10e10, 'timesteps', LEARNING_ALG + ' ')
#plt.show()


def _make_env():
    """Build one QuadrupedGymEnv from env_config (factory handed to make_vec_env)."""
    return QuadrupedGymEnv(**env_config)


# reconstruct env: single vectorized env wrapped with the saved normalization stats
env = make_vec_env(_make_env, n_envs=1)
env = VecNormalize.load(stats_path, env)
env.training = False    # do not update stats at test time
env.norm_reward = False # reward normalization is not needed at test time

# load model
if LEARNING_ALG == "PPO":
    model = PPO.load(model_name, env)
elif LEARNING_ALG == "SAC":
    model = SAC.load(model_name, env)
else:
    # Fail here with a clear message instead of a NameError on `model` later on.
    raise ValueError("Unsupported LEARNING_ALG %r, expected 'PPO' or 'SAC'" % (LEARNING_ALG,))
print("\nLoaded model", model_name, "\n")

obs = env.reset()
episode_reward = 0

# Per-step logs filled in during the rollout.
velocities = []    # x component of GetBaseLinearVelocity() (forward velocity)
heights = []       # z component of GetBasePosition() (base height)
yaws = []          # third component of GetBaseOrientationRollPitchYaw() (yaw)
drifts = []        # y component of GetBasePosition() (lateral position)
step_rewards = []  # reward returned by env.step() at every step


for i in range(2000):
    action, _states = model.predict(obs,deterministic=False) # sample at test time? ([TODO]: test)
    obs, rewards, dones, info = env.step(action)
    episode_reward += rewards

    # Fetch the robot on every step instead of caching it outside the loop.
    # NOTE(review): presumably the env may replace its robot object on reset - confirm.
    robot = env.envs[0].env.robot
    base_velocity = robot.GetBaseLinearVelocity()               # [Vx, Vy, Vz]
    base_position = robot.GetBasePosition()                     # [x, y, z]
    base_orientation = robot.GetBaseOrientationRollPitchYaw()   # [roll, pitch, yaw]

    if dones:
        print('episode_reward', episode_reward)
        print('Final base position', info[0]['base_pos'])
        episode_reward = 0

    # Save the current robot state and reward for the plots below.
    # NOTE(review): on a step where `dones` is set, the vectorized env has
    # presumably already reset, so these samples may be post-reset - confirm.
    velocities.append(base_velocity[0])
    heights.append(base_position[2])
    yaws.append(base_orientation[2])
    drifts.append(base_position[1])
    # `rewards` holds one entry per environment and there is a single env here.
    # Keeping a per-step history is what makes the reward plot possible; plotting
    # `rewards` itself (last step only) against all timesteps raises a shape error.
    step_rewards.append(float(rewards[0]))
    if i % 100 == 0:
        print(i)

# Time steps for the x-axis (one entry per simulated step)
time_steps = range(len(velocities))


def _plot_series(values, label, ylabel, title, color=None):
    """Plot one logged series against its step index in a separate figure."""
    plt.figure()
    # color=None lets Matplotlib pick its default colour.
    plt.plot(range(len(values)), values, label=label, color=color)
    plt.xlabel("Timestep")
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()
    plt.grid()
    plt.show()


# Forward velocity (Vx)
_plot_series(velocities, 'Velocity (Vx)', "Forward Velocity (m/s)", "Forward Velocity Over Time")

# Height (z-coordinate)
_plot_series(heights, 'Height (z)', "Height (m)", "Height Over Time", color='orange')

# Yaw
_plot_series(yaws, 'Yaw', "Yaw (rad)", "Yaw Over Time", color='green')

# Lateral drift: this is the y position of the base, not a velocity
_plot_series(drifts, 'Drift (y)', "Drift (m)", "Lateral Drift Over Time", color='red')

# Reward evolution (per-step reward)
_plot_series(step_rewards, 'Rewards', "Reward", "Reward Over Time", color='purple')

     index         r     l            t
0        0  0.446016  1001     6.490527
1        1  0.892443  1001    14.642202
2        2  0.359624   963    21.161319
3        3  0.453611  1001    27.823334
4        4  0.415433   837    33.453901
..     ...       ...   ...          ...
120    120  0.113442   336  1990.249951
121    121  0.419759   144  1998.143374
122    122  0.942416   141  2006.465006
123    123  0.061000   319  2022.741612
124    124  0.083135    49  2025.908978

[125 rows x 4 columns]





Loaded model ./logs/intermediate_models/112624141434\rl_model_30000_steps.zip 

0
100
200
300
episode_reward [0.334]
Final base position (-1.2441296528047, 0.17589269712017572, 0.33338849865896025)
400
episode_reward [0.292]
Final base position (-0.27109962921028075, -0.4027822592563341, 0.34556820613255956)
500
600
episode_reward [0.228]
Final base position (-0.9374327505942524, -0.37581337570851664, 0.31698921848597345)
700
800
900
1000
1100
1200
1300
1400
episode_reward [0.238]
Final base position (-3.889857005284016, 0.6164225099244677, 0.3052733006062227)
1500
1600
1700
1800
1900
episode_reward [0.419]
Final base position (0.09884752240822263, 0.6050434259297561, 0.2527348829271969)
