This notebook imports 2 modules from the directory `coppeliasim_api/env` :
- sim.py
- simConst.py 

It also needs the appropriate remote API library: "remoteApi.dll" (Windows), "remoteApi.dylib" (Mac) or "remoteApi.so" (Linux)

The CoppeliaSim Python API is documented here: http://www.coppeliarobotics.com/helpFiles/en/remoteApiFunctionsPython.htm

The cell bellow finds the relative path of the directory `coppeliasim_api/env` where the modules `sim` and `simConst` live:<br>
if not found in the current working dir it tries recursively to find a parent directory that holds `coppeliasim_api/env`.<br>
When found, it sets `root_dir` to this value and adds relevant paths to the list sys.path:

### 1) Set up the notebook to run in project root, enable autoreload and make our python packages visibles

In [1]:
import sys, os

# initialize default path values:
target_dir = "coppeliasim_api/env"
root_dir = os.getcwd()
copsim_env_path = target_dir

if not os.path.isdir(target_dir):
    while not os.path.isdir(copsim_env_path):
        copsim_env_path = os.path.join('..', copsim_env_path)
    root_dir = copsim_env_path.replace(target_dir, "")

# run notebook in root dir and add the required paths to sys.path:
if  root_dir !=  os.getcwd():  
    os.chdir(root_dir)
sys.path.append(os.getcwd())

print(f"\troot directory: <{root_dir}>")
print(f"\tworking directory is now: <{os.getcwd()}>")

# automatic reload of modules when modification
%load_ext autoreload
%autoreload 2

if not "/coppeliasim_api/env" in sys.path: sys.path.append("./coppeliasim_api/env")
if not "./ai/src/run" in sys.path: sys.path.append("./ai/src/run")
#sys.path


	root directory: <../../>
	working directory is now: </home/arthur/Documents/PFA/free-balancing-robot>


### 2) Run the test_model function for all the trained PPO models:

In [2]:
import pathlib, yaml, time
import numpy as np
from ai.src.run.test_model_copsim import test_model

from ai.src.run.constants import (MODEL_DIR, TEST_DIR, 
    EXPERIMENT_CONFIG_FILENAME, ENVIRONMENT_CONFIG_FILENAME)

max_steps_nb = 1000  # If you want to change this value, be careful to also change it in other files too.
# These variables are used to make the range of their values the closest possible to [-1, 1]
theta_weight = 1/0.1 # The maximum value of theta we have observed is 0.05, so theta should still never go above 1
x_weight = 1/0.2     # The maximum value of x we have observed is 0.025, so x should still never go above 1
x_dot_weight = 1/2 # The maximum value of x we have observed is 1.3, but we don't use this variable because we use a standard deviation

#This function uses the log of the test to determine how good the test was. (The closest to 1 is the best)
def metrique1(x, x_dot, theta):
    steps_nb = len(x)
    return steps_nb/max_steps_nb * (1 - 0.5*x_weight*(max(np.abs(x)))**2 - 0.3*theta_weight*max(np.abs(theta))**2 - 0.2 * np.abs(np.std((x_dot))))

vehicule = "balancingrobot"
first_train_dir = "BalancingRobotEnv_CopSim_PPO_22-04-28_01-13-17" #First model to be tested
last_train_dir  = ""    #Last model to be tested

prefix_dict = {'cartpole': "CartPoleEnv_CopSim_PPO_",       
               'balancingrobot': "BalancingRobotEnv_CopSim_PPO_"}
prefix = prefix_dict[vehicule]

vehicule_train_dir = os.path.join(MODEL_DIR, vehicule)
list_train_dir = [dir_name for dir_name in os.listdir(vehicule_train_dir) if prefix in dir_name]
list_train_dir.sort()

# extract the sub-list of training directory to be processed:
first, last = 0, len(list_train_dir)
if first_train_dir:
    try:
        first = list_train_dir.index(first_train_dir)
    except:
        pass
if last_train_dir:
    try:
        last = list_train_dir.index(last_train_dir)
    except:
        pass
    
if last+1 < len(list_train_dir):
    list_train_dir = list_train_dir[first:last+1]
else:
    list_train_dir = list_train_dir[first:]

results = {}

# scan the list
#for i in range(1): #You can switch the commented parts to test a specific model
for train_dir in list_train_dir:
    train_path = os.path.join(vehicule_train_dir, train_dir) # os.path.join(MODEL_DIR, "balancingrobot/last")
    ZIP_dir = os.path.join(train_path, 'ZIP')
    if os.path.isdir(ZIP_dir) and os.listdir(ZIP_dir):
        print("="*50+f"\nProcessing <{train_path}>")
    else:
        print("="*50+f"\nSkipping <{train_path}>")
        continue
    
        # retrieve env.yaml parameters:
    perf = test_model(train_path, copsim_port = 20005, display_plots=True, verbose=False)
    ppo_cfg_file = os.path.join(train_path, EXPERIMENT_CONFIG_FILENAME)
    with open(ppo_cfg_file, 'r') as f:
        ppo_cfg = yaml.safe_load(f.read())
    environment = ppo_cfg['env']['environment']
    seed        = ppo_cfg['train']['seed']

        # retrieve env.yaml parameters:
    env_cfg_file = os.path.join(train_path, ENVIRONMENT_CONFIG_FILENAME)
    with open(env_cfg_file, 'r') as f:
        env_cfg = yaml.safe_load(f.read())
    if environment == 'CartPoleEnv_CopSim':
            # load the saved parameters:
        veloc     = env_cfg['velocity']
        dt        = env_cfg['dt']
        version   = env_cfg['version']
        theta_lim = env_cfg['theta_lim']
        x_lim     = env_cfg['x_lim']   
        reward    = env_cfg['reward']     

    elif  environment == 'BalancingRobotEnv_CopSim':    
        veloc     = env_cfg['velocity']
        dt        = env_cfg['dt']
        version   = env_cfg['version']
        theta_lim = env_cfg['theta_lim']
        x_lim     = env_cfg['x_lim']   
        reward    = env_cfg['reward']     
            
        # process perf:
    actions            = perf["actions"]
    rewards            = perf['rewards']
    mean_abs_actions   = perf['mean_abs_actions']
    reward_cum         = perf['reward_cum']
    rewards_mean       = perf['rewards_mean']
    last_step_count    = perf['last_step_count']
    percent_completion = perf['percent_completion']
    x                  = perf['x']
    x_dot              = perf['x_dot']
    theta              = perf['theta']
    theta_dot          = perf['theta_dot']

    nb_max_actions = actions.count(-1) + actions.count(1)

    forged_custom_key = f"seed_{seed}-"
    for key, val in env_cfg.items():
        if "version" in key: continue
        forged_custom_key += f"{key}_{val}-"
        
    print("\t", forged_custom_key)
    print(f"\t mean_abs_action*: {mean_abs_actions:.2f}, nb_max_actions*: {nb_max_actions}, reward_cum: {reward_cum:.2f}, rewards_mean: {rewards_mean:.2f}")
    m = metrique1(perf['x'], perf['x_dot'], perf['theta'])
    print("La métrique est de: ", m)
    results[train_dir] = m

os.getcwd() is : /home/arthur/Documents/PFA/free-balancing-robot
	root directory: </home/arthur/Documents/PFA/free-balancing-robot>
	working directory is now: </home/arthur/Documents/PFA/free-balancing-robot>
os.getcwd() is : /home/arthur/Documents/PFA/free-balancing-robot
	root directory: </home/arthur/Documents/PFA/free-balancing-robot>
	working directory is now: </home/arthur/Documents/PFA/free-balancing-robot>
[0;34;47massurez vous que le dossier CoppeliaSim_Edu_V4_3_0_Ubuntu20_04 se trouve a la racine du projet


Une erreur peux survenir si votre version du dossier Copeliasim n'est pas nommé CoppeliaSim_Edu_V4_3_0_Ubuntu20_04, renommez le en consequence [0m
Processing <ai/models/balancingrobot/BalancingRobotEnv_CopSim_PPO_22-04-28_01-13-17>
	 'model.zip' not found, using <rl_model_10000_steps.zip>


<Figure size 1000x600 with 3 Axes>

dict_keys(['actions', 'deterministic', 'last_step_count', 'mean_abs_actions', 'percent_completion', 'reward_cum', 'rewards', 'rewards_mean', 'rewards_std', 'theta', 'theta_dot', 'x', 'x_dot'])
	 seed_12345-dt_0.05-reward_0-theta_lim_7-velocity_3-x_lim_0.05-
	 mean_abs_action*: 0.79, nb_max_actions*: 8, reward_cum: 13.05, rewards_mean: 0.93
La métrique est de:  0.01291997312184653
Processing <ai/models/balancingrobot/BalancingRobotEnv_CopSim_PPO_22-04-28_12-51-14>


<Figure size 1000x600 with 3 Axes>

dict_keys(['actions', 'deterministic', 'last_step_count', 'mean_abs_actions', 'percent_completion', 'reward_cum', 'rewards', 'rewards_mean', 'rewards_std', 'theta', 'theta_dot', 'x', 'x_dot'])
	 seed_12345-dt_0.05-reward_0-theta_lim_12-velocity_3-x_lim_0.1-
	 mean_abs_action*: 0.65, nb_max_actions*: 102, reward_cum: 297.59, rewards_mean: 1.01
La métrique est de:  0.26933124903156097
Processing <ai/models/balancingrobot/BalancingRobotEnv_CopSim_PPO_22-04-28_12-51-15>


<Figure size 1000x600 with 3 Axes>

dict_keys(['actions', 'deterministic', 'last_step_count', 'mean_abs_actions', 'percent_completion', 'reward_cum', 'rewards', 'rewards_mean', 'rewards_std', 'theta', 'theta_dot', 'x', 'x_dot'])
	 seed_12345-dt_0.05-reward_1-theta_lim_12-velocity_3-x_lim_0.1-
	 mean_abs_action*: 0.70, nb_max_actions*: 75, reward_cum: 173.87, rewards_mean: 0.96
La métrique est de:  0.16331325427807247
Processing <ai/models/balancingrobot/BalancingRobotEnv_CopSim_PPO_22-04-28_12-51-17>


<Figure size 1000x600 with 3 Axes>

dict_keys(['actions', 'deterministic', 'last_step_count', 'mean_abs_actions', 'percent_completion', 'reward_cum', 'rewards', 'rewards_mean', 'rewards_std', 'theta', 'theta_dot', 'x', 'x_dot'])
	 seed_12345-dt_0.05-reward_1-theta_lim_12-velocity_6-x_lim_0.1-
	 mean_abs_action*: 0.67, nb_max_actions*: 93, reward_cum: 238.92, rewards_mean: 0.95
La métrique est de:  0.22569801052983468


## Ranking of tested models:

In [3]:
from tools import print_ranking
#Sorts the models by their grade, and ranks them
print_ranking(results)

Model BalancingRobotEnv_CopSim_PPO_22-04-28_12-51-14 got the grade :   0.26933124903156097
Model BalancingRobotEnv_CopSim_PPO_22-04-28_12-51-17 got the grade :   0.22569801052983468
Model BalancingRobotEnv_CopSim_PPO_22-04-28_12-51-15 got the grade :   0.16331325427807247
Model BalancingRobotEnv_CopSim_PPO_22-04-28_01-13-17 got the grade :   0.01291997312184653


In [10]:
!killall -I coppelia


coppelia: aucun processus trouvé
