# Generate and Submit Jobs

In [6]:
import os
import numpy as np
import pandas as pd
import shutil

In [13]:
CPU_NUM = 1
JOB_TIME = '0-12:00'
JOB_MEMORY = '10G'
job_dir = '/scratch/lingheng/mrl_job_scripts'              # job script folder
job_out_dir = '/scratch/lingheng/mrl_job_scripts_output'   # job output folder
data_dir = '/scratch/lingheng/mrl_data'    # experiment data folder

In [3]:
if not os.path.exists(job_dir):
    os.makedirs(job_dir)
if not os.path.exists(job_out_dir):
    os.makedirs(job_out_dir)
if not os.path.exists(data_dir):
    os.makedirs(data_dir)

In [4]:
task_list = ['HalfCheetah-v4', 'Ant-v4', 'Walker2d-v4', 'Hopper-v4']
dp_type_list = ['MDP', 'POMDP-RV', 'POMDP-FLK', 'POMDP-RN', 'POMDP-RSM']
alg_list = ['PPO', 'SAC', 'MSAC', 'TD3', 'MTD3', 'LSTM-TD3']
seed_list = ['0', '1', '2', '3']

In [5]:

for task in task_list:
    print(task)
    for dp_type in dp_type_list:
        print('\t{}'.format(dp_type))
        for alg in alg_list:
            print('\t\t{}'.format(alg))
            for seed in seed_list:
                exp_name = '{0}_{1}_{2}_{3}'.format(task, dp_type, alg, seed)
                job_filename = 'job_{0}_{1}_{2}_{3}.sh'.format(task, dp_type, alg, seed)
                print('\t\t\t{}: {}'.format(seed, job_filename))
                with open(os.path.join(job_dir, job_filename), 'w') as job_file:
                    job_file.write('#!/bin/bash\n')
                    job_file.write('#SBATCH --account=def-rgorbet\n')
                    job_file.write('#SBATCH --cpus-per-task={}    #Maximum of CPU cores per GPU request: 6 on Cedar, 16 on Graham.\n'.format(CPU_NUM))
                    job_file.write('#SBATCH --mem={}               # memory per node\n'.format(JOB_MEMORY))
                    job_file.write('#SBATCH --time={}            # time (DD-HH:MM)\n'.format(JOB_TIME))
                    job_file.write('#SBATCH --output={0}/job_{1}_{2}_{3}_{4}_%N-%j.out        # %N for node name, %j for jobID\n'.format(job_out_dir, task, dp_type, alg, seed))
                    job_file.write('## Main processing command\n')
                    job_file.write('module load StdEnv/2020  gcc/11.3.0 cuda/11.8.0 python/3.10\n')
                    job_file.write('source ~/m_rl_env/bin/activate\n')
                    job_file.write('python ~/projects/def-rgorbet/lingheng/m_rl_pomdp/m_rl/learn.py --env_id {0} --env_dp_type {1} --rl_agent {2} --seed {3} --exp_name {4} --data_dir {5}'.format(task, dp_type, alg, seed, exp_name, data_dir))

               

HalfCheetah-v4
	MDP
		PPO
			0: job_HalfCheetah-v4_MDP_PPO_0.sh
			1: job_HalfCheetah-v4_MDP_PPO_1.sh
			2: job_HalfCheetah-v4_MDP_PPO_2.sh
			3: job_HalfCheetah-v4_MDP_PPO_3.sh
		SAC
			0: job_HalfCheetah-v4_MDP_SAC_0.sh
			1: job_HalfCheetah-v4_MDP_SAC_1.sh
			2: job_HalfCheetah-v4_MDP_SAC_2.sh
			3: job_HalfCheetah-v4_MDP_SAC_3.sh
		MSAC
			0: job_HalfCheetah-v4_MDP_MSAC_0.sh
			1: job_HalfCheetah-v4_MDP_MSAC_1.sh
			2: job_HalfCheetah-v4_MDP_MSAC_2.sh
			3: job_HalfCheetah-v4_MDP_MSAC_3.sh
		TD3
			0: job_HalfCheetah-v4_MDP_TD3_0.sh
			1: job_HalfCheetah-v4_MDP_TD3_1.sh
			2: job_HalfCheetah-v4_MDP_TD3_2.sh
			3: job_HalfCheetah-v4_MDP_TD3_3.sh
		MTD3
			0: job_HalfCheetah-v4_MDP_MTD3_0.sh
			1: job_HalfCheetah-v4_MDP_MTD3_1.sh
			2: job_HalfCheetah-v4_MDP_MTD3_2.sh
			3: job_HalfCheetah-v4_MDP_MTD3_3.sh
		LSTM-TD3
			0: job_HalfCheetah-v4_MDP_LSTM-TD3_0.sh
			1: job_HalfCheetah-v4_MDP_LSTM-TD3_1.sh
			2: job_HalfCheetah-v4_MDP_LSTM-TD3_2.sh
			3: job_HalfCheetah-v4_MDP_LSTM-TD3_3.

## Submit job

In [14]:
job_script_list = os.listdir(job_dir)
job_script_list.sort()
job_script_list


['MUJOCO_LOG.TXT',
 'job_Ant-v4_MDP_LSTM-TD3_0.sh',
 'job_Ant-v4_MDP_LSTM-TD3_1.sh',
 'job_Ant-v4_MDP_LSTM-TD3_2.sh',
 'job_Ant-v4_MDP_LSTM-TD3_3.sh',
 'job_Ant-v4_MDP_MSAC_0.sh',
 'job_Ant-v4_MDP_MSAC_1.sh',
 'job_Ant-v4_MDP_MSAC_2.sh',
 'job_Ant-v4_MDP_MSAC_3.sh',
 'job_Ant-v4_MDP_MTD3_0.sh',
 'job_Ant-v4_MDP_MTD3_1.sh',
 'job_Ant-v4_MDP_MTD3_2.sh',
 'job_Ant-v4_MDP_MTD3_3.sh',
 'job_Ant-v4_MDP_PPO_0.sh',
 'job_Ant-v4_MDP_PPO_1.sh',
 'job_Ant-v4_MDP_PPO_2.sh',
 'job_Ant-v4_MDP_PPO_3.sh',
 'job_Ant-v4_MDP_SAC_0.sh',
 'job_Ant-v4_MDP_SAC_1.sh',
 'job_Ant-v4_MDP_SAC_2.sh',
 'job_Ant-v4_MDP_SAC_3.sh',
 'job_Ant-v4_MDP_TD3_0.sh',
 'job_Ant-v4_MDP_TD3_1.sh',
 'job_Ant-v4_MDP_TD3_2.sh',
 'job_Ant-v4_MDP_TD3_3.sh',
 'job_Ant-v4_POMDP-FLK_LSTM-TD3_0.sh',
 'job_Ant-v4_POMDP-FLK_LSTM-TD3_1.sh',
 'job_Ant-v4_POMDP-FLK_LSTM-TD3_2.sh',
 'job_Ant-v4_POMDP-FLK_LSTM-TD3_3.sh',
 'job_Ant-v4_POMDP-FLK_MSAC_0.sh',
 'job_Ant-v4_POMDP-FLK_MSAC_1.sh',
 'job_Ant-v4_POMDP-FLK_MSAC_2.sh',
 'job_Ant-v4_POMDP-F

In [15]:
len(job_script_list)

481

In [17]:
for job_i, job_script in enumerate(job_script_list):
    if '.sh' not in job_script:
        continue
    if 'PPO' not in job_script:
        continue
    job_script_path = os.path.join(job_dir, job_script)
    print('Submitting {}: {}'.format(job_i, job_script_path))
    !sbatch {job_script_path} 
    print("#########################################################################################")

Submitting 13: /scratch/lingheng/mrl_job_scripts/job_Ant-v4_MDP_PPO_0.sh
Submitted batch job 40234774
#########################################################################################
Submitting 14: /scratch/lingheng/mrl_job_scripts/job_Ant-v4_MDP_PPO_1.sh
Submitted batch job 40234775
#########################################################################################
Submitting 15: /scratch/lingheng/mrl_job_scripts/job_Ant-v4_MDP_PPO_2.sh
Submitted batch job 40234776
#########################################################################################
Submitting 16: /scratch/lingheng/mrl_job_scripts/job_Ant-v4_MDP_PPO_3.sh
Submitted batch job 40234777
#########################################################################################
Submitting 37: /scratch/lingheng/mrl_job_scripts/job_Ant-v4_POMDP-FLK_PPO_0.sh
Submitted batch job 40234778
#########################################################################################
Submitting 38: /scratch/lingheng/m

In [None]:
job_script_path = os.path.join(job_dir, job_script)
print('Submitting {}: {}'.format(job_i, job_script_path))
!sbatch {job_script_path} 

## Resume Experiment

In [3]:
CPU_NUM = 1
JOB_TIME = '0-12:00'
JOB_MEMORY = '10G'
job_dir = '/scratch/lingheng/mrl_resume_job_scripts'              # job script folder
job_out_dir = '/scratch/lingheng/mrl_resume_job_scripts_output'   # job output folder
data_dir = '/scratch/lingheng/mrl_data'    # experiment data folder

In [4]:
if not os.path.exists(job_dir):
    os.makedirs(job_dir)
if not os.path.exists(job_out_dir):
    os.makedirs(job_out_dir)

In [23]:
for exp_run in os.listdir(data_dir):
    if '_gsdata_' in exp_run:
        continue
    # print(exp_run)
    exp_run_inner = os.listdir(os.path.join(data_dir, exp_run))[0]
    progress_file = os.path.join(data_dir, exp_run, exp_run_inner, 'progress.txt')
    if os.path.getsize(progress_file) != 0:
        progress_df = pd.read_csv(progress_file, sep='\t')
        if progress_df.isnull().values.any():
            print(exp_run)
            # break
    # os.path.getsize

2023-08-03_Ant-v4_POMDP-RSM_LSTM-TD3_3
2023-08-03_Walker2d-v4_POMDP-RN_LSTM-TD3_2
2023-08-03_Ant-v4_POMDP-RV_LSTM-TD3_3
2023-08-03_Walker2d-v4_POMDP-RSM_LSTM-TD3_3
2023-08-02_Ant-v4_MDP_LSTM-TD3_0.sh
2023-08-03_Walker2d-v4_POMDP-FLK_LSTM-TD3_1
2023-08-03_Hopper-v4_MDP_LSTM-TD3_3
2023-08-03_Walker2d-v4_POMDP-RV_LSTM-TD3_1
2023-08-03_Ant-v4_POMDP-FLK_LSTM-TD3_1
2023-08-03_Ant-v4_POMDP-RN_LSTM-TD3_0
2023-08-03_HalfCheetah-v4_POMDP-FLK_LSTM-TD3_2
2023-08-03_Hopper-v4_POMDP-RSM_LSTM-TD3_3
2023-08-03_HalfCheetah-v4_MDP_LSTM-TD3_2
2023-08-03_HalfCheetah-v4_POMDP-RN_LSTM-TD3_3
2023-08-03_Hopper-v4_POMDP-RN_LSTM-TD3_0
2023-08-03_HalfCheetah-v4_POMDP-RV_LSTM-TD3_0
2023-08-03_Hopper-v4_POMDP-RV_LSTM-TD3_3
2023-08-03_Hopper-v4_POMDP-FLK_LSTM-TD3_1
2023-08-03_HalfCheetah-v4_POMDP-RSM_LSTM-TD3_0
2023-08-03_Ant-v4_MDP_LSTM-TD3_3
2023-08-03_Ant-v4_POMDP-RV_LSTM-TD3_0
2023-08-03_Walker2d-v4_POMDP-RN_LSTM-TD3_1
2023-08-03_Ant-v4_POMDP-RSM_LSTM-TD3_0
2023-08-03_Walker2d-v4_POMDP-RSM_LSTM-TD3_0
2023-08-03

In [22]:
progress_df

Unnamed: 0,Epoch,AverageEpHCRet,StdEpHCRet,MaxEpHCRet,MinEpHCRet,AverageTestEpHCRet,StdTestEpHCRet,MaxTestEpHCRet,MinTestEpHCRet,AverageEpOrigHCRet,...,StdQ1Vals,MaxQ1Vals,MinQ1Vals,AverageQ2Vals,StdQ2Vals,MaxQ2Vals,MinQ2Vals,LossPi,LossQ,Time
0,1,-54.68223,80.206633,17.832783,-356.62875,,,,,-54.68223,...,,,,,,,,,,266.78701
1,2,-73.242708,132.004403,18.715786,-358.145,,,,,-73.242708,...,,,,,,,,,,781.815825
2,3,,,,,,,,,,...,,,,,,,,,,2062.677243
3,4,,,,,,,,,,...,,,,,,,,,,2422.52257
4,5,,,,,,,,,,...,,,,,,,,,,3155.476716
5,6,,,,,,,,,,...,,,,,,,,,,3643.740835
6,7,,,,,,,,,,...,,,,,,,,,,4290.484941
7,8,,,,,,,,,,...,,,,,,,,,,4700.222509
8,9,,,,,,,,,,...,,,,,,,,,,5401.253952
9,10,,,,,,,,,,...,,,,,,,,,,5902.924173


## Remove PPO with Action Clamp
Do not run this part again.

In [9]:
# data_dir = '/scratch/lingheng/mrl_data'    # experiment data folder

In [10]:
# os.mkdir(os.path.join(data_dir, 'archive_PPO_with_action_clamp'))

In [11]:
# for exp_run in os.listdir(data_dir):
#     if 'PPO' in exp_run:
#         print(exp_run)
#         shutil.move(os.path.join(data_dir, exp_run), os.path.join(data_dir, 'archive_PPO_with_action_clamp'))