In [5]:
import numpy as np
import os
# Slurm resources requested by every job script generated for this experiment.
CPU_NUM = 4             # substituted into --cpus-per-task and into mpirun's -np
JOB_TIME = '0-12:00'    # substituted into --time (DD-HH:MM)
JOB_MEMORY = '12000M'   # substituted into --mem

# Both directories are derived from one tag so the scripts and their logs stay paired.
_experiment_tag = 'DDPG_Sparse'
job_sub_dir = './job_scripts_{}'.format(_experiment_tag)          # generated .sh files
job_out_dir = './job_scripts_output_{}'.format(_experiment_tag)   # target of #SBATCH --output

In [6]:
# Create the script and log directories if they are missing.
# exist_ok=True turns the call into a no-op when the directory is already there,
# so the cell is safe to re-run and there is no gap between "check" and "create".
# It still raises if the path exists but is not a directory.
os.makedirs(job_sub_dir, exist_ok=True)
os.makedirs(job_out_dir, exist_ok=True)

In [7]:
# Write one Slurm batch script per (task, beta, rho, seed) combination for the
# ddpg_sparse algorithm. Scripts go to job_sub_dir; each script directs its log
# to job_out_dir.

# NOTE(review): unlike the task lists in the later cells of this notebook,
# 'InvertedPendulumMuJoCoEnv-v0' is not listed here -- confirm that is intentional.
tasks = [
    'HalfCheetahMuJoCoEnv-v0', 'AntMuJoCoEnv-v0', 'Walker2DMuJoCoEnv-v0', 'HopperMuJoCoEnv-v0',
    'InvertedDoublePendulumMuJoCoEnv-v0',
    'HalfCheetahPyBulletEnv-v0', 'AntPyBulletEnv-v0', 'Walker2DPyBulletEnv-v0', 'HopperPyBulletEnv-v0',
    'InvertedPendulumPyBulletEnv-v0', 'InvertedDoublePendulumPyBulletEnv-v0', 'ReacherPyBulletEnv-v0',
]

# Hyper-parameter grid passed through to ddpg_sparse.py on the command line.
critic_sparsity_penalty_beta = [1, 0.8, 0.6, 0.4, 0.2, 0.1, 0.05, 0.02]
critic_sparsity_parameter_rho = [0.8, 0.6, 0.4, 0.2, 0.1]
seeds = ['0', '1']

# Script sections that are the same for every job are rendered once up front.
sbatch_resource_header = ''.join([
    '#!/bin/bash\n',
    '#SBATCH --account=def-rgorbet\n',
    '#SBATCH --cpus-per-task={}    #Maximum of CPU cores per GPU request: 6 on Cedar, 16 on Graham.\n'.format(CPU_NUM),
    '#SBATCH --mem={}               # memory per node\n'.format(JOB_MEMORY),
    '#SBATCH --time={}            # time (DD-HH:MM)\n'.format(JOB_TIME),
])
environment_setup = ''.join([
    '## Main processing command\n',
    'module load mpi4py/3.0.3 \n',
    'source ~/torch_env/bin/activate\n',
])

for task in tasks:
    for beta in critic_sparsity_penalty_beta:
        for rho in critic_sparsity_parameter_rho:
            for s in seeds:
                job_filename = 'job_{0}_{1}_{2}_{3}.sh'.format(task, beta, rho, s)
                print(job_filename)

                # Per-job pieces: the log file name and the training command.
                output_directive = '#SBATCH --output={4}/job_{0}_{1}_{2}_{3}_%N-%j.out        # %N for node name, %j for jobID\n'.format(task,  beta, rho, s, job_out_dir)
                launch_command = 'mpirun --oversubscribe -np {4} python ~/projects/def-rgorbet/lingheng/spinningup_new/spinup/algos/pytorch/ddpg_sparse/ddpg_sparse.py  --env {0} --critic_sparsity_penalty_beta {1} --critic_sparsity_parameter_rho {2} --seed {3} --epochs 200  --data_dir spinup_DDPG_Sparse --exp_name DDPG-Sparse_{0}_{1}_{2}_{3}'.format(task, beta, rho, s, CPU_NUM)

                with open(os.path.join(job_sub_dir, job_filename), 'w') as job_file:
                    job_file.writelines([
                        sbatch_resource_header,
                        output_directive,
                        environment_setup,
                        launch_command,
                    ])

job_HalfCheetahMuJoCoEnv-v0_1_0.8_0.sh
job_HalfCheetahMuJoCoEnv-v0_1_0.8_1.sh
job_HalfCheetahMuJoCoEnv-v0_1_0.6_0.sh
job_HalfCheetahMuJoCoEnv-v0_1_0.6_1.sh
job_HalfCheetahMuJoCoEnv-v0_1_0.4_0.sh
job_HalfCheetahMuJoCoEnv-v0_1_0.4_1.sh
job_HalfCheetahMuJoCoEnv-v0_1_0.2_0.sh
job_HalfCheetahMuJoCoEnv-v0_1_0.2_1.sh
job_HalfCheetahMuJoCoEnv-v0_1_0.1_0.sh
job_HalfCheetahMuJoCoEnv-v0_1_0.1_1.sh
job_HalfCheetahMuJoCoEnv-v0_0.8_0.8_0.sh
job_HalfCheetahMuJoCoEnv-v0_0.8_0.8_1.sh
job_HalfCheetahMuJoCoEnv-v0_0.8_0.6_0.sh
job_HalfCheetahMuJoCoEnv-v0_0.8_0.6_1.sh
job_HalfCheetahMuJoCoEnv-v0_0.8_0.4_0.sh
job_HalfCheetahMuJoCoEnv-v0_0.8_0.4_1.sh
job_HalfCheetahMuJoCoEnv-v0_0.8_0.2_0.sh
job_HalfCheetahMuJoCoEnv-v0_0.8_0.2_1.sh
job_HalfCheetahMuJoCoEnv-v0_0.8_0.1_0.sh
job_HalfCheetahMuJoCoEnv-v0_0.8_0.1_1.sh
job_HalfCheetahMuJoCoEnv-v0_0.6_0.8_0.sh
job_HalfCheetahMuJoCoEnv-v0_0.6_0.8_1.sh
job_HalfCheetahMuJoCoEnv-v0_0.6_0.6_0.sh
job_HalfCheetahMuJoCoEnv-v0_0.6_0.6_1.sh
job_HalfCheetahMuJoCoEnv-v0_0.6_0.4_

job_Walker2DMuJoCoEnv-v0_0.05_0.4_1.sh
job_Walker2DMuJoCoEnv-v0_0.05_0.2_0.sh
job_Walker2DMuJoCoEnv-v0_0.05_0.2_1.sh
job_Walker2DMuJoCoEnv-v0_0.05_0.1_0.sh
job_Walker2DMuJoCoEnv-v0_0.05_0.1_1.sh
job_Walker2DMuJoCoEnv-v0_0.02_0.8_0.sh
job_Walker2DMuJoCoEnv-v0_0.02_0.8_1.sh
job_Walker2DMuJoCoEnv-v0_0.02_0.6_0.sh
job_Walker2DMuJoCoEnv-v0_0.02_0.6_1.sh
job_Walker2DMuJoCoEnv-v0_0.02_0.4_0.sh
job_Walker2DMuJoCoEnv-v0_0.02_0.4_1.sh
job_Walker2DMuJoCoEnv-v0_0.02_0.2_0.sh
job_Walker2DMuJoCoEnv-v0_0.02_0.2_1.sh
job_Walker2DMuJoCoEnv-v0_0.02_0.1_0.sh
job_Walker2DMuJoCoEnv-v0_0.02_0.1_1.sh
job_HopperMuJoCoEnv-v0_1_0.8_0.sh
job_HopperMuJoCoEnv-v0_1_0.8_1.sh
job_HopperMuJoCoEnv-v0_1_0.6_0.sh
job_HopperMuJoCoEnv-v0_1_0.6_1.sh
job_HopperMuJoCoEnv-v0_1_0.4_0.sh
job_HopperMuJoCoEnv-v0_1_0.4_1.sh
job_HopperMuJoCoEnv-v0_1_0.2_0.sh
job_HopperMuJoCoEnv-v0_1_0.2_1.sh
job_HopperMuJoCoEnv-v0_1_0.1_0.sh
job_HopperMuJoCoEnv-v0_1_0.1_1.sh
job_HopperMuJoCoEnv-v0_0.8_0.8_0.sh
job_HopperMuJoCoEnv-v0_0.8_0.8_1.sh
job

job_HalfCheetahPyBulletEnv-v0_0.6_0.6_0.sh
job_HalfCheetahPyBulletEnv-v0_0.6_0.6_1.sh
job_HalfCheetahPyBulletEnv-v0_0.6_0.4_0.sh
job_HalfCheetahPyBulletEnv-v0_0.6_0.4_1.sh
job_HalfCheetahPyBulletEnv-v0_0.6_0.2_0.sh
job_HalfCheetahPyBulletEnv-v0_0.6_0.2_1.sh
job_HalfCheetahPyBulletEnv-v0_0.6_0.1_0.sh
job_HalfCheetahPyBulletEnv-v0_0.6_0.1_1.sh
job_HalfCheetahPyBulletEnv-v0_0.4_0.8_0.sh
job_HalfCheetahPyBulletEnv-v0_0.4_0.8_1.sh
job_HalfCheetahPyBulletEnv-v0_0.4_0.6_0.sh
job_HalfCheetahPyBulletEnv-v0_0.4_0.6_1.sh
job_HalfCheetahPyBulletEnv-v0_0.4_0.4_0.sh
job_HalfCheetahPyBulletEnv-v0_0.4_0.4_1.sh
job_HalfCheetahPyBulletEnv-v0_0.4_0.2_0.sh
job_HalfCheetahPyBulletEnv-v0_0.4_0.2_1.sh
job_HalfCheetahPyBulletEnv-v0_0.4_0.1_0.sh
job_HalfCheetahPyBulletEnv-v0_0.4_0.1_1.sh
job_HalfCheetahPyBulletEnv-v0_0.2_0.8_0.sh
job_HalfCheetahPyBulletEnv-v0_0.2_0.8_1.sh
job_HalfCheetahPyBulletEnv-v0_0.2_0.6_0.sh
job_HalfCheetahPyBulletEnv-v0_0.2_0.6_1.sh
job_HalfCheetahPyBulletEnv-v0_0.2_0.4_0.sh
job_HalfChe

job_HopperPyBulletEnv-v0_0.6_0.6_1.sh
job_HopperPyBulletEnv-v0_0.6_0.4_0.sh
job_HopperPyBulletEnv-v0_0.6_0.4_1.sh
job_HopperPyBulletEnv-v0_0.6_0.2_0.sh
job_HopperPyBulletEnv-v0_0.6_0.2_1.sh
job_HopperPyBulletEnv-v0_0.6_0.1_0.sh
job_HopperPyBulletEnv-v0_0.6_0.1_1.sh
job_HopperPyBulletEnv-v0_0.4_0.8_0.sh
job_HopperPyBulletEnv-v0_0.4_0.8_1.sh
job_HopperPyBulletEnv-v0_0.4_0.6_0.sh
job_HopperPyBulletEnv-v0_0.4_0.6_1.sh
job_HopperPyBulletEnv-v0_0.4_0.4_0.sh
job_HopperPyBulletEnv-v0_0.4_0.4_1.sh
job_HopperPyBulletEnv-v0_0.4_0.2_0.sh
job_HopperPyBulletEnv-v0_0.4_0.2_1.sh
job_HopperPyBulletEnv-v0_0.4_0.1_0.sh
job_HopperPyBulletEnv-v0_0.4_0.1_1.sh
job_HopperPyBulletEnv-v0_0.2_0.8_0.sh
job_HopperPyBulletEnv-v0_0.2_0.8_1.sh
job_HopperPyBulletEnv-v0_0.2_0.6_0.sh
job_HopperPyBulletEnv-v0_0.2_0.6_1.sh
job_HopperPyBulletEnv-v0_0.2_0.4_0.sh
job_HopperPyBulletEnv-v0_0.2_0.4_1.sh
job_HopperPyBulletEnv-v0_0.2_0.2_0.sh
job_HopperPyBulletEnv-v0_0.2_0.2_1.sh
job_HopperPyBulletEnv-v0_0.2_0.1_0.sh
job_HopperPy

job_ReacherPyBulletEnv-v0_1_0.6_0.sh
job_ReacherPyBulletEnv-v0_1_0.6_1.sh
job_ReacherPyBulletEnv-v0_1_0.4_0.sh
job_ReacherPyBulletEnv-v0_1_0.4_1.sh
job_ReacherPyBulletEnv-v0_1_0.2_0.sh
job_ReacherPyBulletEnv-v0_1_0.2_1.sh
job_ReacherPyBulletEnv-v0_1_0.1_0.sh
job_ReacherPyBulletEnv-v0_1_0.1_1.sh
job_ReacherPyBulletEnv-v0_0.8_0.8_0.sh
job_ReacherPyBulletEnv-v0_0.8_0.8_1.sh
job_ReacherPyBulletEnv-v0_0.8_0.6_0.sh
job_ReacherPyBulletEnv-v0_0.8_0.6_1.sh
job_ReacherPyBulletEnv-v0_0.8_0.4_0.sh
job_ReacherPyBulletEnv-v0_0.8_0.4_1.sh
job_ReacherPyBulletEnv-v0_0.8_0.2_0.sh
job_ReacherPyBulletEnv-v0_0.8_0.2_1.sh
job_ReacherPyBulletEnv-v0_0.8_0.1_0.sh
job_ReacherPyBulletEnv-v0_0.8_0.1_1.sh
job_ReacherPyBulletEnv-v0_0.6_0.8_0.sh
job_ReacherPyBulletEnv-v0_0.6_0.8_1.sh
job_ReacherPyBulletEnv-v0_0.6_0.6_0.sh
job_ReacherPyBulletEnv-v0_0.6_0.6_1.sh
job_ReacherPyBulletEnv-v0_0.6_0.4_0.sh
job_ReacherPyBulletEnv-v0_0.6_0.4_1.sh
job_ReacherPyBulletEnv-v0_0.6_0.2_0.sh
job_ReacherPyBulletEnv-v0_0.6_0.2_1.sh
jo

In [16]:
import numpy as np
import os
# Slurm resources requested by every job script generated for this experiment.
CPU_NUM = 4             # substituted into --cpus-per-task and into mpirun's -np
JOB_TIME = '0-12:00'    # substituted into --time (DD-HH:MM)
JOB_MEMORY = '12000M'   # substituted into --mem

# Both directories are derived from one tag so the scripts and their logs stay paired.
_experiment_tag = 'MDP_LSTM_TD3_CriticMemGate-False-Before-False_ActorMemGate-True-Before-False'
job_sub_dir = './job_scripts_{}'.format(_experiment_tag)          # generated .sh files
job_out_dir = './job_scripts_output_{}'.format(_experiment_tag)   # target of #SBATCH --output

In [17]:
# Create the script and log directories if they are missing.
# exist_ok=True turns the call into a no-op when the directory is already there,
# so the cell is safe to re-run and there is no gap between "check" and "create".
# It still raises if the path exists but is not a directory.
os.makedirs(job_sub_dir, exist_ok=True)
os.makedirs(job_out_dir, exist_ok=True)

In [18]:
# Write one Slurm batch script per (task, observability, seed, history length)
# combination for the lstm_td3 algorithm, with the critic memory gate off and the
# actor memory gate on. Scripts go to job_sub_dir; logs are directed to job_out_dir.

tasks = [
    'HalfCheetahMuJoCoEnv-v0', 'AntMuJoCoEnv-v0', 'Walker2DMuJoCoEnv-v0', 'HopperMuJoCoEnv-v0',
    'InvertedPendulumMuJoCoEnv-v0', 'InvertedDoublePendulumMuJoCoEnv-v0',
    'HalfCheetahPyBulletEnv-v0', 'AntPyBulletEnv-v0', 'Walker2DPyBulletEnv-v0', 'HopperPyBulletEnv-v0',
    'InvertedPendulumPyBulletEnv-v0', 'InvertedDoublePendulumPyBulletEnv-v0', 'ReacherPyBulletEnv-v0',
]

# Alternative task subsets, kept for quick switching:
# tasks = ['HalfCheetahPyBulletEnv-v0', 'AntPyBulletEnv-v0', 'Walker2DPyBulletEnv-v0', 'HopperPyBulletEnv-v0',
#          'InvertedPendulumPyBulletEnv-v0', 'InvertedDoublePendulumPyBulletEnv-v0', 'ReacherPyBulletEnv-v0']
# tasks = ['HalfCheetahMuJoCoEnv-v0', 'AntMuJoCoEnv-v0', 'Walker2DMuJoCoEnv-v0', 'HopperMuJoCoEnv-v0',
#          'InvertedPendulumMuJoCoEnv-v0', 'InvertedDoublePendulumMuJoCoEnv-v0']

partial_observable = [False]
# seeds = ['0', '1', '2', '3', '4']
seeds = ['0', '1']
# max_hist_len = [10, 50, 100]
max_hist_len = [0, 1, 3, 5]
alg_name = 'lstm_td3'

# Script sections that are the same for every job are rendered once up front.
sbatch_resource_header = ''.join([
    '#!/bin/bash\n',
    '#SBATCH --account=def-rgorbet\n',
    '#SBATCH --cpus-per-task={}    #Maximum of CPU cores per GPU request: 6 on Cedar, 16 on Graham.\n'.format(CPU_NUM),
    '#SBATCH --mem={}               # memory per node\n'.format(JOB_MEMORY),
    '#SBATCH --time={}            # time (DD-HH:MM)\n'.format(JOB_TIME),
])
environment_setup = ''.join([
    '## Main processing command\n',
    'module load mpi4py/3.0.3 \n',
    'source ~/torch_env/bin/activate\n',
])

for task in tasks:
    for p_obs in partial_observable:
        # Label used in file and experiment names; depends only on p_obs.
        p_obs_str = 'POMDP' if p_obs else 'MDP'
        for s in seeds:
            for m_h_l in max_hist_len:
                job_filename = '{3}_job_{0}_{1}_{2}_{4}.sh'.format(task, s, p_obs_str, alg_name, m_h_l)
                print(job_filename)

                # Per-job pieces: the log file name and the training command.
                output_directive = '#SBATCH --output={0}/{4}_job_{1}_{2}_{3}_{5}_%N-%j.out        # %N for node name, %j for jobID\n'.format(job_out_dir,task, s, p_obs_str, alg_name, m_h_l)
                launch_command = 'mpirun --oversubscribe -np {4} python ~/projects/def-rgorbet/lingheng/spinningup_new/spinup/algos/pytorch/{5}/{5}.py  --env {0} --partially_observable {1} --seed {2} --epochs 200 --max_hist_len {6} --critic_mem_gate False --critic_mem_gate_before_current_feature_extraction False --actor_mem_gate True --actor_mem_gate_before_current_feature_extraction False   --data_dir spinup_MDP_LSTM_TD3_CriticMemGate-False-BeforeCurr-False_ActorMemGate-True-BeforeCurr-False --exp_name {5}_{0}_{2}_{3}_{6}_CriticMemGate-False-BeforeCurr-False_ActorMemGate-True-BeforeCurr-False'.format(task, p_obs, s, p_obs_str, CPU_NUM, alg_name, m_h_l)

                with open(os.path.join(job_sub_dir, job_filename), 'w') as job_file:
                    job_file.writelines([
                        sbatch_resource_header,
                        output_directive,
                        environment_setup,
                        launch_command,
                    ])
                    

lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_MDP_0.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_MDP_1.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_MDP_3.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_MDP_5.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_MDP_0.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_MDP_1.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_MDP_3.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_MDP_5.sh
lstm_td3_job_AntMuJoCoEnv-v0_0_MDP_0.sh
lstm_td3_job_AntMuJoCoEnv-v0_0_MDP_1.sh
lstm_td3_job_AntMuJoCoEnv-v0_0_MDP_3.sh
lstm_td3_job_AntMuJoCoEnv-v0_0_MDP_5.sh
lstm_td3_job_AntMuJoCoEnv-v0_1_MDP_0.sh
lstm_td3_job_AntMuJoCoEnv-v0_1_MDP_1.sh
lstm_td3_job_AntMuJoCoEnv-v0_1_MDP_3.sh
lstm_td3_job_AntMuJoCoEnv-v0_1_MDP_5.sh
lstm_td3_job_Walker2DMuJoCoEnv-v0_0_MDP_0.sh
lstm_td3_job_Walker2DMuJoCoEnv-v0_0_MDP_1.sh
lstm_td3_job_Walker2DMuJoCoEnv-v0_0_MDP_3.sh
lstm_td3_job_Walker2DMuJoCoEnv-v0_0_MDP_5.sh
lstm_td3_job_Walker2DMuJoCoEnv-v0_1_MDP_0.sh
lstm_td3_job_Walker2DMuJoCoEnv-v0_1_MDP_1.sh
lstm_td3_job_Walker2DMuJoC

### POMDP: Random Noise

In [6]:
# Write one Slurm batch script per (algorithm, task, noise sigma, seed) combination
# for the non-recurrent algorithms under random-noise partial observability.
#
# NOTE(review): job_sub_dir, job_out_dir, CPU_NUM, JOB_MEMORY and JOB_TIME are not
# assigned in this cell; it uses whatever the most recently executed configuration
# cell left in the kernel. Confirm the scripts land in the intended directory.

tasks = [
    'HalfCheetahMuJoCoEnv-v0', 'AntMuJoCoEnv-v0', 'Walker2DMuJoCoEnv-v0', 'HopperMuJoCoEnv-v0',
    'InvertedPendulumMuJoCoEnv-v0', 'InvertedDoublePendulumMuJoCoEnv-v0',
    'HalfCheetahPyBulletEnv-v0', 'AntPyBulletEnv-v0', 'Walker2DPyBulletEnv-v0', 'HopperPyBulletEnv-v0',
    'InvertedPendulumPyBulletEnv-v0', 'InvertedDoublePendulumPyBulletEnv-v0', 'ReacherPyBulletEnv-v0',
]

# Alternative task subsets, kept for quick switching:
# tasks = ['HalfCheetahPyBulletEnv-v0', 'AntPyBulletEnv-v0', 'Walker2DPyBulletEnv-v0', 'HopperPyBulletEnv-v0',
#          'InvertedPendulumPyBulletEnv-v0', 'InvertedDoublePendulumPyBulletEnv-v0', 'ReacherPyBulletEnv-v0']
# tasks = ['HalfCheetahMuJoCoEnv-v0', 'AntMuJoCoEnv-v0', 'Walker2DMuJoCoEnv-v0', 'HopperMuJoCoEnv-v0',
#          'InvertedPendulumMuJoCoEnv-v0', 'InvertedDoublePendulumMuJoCoEnv-v0']

# partial_observable = [True, False]
partial_observable = [True]
pomdp_type = ['random_noise']
random_noise_sigma = [0.05, 0.1, 0.2, 0.5]
# seeds = ['0', '1', '2', '3', '4']
seeds = ['0', '1']

alg_names = ['td3', 'sac', 'ddpg', 'td3_ow']

# Script sections that are the same for every job are rendered once up front.
sbatch_resource_header = ''.join([
    '#!/bin/bash\n',
    '#SBATCH --account=def-rgorbet\n',
    '#SBATCH --cpus-per-task={}    #Maximum of CPU cores per GPU request: 6 on Cedar, 16 on Graham.\n'.format(CPU_NUM),
    '#SBATCH --mem={}               # memory per node\n'.format(JOB_MEMORY),
    '#SBATCH --time={}            # time (DD-HH:MM)\n'.format(JOB_TIME),
])
environment_setup = ''.join([
    '## Main processing command\n',
    'module load cuda cudnn mpi4py \n',
    'source ~/torch_env/bin/activate\n',
])

for alg_name in alg_names:
    for task in tasks:
        for p_obs in partial_observable:
            # Label used in file and experiment names; depends only on p_obs.
            p_obs_str = 'POMDP' if p_obs else 'MDP'
            for p_type in pomdp_type:
                for r_n_s in random_noise_sigma:
                    for s in seeds:
                        job_filename = '{3}_job_{0}_{1}_{2}_{4}_{5}.sh'.format(task, s, p_obs_str, alg_name, p_type, r_n_s)
                        print(job_filename)

                        # Per-job pieces: the log file name and the training command.
                        output_directive = '#SBATCH --output={0}/{4}_job_{1}_{2}_{3}_{5}_{6}_%N-%j.out        # %N for node name, %j for jobID\n'.format(job_out_dir, task, s, p_obs_str, alg_name, p_type, r_n_s)
                        launch_command = 'mpirun --oversubscribe -np {4} python ~/projects/def-rgorbet/lingheng/spinningup_new/spinup/algos/pytorch/{5}/{5}.py  --env {0} --partially_observable {1} --seed {2} --pomdp_type {6} --epochs 200 --random_noise_sigma {7} --data_dir spinup_POMDP_RandomNoise --exp_name {5}_{0}_{2}_{3}_{6}_RandomNoiseSigma_{7}'.format(task, p_obs, s, p_obs_str, CPU_NUM, alg_name, p_type, r_n_s)

                        with open(os.path.join(job_sub_dir, job_filename), 'w') as job_file:
                            job_file.writelines([
                                sbatch_resource_header,
                                output_directive,
                                environment_setup,
                                launch_command,
                            ])

td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_random_noise_0.05.sh
td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_random_noise_0.05.sh
td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_random_noise_0.1.sh
td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_random_noise_0.1.sh
td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_random_noise_0.2.sh
td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_random_noise_0.2.sh
td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_random_noise_0.5.sh
td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_random_noise_0.5.sh
td3_job_AntMuJoCoEnv-v0_0_POMDP_random_noise_0.05.sh
td3_job_AntMuJoCoEnv-v0_1_POMDP_random_noise_0.05.sh
td3_job_AntMuJoCoEnv-v0_0_POMDP_random_noise_0.1.sh
td3_job_AntMuJoCoEnv-v0_1_POMDP_random_noise_0.1.sh
td3_job_AntMuJoCoEnv-v0_0_POMDP_random_noise_0.2.sh
td3_job_AntMuJoCoEnv-v0_1_POMDP_random_noise_0.2.sh
td3_job_AntMuJoCoEnv-v0_0_POMDP_random_noise_0.5.sh
td3_job_AntMuJoCoEnv-v0_1_POMDP_random_noise_0.5.sh
td3_job_Walker2DMuJoCoEnv-v0_0_POMDP_random_noise_0.05.sh
td3_job_Walker2DMuJoCoEnv-v0_1_POMDP_rando

sac_job_AntPyBulletEnv-v0_0_POMDP_random_noise_0.1.sh
sac_job_AntPyBulletEnv-v0_1_POMDP_random_noise_0.1.sh
sac_job_AntPyBulletEnv-v0_0_POMDP_random_noise_0.2.sh
sac_job_AntPyBulletEnv-v0_1_POMDP_random_noise_0.2.sh
sac_job_AntPyBulletEnv-v0_0_POMDP_random_noise_0.5.sh
sac_job_AntPyBulletEnv-v0_1_POMDP_random_noise_0.5.sh
sac_job_Walker2DPyBulletEnv-v0_0_POMDP_random_noise_0.05.sh
sac_job_Walker2DPyBulletEnv-v0_1_POMDP_random_noise_0.05.sh
sac_job_Walker2DPyBulletEnv-v0_0_POMDP_random_noise_0.1.sh
sac_job_Walker2DPyBulletEnv-v0_1_POMDP_random_noise_0.1.sh
sac_job_Walker2DPyBulletEnv-v0_0_POMDP_random_noise_0.2.sh
sac_job_Walker2DPyBulletEnv-v0_1_POMDP_random_noise_0.2.sh
sac_job_Walker2DPyBulletEnv-v0_0_POMDP_random_noise_0.5.sh
sac_job_Walker2DPyBulletEnv-v0_1_POMDP_random_noise_0.5.sh
sac_job_HopperPyBulletEnv-v0_0_POMDP_random_noise_0.05.sh
sac_job_HopperPyBulletEnv-v0_1_POMDP_random_noise_0.05.sh
sac_job_HopperPyBulletEnv-v0_0_POMDP_random_noise_0.1.sh
sac_job_HopperPyBulletEnv-v0_

td3_ow_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_random_noise_0.2.sh
td3_ow_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_random_noise_0.2.sh
td3_ow_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_random_noise_0.5.sh
td3_ow_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_random_noise_0.5.sh
td3_ow_job_AntMuJoCoEnv-v0_0_POMDP_random_noise_0.05.sh
td3_ow_job_AntMuJoCoEnv-v0_1_POMDP_random_noise_0.05.sh
td3_ow_job_AntMuJoCoEnv-v0_0_POMDP_random_noise_0.1.sh
td3_ow_job_AntMuJoCoEnv-v0_1_POMDP_random_noise_0.1.sh
td3_ow_job_AntMuJoCoEnv-v0_0_POMDP_random_noise_0.2.sh
td3_ow_job_AntMuJoCoEnv-v0_1_POMDP_random_noise_0.2.sh
td3_ow_job_AntMuJoCoEnv-v0_0_POMDP_random_noise_0.5.sh
td3_ow_job_AntMuJoCoEnv-v0_1_POMDP_random_noise_0.5.sh
td3_ow_job_Walker2DMuJoCoEnv-v0_0_POMDP_random_noise_0.05.sh
td3_ow_job_Walker2DMuJoCoEnv-v0_1_POMDP_random_noise_0.05.sh
td3_ow_job_Walker2DMuJoCoEnv-v0_0_POMDP_random_noise_0.1.sh
td3_ow_job_Walker2DMuJoCoEnv-v0_1_POMDP_random_noise_0.1.sh
td3_ow_job_Walker2DMuJoCoEnv-v0_0_POMDP_random_noise_0.2.sh
td3_

In [7]:
# Write one Slurm batch script per (task, noise sigma, seed, history length)
# combination for the lstm_td3 algorithm under random-noise partial observability.
#
# The --exp_name suffix is spelled 'RandomNoiseSigma' (it was previously misspelled
# 'RandomNiseSigma'), matching the spelling used by the td3/sac/ddpg/td3_ow
# random-noise scripts so results from both groups can be matched by name.
# Runs produced with the old spelling keep their old experiment names.
#
# NOTE(review): job_sub_dir, job_out_dir, CPU_NUM, JOB_MEMORY and JOB_TIME are not
# assigned in this cell; it uses whatever the most recently executed configuration
# cell left in the kernel. Confirm the scripts land in the intended directory.

tasks = ['HalfCheetahMuJoCoEnv-v0', 'AntMuJoCoEnv-v0', 'Walker2DMuJoCoEnv-v0', 'HopperMuJoCoEnv-v0',
         'InvertedPendulumMuJoCoEnv-v0', 'InvertedDoublePendulumMuJoCoEnv-v0',
         'HalfCheetahPyBulletEnv-v0', 'AntPyBulletEnv-v0', 'Walker2DPyBulletEnv-v0', 'HopperPyBulletEnv-v0',
         'InvertedPendulumPyBulletEnv-v0', 'InvertedDoublePendulumPyBulletEnv-v0', 'ReacherPyBulletEnv-v0']

# Alternative task subsets, kept for quick switching:
# tasks = ['HalfCheetahPyBulletEnv-v0', 'AntPyBulletEnv-v0', 'Walker2DPyBulletEnv-v0', 'HopperPyBulletEnv-v0',
#          'InvertedPendulumPyBulletEnv-v0', 'InvertedDoublePendulumPyBulletEnv-v0', 'ReacherPyBulletEnv-v0']
# tasks = ['HalfCheetahMuJoCoEnv-v0', 'AntMuJoCoEnv-v0', 'Walker2DMuJoCoEnv-v0', 'HopperMuJoCoEnv-v0',
#          'InvertedPendulumMuJoCoEnv-v0', 'InvertedDoublePendulumMuJoCoEnv-v0']

partial_observable = [True]
pomdp_type = ['random_noise']
random_noise_sigma = [0.05, 0.1, 0.2, 0.5]
# seeds = ['0', '1', '2', '3', '4']
seeds = ['0', '1']
# max_hist_len = [10, 50, 100]
max_hist_len = [0, 1, 3, 5]
alg_name = 'lstm_td3'

for task in tasks:
    for p_obs in partial_observable:
        # Label used in file and experiment names; depends only on p_obs.
        p_obs_str = 'POMDP' if p_obs else 'MDP'
        for p_type in pomdp_type:
            for r_n_s in random_noise_sigma:
                for s in seeds:
                    for m_h_l in max_hist_len:
                        job_filename = '{3}_job_{0}_{1}_{2}_{4}_{5}_{6}.sh'.format(task, s, p_obs_str, alg_name, m_h_l, p_type, r_n_s)
                        print(job_filename)
                        with open(os.path.join(job_sub_dir, job_filename), 'w') as job_file:
                            # Slurm resource request.
                            job_file.write('#!/bin/bash\n')
                            job_file.write('#SBATCH --account=def-rgorbet\n')
                            job_file.write('#SBATCH --cpus-per-task={}    #Maximum of CPU cores per GPU request: 6 on Cedar, 16 on Graham.\n'.format(CPU_NUM))
                            job_file.write('#SBATCH --mem={}               # memory per node\n'.format(JOB_MEMORY))
                            job_file.write('#SBATCH --time={}            # time (DD-HH:MM)\n'.format(JOB_TIME))
                            job_file.write('#SBATCH --output={0}/{4}_job_{1}_{2}_{3}_{5}_{6}_{7}_%N-%j.out        # %N for node name, %j for jobID\n'.format(job_out_dir,task, s, p_obs_str, alg_name, m_h_l, p_type, r_n_s))
                            # Environment setup followed by the training command.
                            job_file.write('## Main processing command\n')
                            job_file.write('module load cuda cudnn mpi4py \n')
                            job_file.write('source ~/torch_env/bin/activate\n')
                            job_file.write('mpirun --oversubscribe -np {4} python ~/projects/def-rgorbet/lingheng/spinningup_new/spinup/algos/pytorch/{5}/{5}.py  --env {0} --partially_observable {1} --seed {2} --pomdp_type {7} --random_noise_sigma {8} --epochs 200 --max_hist_len {6}  --data_dir spinup_POMDP_RandomNoise --exp_name {5}_{0}_{2}_{3}_{6}_{7}_RandomNoiseSigma_{8}'.format(task, p_obs, s, p_obs_str, CPU_NUM, alg_name, m_h_l, p_type,r_n_s))

lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_0_random_noise_0.05.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_1_random_noise_0.05.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_3_random_noise_0.05.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_5_random_noise_0.05.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_0_random_noise_0.05.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_1_random_noise_0.05.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_3_random_noise_0.05.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_5_random_noise_0.05.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_0_random_noise_0.1.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_1_random_noise_0.1.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_3_random_noise_0.1.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_5_random_noise_0.1.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_0_random_noise_0.1.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_1_random_noise_0.1.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_3_random_

lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_1_POMDP_1_random_noise_0.05.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_1_POMDP_3_random_noise_0.05.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_1_POMDP_5_random_noise_0.05.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_0_POMDP_0_random_noise_0.1.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_0_POMDP_1_random_noise_0.1.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_0_POMDP_3_random_noise_0.1.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_0_POMDP_5_random_noise_0.1.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_1_POMDP_0_random_noise_0.1.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_1_POMDP_1_random_noise_0.1.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_1_POMDP_3_random_noise_0.1.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_1_POMDP_5_random_noise_0.1.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_0_POMDP_0_random_noise_0.2.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_0_POMDP_1_random_noise_0.2.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_0_POMDP_3_random_no

lstm_td3_job_Walker2DPyBulletEnv-v0_1_POMDP_1_random_noise_0.05.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_1_POMDP_3_random_noise_0.05.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_1_POMDP_5_random_noise_0.05.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_0_POMDP_0_random_noise_0.1.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_0_POMDP_1_random_noise_0.1.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_0_POMDP_3_random_noise_0.1.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_0_POMDP_5_random_noise_0.1.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_1_POMDP_0_random_noise_0.1.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_1_POMDP_1_random_noise_0.1.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_1_POMDP_3_random_noise_0.1.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_1_POMDP_5_random_noise_0.1.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_0_POMDP_0_random_noise_0.2.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_0_POMDP_1_random_noise_0.2.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_0_POMDP_3_random_noise_0.2.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_0_POMDP_5_random_noise_0.2.sh
lstm_td

lstm_td3_job_InvertedDoublePendulumPyBulletEnv-v0_1_POMDP_1_random_noise_0.5.sh
lstm_td3_job_InvertedDoublePendulumPyBulletEnv-v0_1_POMDP_3_random_noise_0.5.sh
lstm_td3_job_InvertedDoublePendulumPyBulletEnv-v0_1_POMDP_5_random_noise_0.5.sh
lstm_td3_job_ReacherPyBulletEnv-v0_0_POMDP_0_random_noise_0.05.sh
lstm_td3_job_ReacherPyBulletEnv-v0_0_POMDP_1_random_noise_0.05.sh
lstm_td3_job_ReacherPyBulletEnv-v0_0_POMDP_3_random_noise_0.05.sh
lstm_td3_job_ReacherPyBulletEnv-v0_0_POMDP_5_random_noise_0.05.sh
lstm_td3_job_ReacherPyBulletEnv-v0_1_POMDP_0_random_noise_0.05.sh
lstm_td3_job_ReacherPyBulletEnv-v0_1_POMDP_1_random_noise_0.05.sh
lstm_td3_job_ReacherPyBulletEnv-v0_1_POMDP_3_random_noise_0.05.sh
lstm_td3_job_ReacherPyBulletEnv-v0_1_POMDP_5_random_noise_0.05.sh
lstm_td3_job_ReacherPyBulletEnv-v0_0_POMDP_0_random_noise_0.1.sh
lstm_td3_job_ReacherPyBulletEnv-v0_0_POMDP_1_random_noise_0.1.sh
lstm_td3_job_ReacherPyBulletEnv-v0_0_POMDP_3_random_noise_0.1.sh
lstm_td3_job_ReacherPyBulletEnv-v0_0_

### POMDP: Random Sensor Missing

In [1]:
import numpy as np
import os
# Cores requested per job via `#SBATCH --cpus-per-task`; the same value is passed to `mpirun -np`.
CPU_NUM = 4
# Wall-clock limit written to `#SBATCH --time`, in DD-HH:MM form.
JOB_TIME = '0-12:00'
# Memory request written to `#SBATCH --mem`.
JOB_MEMORY = '12000M'
# Directory the generated job scripts (*.sh) are written into.
job_sub_dir = './job_scripts_RandomSensorMissing'
# Directory used in the `#SBATCH --output` path of every generated script.
job_out_dir = './job_scripts_output_RandomSensorMissing'

In [2]:
# Create the script and log directories. `exist_ok=True` makes re-running this
# cell a no-op and avoids the check-then-create race of `os.path.exists`.
# If either path already exists as a regular file this raises FileExistsError
# here, rather than failing later when the scripts are written.
os.makedirs(job_sub_dir, exist_ok=True)
os.makedirs(job_out_dir, exist_ok=True)

In [3]:
# Generate one SLURM job script per (algorithm, task, missing-probability, seed)
# combination for the "random sensor missing" POMDP setting.
# Scripts are written to `job_sub_dir`; their SLURM logs are directed to `job_out_dir`.

# Environments to sweep over.
tasks = ['HalfCheetahMuJoCoEnv-v0', 'AntMuJoCoEnv-v0', 'Walker2DMuJoCoEnv-v0', 'HopperMuJoCoEnv-v0',
         'InvertedPendulumMuJoCoEnv-v0', 'InvertedDoublePendulumMuJoCoEnv-v0',
         'HalfCheetahPyBulletEnv-v0', 'AntPyBulletEnv-v0', 'Walker2DPyBulletEnv-v0', 'HopperPyBulletEnv-v0',
         'InvertedPendulumPyBulletEnv-v0', 'InvertedDoublePendulumPyBulletEnv-v0', 'ReacherPyBulletEnv-v0']

# Alternative subsets (uncomment one to restrict the sweep):
# tasks = ['HalfCheetahPyBulletEnv-v0', 'AntPyBulletEnv-v0', 'Walker2DPyBulletEnv-v0', 'HopperPyBulletEnv-v0',
#          'InvertedPendulumPyBulletEnv-v0', 'InvertedDoublePendulumPyBulletEnv-v0', 'ReacherPyBulletEnv-v0']

# tasks = ['HalfCheetahMuJoCoEnv-v0', 'AntMuJoCoEnv-v0', 'Walker2DMuJoCoEnv-v0', 'HopperMuJoCoEnv-v0',
#          'InvertedPendulumMuJoCoEnv-v0', 'InvertedDoublePendulumMuJoCoEnv-v0']

# partial_observable = [True, False]
partial_observable = [True]
pomdp_type = ['random_sensor_missing']
random_sensor_missing_prob = [0.05, 0.1, 0.2, 0.5]
# seeds = ['0', '1', '2', '3', '4']
seeds = ['0', '1']

alg_names = ['td3', 'sac', 'ddpg', 'td3_ow']

for alg_name in alg_names:
    for task in tasks:
        for p_obs in partial_observable:
            # The label depends only on p_obs, so derive it once per setting.
            p_obs_str = 'POMDP' if p_obs else 'MDP'
            for p_type in pomdp_type:
                for r_s_m_p in random_sensor_missing_prob:
                    for s in seeds:
                        # Stem shared by the script file name and the SLURM log file name.
                        job_name = '{alg}_job_{task}_{seed}_{obs}_{p_type}_{prob}'.format(
                            alg=alg_name, task=task, seed=s, obs=p_obs_str, p_type=p_type, prob=r_s_m_p)
                        job_filename = job_name + '.sh'
                        print(job_filename)
                        # Named fields replace the positional {0}..{7} indices of the command template.
                        fields = dict(n_proc=CPU_NUM, alg=alg_name, task=task, p_obs=p_obs, seed=s,
                                      obs=p_obs_str, p_type=p_type, prob=r_s_m_p)
                        script_lines = [
                            '#!/bin/bash',
                            '#SBATCH --account=def-rgorbet',
                            '#SBATCH --cpus-per-task={}    #Maximum of CPU cores per GPU request: 6 on Cedar, 16 on Graham.'.format(CPU_NUM),
                            '#SBATCH --mem={}               # memory per node'.format(JOB_MEMORY),
                            '#SBATCH --time={}            # time (DD-HH:MM)'.format(JOB_TIME),
                            '#SBATCH --output={}/{}_%N-%j.out        # %N for node name, %j for jobID'.format(job_out_dir, job_name),
                            '## Main processing command',
                            'module load mpi4py/3.0.3 ',
                            'source ~/torch_env/bin/activate',
                            'mpirun --oversubscribe -np {n_proc} python ~/projects/def-rgorbet/lingheng/spinningup_new/spinup/algos/pytorch/{alg}/{alg}.py  --env {task} --partially_observable {p_obs} --seed {seed} --pomdp_type {p_type} --epochs 200 --random_sensor_missing_prob {prob} --data_dir spinup_POMDP_RandomSensorMissing --exp_name {alg}_{task}_{seed}_{obs}_{p_type}_RandomSensorMissing_{prob}'.format(**fields),
                        ]
                        with open(os.path.join(job_sub_dir, job_filename), 'w') as job_file:
                            # Terminate the last line as well, so the script is a well-formed
                            # text file (the mpirun line previously had no trailing newline).
                            job_file.write('\n'.join(script_lines) + '\n')

td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_random_sensor_missing_0.05.sh
td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_random_sensor_missing_0.05.sh
td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_random_sensor_missing_0.1.sh
td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_random_sensor_missing_0.1.sh
td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_random_sensor_missing_0.2.sh
td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_random_sensor_missing_0.2.sh
td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_random_sensor_missing_0.5.sh
td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_random_sensor_missing_0.5.sh
td3_job_AntMuJoCoEnv-v0_0_POMDP_random_sensor_missing_0.05.sh
td3_job_AntMuJoCoEnv-v0_1_POMDP_random_sensor_missing_0.05.sh
td3_job_AntMuJoCoEnv-v0_0_POMDP_random_sensor_missing_0.1.sh
td3_job_AntMuJoCoEnv-v0_1_POMDP_random_sensor_missing_0.1.sh
td3_job_AntMuJoCoEnv-v0_0_POMDP_random_sensor_missing_0.2.sh
td3_job_AntMuJoCoEnv-v0_1_POMDP_random_sensor_missing_0.2.sh
td3_job_AntMuJoCoEnv-v0_0_POMDP_random_sensor_missing_0.5.sh
td3_job_AntMuJoCo

sac_job_Walker2DMuJoCoEnv-v0_1_POMDP_random_sensor_missing_0.05.sh
sac_job_Walker2DMuJoCoEnv-v0_0_POMDP_random_sensor_missing_0.1.sh
sac_job_Walker2DMuJoCoEnv-v0_1_POMDP_random_sensor_missing_0.1.sh
sac_job_Walker2DMuJoCoEnv-v0_0_POMDP_random_sensor_missing_0.2.sh
sac_job_Walker2DMuJoCoEnv-v0_1_POMDP_random_sensor_missing_0.2.sh
sac_job_Walker2DMuJoCoEnv-v0_0_POMDP_random_sensor_missing_0.5.sh
sac_job_Walker2DMuJoCoEnv-v0_1_POMDP_random_sensor_missing_0.5.sh
sac_job_HopperMuJoCoEnv-v0_0_POMDP_random_sensor_missing_0.05.sh
sac_job_HopperMuJoCoEnv-v0_1_POMDP_random_sensor_missing_0.05.sh
sac_job_HopperMuJoCoEnv-v0_0_POMDP_random_sensor_missing_0.1.sh
sac_job_HopperMuJoCoEnv-v0_1_POMDP_random_sensor_missing_0.1.sh
sac_job_HopperMuJoCoEnv-v0_0_POMDP_random_sensor_missing_0.2.sh
sac_job_HopperMuJoCoEnv-v0_1_POMDP_random_sensor_missing_0.2.sh
sac_job_HopperMuJoCoEnv-v0_0_POMDP_random_sensor_missing_0.5.sh
sac_job_HopperMuJoCoEnv-v0_1_POMDP_random_sensor_missing_0.5.sh
sac_job_InvertedPendulu

ddpg_job_AntPyBulletEnv-v0_0_POMDP_random_sensor_missing_0.5.sh
ddpg_job_AntPyBulletEnv-v0_1_POMDP_random_sensor_missing_0.5.sh
ddpg_job_Walker2DPyBulletEnv-v0_0_POMDP_random_sensor_missing_0.05.sh
ddpg_job_Walker2DPyBulletEnv-v0_1_POMDP_random_sensor_missing_0.05.sh
ddpg_job_Walker2DPyBulletEnv-v0_0_POMDP_random_sensor_missing_0.1.sh
ddpg_job_Walker2DPyBulletEnv-v0_1_POMDP_random_sensor_missing_0.1.sh
ddpg_job_Walker2DPyBulletEnv-v0_0_POMDP_random_sensor_missing_0.2.sh
ddpg_job_Walker2DPyBulletEnv-v0_1_POMDP_random_sensor_missing_0.2.sh
ddpg_job_Walker2DPyBulletEnv-v0_0_POMDP_random_sensor_missing_0.5.sh
ddpg_job_Walker2DPyBulletEnv-v0_1_POMDP_random_sensor_missing_0.5.sh
ddpg_job_HopperPyBulletEnv-v0_0_POMDP_random_sensor_missing_0.05.sh
ddpg_job_HopperPyBulletEnv-v0_1_POMDP_random_sensor_missing_0.05.sh
ddpg_job_HopperPyBulletEnv-v0_0_POMDP_random_sensor_missing_0.1.sh
ddpg_job_HopperPyBulletEnv-v0_1_POMDP_random_sensor_missing_0.1.sh
ddpg_job_HopperPyBulletEnv-v0_0_POMDP_random_sen

td3_ow_job_HopperPyBulletEnv-v0_1_POMDP_random_sensor_missing_0.1.sh
td3_ow_job_HopperPyBulletEnv-v0_0_POMDP_random_sensor_missing_0.2.sh
td3_ow_job_HopperPyBulletEnv-v0_1_POMDP_random_sensor_missing_0.2.sh
td3_ow_job_HopperPyBulletEnv-v0_0_POMDP_random_sensor_missing_0.5.sh
td3_ow_job_HopperPyBulletEnv-v0_1_POMDP_random_sensor_missing_0.5.sh
td3_ow_job_InvertedPendulumPyBulletEnv-v0_0_POMDP_random_sensor_missing_0.05.sh
td3_ow_job_InvertedPendulumPyBulletEnv-v0_1_POMDP_random_sensor_missing_0.05.sh
td3_ow_job_InvertedPendulumPyBulletEnv-v0_0_POMDP_random_sensor_missing_0.1.sh
td3_ow_job_InvertedPendulumPyBulletEnv-v0_1_POMDP_random_sensor_missing_0.1.sh
td3_ow_job_InvertedPendulumPyBulletEnv-v0_0_POMDP_random_sensor_missing_0.2.sh
td3_ow_job_InvertedPendulumPyBulletEnv-v0_1_POMDP_random_sensor_missing_0.2.sh
td3_ow_job_InvertedPendulumPyBulletEnv-v0_0_POMDP_random_sensor_missing_0.5.sh
td3_ow_job_InvertedPendulumPyBulletEnv-v0_1_POMDP_random_sensor_missing_0.5.sh
td3_ow_job_InvertedDo

In [4]:
# Generate one SLURM job script per (task, missing-probability, seed, history length)
# combination for lstm_td3 under the "random sensor missing" POMDP setting.
# Scripts are written to `job_sub_dir`; their SLURM logs are directed to `job_out_dir`.

# Environments to sweep over.
tasks = ['HalfCheetahMuJoCoEnv-v0', 'AntMuJoCoEnv-v0', 'Walker2DMuJoCoEnv-v0', 'HopperMuJoCoEnv-v0',
         'InvertedPendulumMuJoCoEnv-v0', 'InvertedDoublePendulumMuJoCoEnv-v0',
         'HalfCheetahPyBulletEnv-v0', 'AntPyBulletEnv-v0', 'Walker2DPyBulletEnv-v0', 'HopperPyBulletEnv-v0',
         'InvertedPendulumPyBulletEnv-v0', 'InvertedDoublePendulumPyBulletEnv-v0', 'ReacherPyBulletEnv-v0']

# Alternative subsets (uncomment one to restrict the sweep):
# tasks = ['HalfCheetahPyBulletEnv-v0', 'AntPyBulletEnv-v0', 'Walker2DPyBulletEnv-v0', 'HopperPyBulletEnv-v0',
#          'InvertedPendulumPyBulletEnv-v0', 'InvertedDoublePendulumPyBulletEnv-v0', 'ReacherPyBulletEnv-v0']

# tasks = ['HalfCheetahMuJoCoEnv-v0', 'AntMuJoCoEnv-v0', 'Walker2DMuJoCoEnv-v0', 'HopperMuJoCoEnv-v0',
#          'InvertedPendulumMuJoCoEnv-v0', 'InvertedDoublePendulumMuJoCoEnv-v0']

partial_observable = [True]
pomdp_type = ['random_sensor_missing']
random_sensor_missing_prob = [0.05, 0.1, 0.2, 0.5]
# seeds = ['0', '1', '2', '3', '4']
seeds = ['0', '1']
# max_hist_len = [10, 50, 100]
max_hist_len = [0, 1, 3, 5]
alg_name = 'lstm_td3'

for task in tasks:
    for p_obs in partial_observable:
        # The label depends only on p_obs, so derive it once per setting.
        p_obs_str = 'POMDP' if p_obs else 'MDP'
        for p_type in pomdp_type:
            for r_s_m_p in random_sensor_missing_prob:
                for s in seeds:
                    for m_h_l in max_hist_len:
                        # Stem shared by the script file name and the SLURM log file name.
                        job_name = '{alg}_job_{task}_{seed}_{obs}_{hist}_{p_type}_{prob}'.format(
                            alg=alg_name, task=task, seed=s, obs=p_obs_str, hist=m_h_l,
                            p_type=p_type, prob=r_s_m_p)
                        job_filename = job_name + '.sh'
                        print(job_filename)
                        # Named fields replace the positional {0}..{8} indices of the command template.
                        fields = dict(n_proc=CPU_NUM, alg=alg_name, task=task, p_obs=p_obs, seed=s,
                                      obs=p_obs_str, hist=m_h_l, p_type=p_type, prob=r_s_m_p)
                        script_lines = [
                            '#!/bin/bash',
                            '#SBATCH --account=def-rgorbet',
                            '#SBATCH --cpus-per-task={}    #Maximum of CPU cores per GPU request: 6 on Cedar, 16 on Graham.'.format(CPU_NUM),
                            '#SBATCH --mem={}               # memory per node'.format(JOB_MEMORY),
                            '#SBATCH --time={}            # time (DD-HH:MM)'.format(JOB_TIME),
                            '#SBATCH --output={}/{}_%N-%j.out        # %N for node name, %j for jobID'.format(job_out_dir, job_name),
                            '## Main processing command',
                            'module load mpi4py/3.0.3 ',
                            'source ~/torch_env/bin/activate',
                            'mpirun --oversubscribe -np {n_proc} python ~/projects/def-rgorbet/lingheng/spinningup_new/spinup/algos/pytorch/{alg}/{alg}.py  --env {task} --partially_observable {p_obs} --seed {seed} --pomdp_type {p_type} --random_sensor_missing_prob {prob} --epochs 200 --max_hist_len {hist}  --data_dir spinup_POMDP_RandomSensorMissing --exp_name {alg}_{task}_{seed}_{obs}_{hist}_{p_type}_RandomSensorMissing_{prob}'.format(**fields),
                        ]
                        with open(os.path.join(job_sub_dir, job_filename), 'w') as job_file:
                            # Terminate the last line as well, so the script is a well-formed
                            # text file (the mpirun line previously had no trailing newline).
                            job_file.write('\n'.join(script_lines) + '\n')

lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_0_random_sensor_missing_0.05.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_1_random_sensor_missing_0.05.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_3_random_sensor_missing_0.05.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_5_random_sensor_missing_0.05.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_0_random_sensor_missing_0.05.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_1_random_sensor_missing_0.05.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_3_random_sensor_missing_0.05.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_5_random_sensor_missing_0.05.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_0_random_sensor_missing_0.1.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_1_random_sensor_missing_0.1.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_3_random_sensor_missing_0.1.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_5_random_sensor_missing_0.1.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_0_random_sensor_missing_0.1.sh
lstm

lstm_td3_job_HopperMuJoCoEnv-v0_0_POMDP_0_random_sensor_missing_0.5.sh
lstm_td3_job_HopperMuJoCoEnv-v0_0_POMDP_1_random_sensor_missing_0.5.sh
lstm_td3_job_HopperMuJoCoEnv-v0_0_POMDP_3_random_sensor_missing_0.5.sh
lstm_td3_job_HopperMuJoCoEnv-v0_0_POMDP_5_random_sensor_missing_0.5.sh
lstm_td3_job_HopperMuJoCoEnv-v0_1_POMDP_0_random_sensor_missing_0.5.sh
lstm_td3_job_HopperMuJoCoEnv-v0_1_POMDP_1_random_sensor_missing_0.5.sh
lstm_td3_job_HopperMuJoCoEnv-v0_1_POMDP_3_random_sensor_missing_0.5.sh
lstm_td3_job_HopperMuJoCoEnv-v0_1_POMDP_5_random_sensor_missing_0.5.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_0_POMDP_0_random_sensor_missing_0.05.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_0_POMDP_1_random_sensor_missing_0.05.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_0_POMDP_3_random_sensor_missing_0.05.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_0_POMDP_5_random_sensor_missing_0.05.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_1_POMDP_0_random_sensor_missing_0.05.sh
lstm_td3_job_InvertedP

lstm_td3_job_AntPyBulletEnv-v0_1_POMDP_1_random_sensor_missing_0.05.sh
lstm_td3_job_AntPyBulletEnv-v0_1_POMDP_3_random_sensor_missing_0.05.sh
lstm_td3_job_AntPyBulletEnv-v0_1_POMDP_5_random_sensor_missing_0.05.sh
lstm_td3_job_AntPyBulletEnv-v0_0_POMDP_0_random_sensor_missing_0.1.sh
lstm_td3_job_AntPyBulletEnv-v0_0_POMDP_1_random_sensor_missing_0.1.sh
lstm_td3_job_AntPyBulletEnv-v0_0_POMDP_3_random_sensor_missing_0.1.sh
lstm_td3_job_AntPyBulletEnv-v0_0_POMDP_5_random_sensor_missing_0.1.sh
lstm_td3_job_AntPyBulletEnv-v0_1_POMDP_0_random_sensor_missing_0.1.sh
lstm_td3_job_AntPyBulletEnv-v0_1_POMDP_1_random_sensor_missing_0.1.sh
lstm_td3_job_AntPyBulletEnv-v0_1_POMDP_3_random_sensor_missing_0.1.sh
lstm_td3_job_AntPyBulletEnv-v0_1_POMDP_5_random_sensor_missing_0.1.sh
lstm_td3_job_AntPyBulletEnv-v0_0_POMDP_0_random_sensor_missing_0.2.sh
lstm_td3_job_AntPyBulletEnv-v0_0_POMDP_1_random_sensor_missing_0.2.sh
lstm_td3_job_AntPyBulletEnv-v0_0_POMDP_3_random_sensor_missing_0.2.sh
lstm_td3_job_AntP

lstm_td3_job_InvertedDoublePendulumPyBulletEnv-v0_1_POMDP_1_random_sensor_missing_0.05.sh
lstm_td3_job_InvertedDoublePendulumPyBulletEnv-v0_1_POMDP_3_random_sensor_missing_0.05.sh
lstm_td3_job_InvertedDoublePendulumPyBulletEnv-v0_1_POMDP_5_random_sensor_missing_0.05.sh
lstm_td3_job_InvertedDoublePendulumPyBulletEnv-v0_0_POMDP_0_random_sensor_missing_0.1.sh
lstm_td3_job_InvertedDoublePendulumPyBulletEnv-v0_0_POMDP_1_random_sensor_missing_0.1.sh
lstm_td3_job_InvertedDoublePendulumPyBulletEnv-v0_0_POMDP_3_random_sensor_missing_0.1.sh
lstm_td3_job_InvertedDoublePendulumPyBulletEnv-v0_0_POMDP_5_random_sensor_missing_0.1.sh
lstm_td3_job_InvertedDoublePendulumPyBulletEnv-v0_1_POMDP_0_random_sensor_missing_0.1.sh
lstm_td3_job_InvertedDoublePendulumPyBulletEnv-v0_1_POMDP_1_random_sensor_missing_0.1.sh
lstm_td3_job_InvertedDoublePendulumPyBulletEnv-v0_1_POMDP_3_random_sensor_missing_0.1.sh
lstm_td3_job_InvertedDoublePendulumPyBulletEnv-v0_1_POMDP_5_random_sensor_missing_0.1.sh
lstm_td3_job_Inver

### POMDP: Flickering

In [1]:
import numpy as np
import os
# Cores requested per job via `#SBATCH --cpus-per-task`; the same value is passed to `mpirun -np`.
CPU_NUM = 4
# Wall-clock limit written to `#SBATCH --time`, in DD-HH:MM form.
JOB_TIME = '0-12:00'
# Memory request written to `#SBATCH --mem`.
JOB_MEMORY = '12000M'
# Directory the generated job scripts (*.sh) are written into.
job_sub_dir = './job_scripts_Flickering'
# Directory used in the `#SBATCH --output` path of every generated script.
job_out_dir = './job_scripts_output_Flickering'

In [2]:
# Create the script and log directories. `exist_ok=True` makes re-running this
# cell a no-op and avoids the check-then-create race of `os.path.exists`.
# If either path already exists as a regular file this raises FileExistsError
# here, rather than failing later when the scripts are written.
os.makedirs(job_sub_dir, exist_ok=True)
os.makedirs(job_out_dir, exist_ok=True)

In [3]:

# Generate one SLURM job script per (algorithm, task, flicker-probability, seed)
# combination for the "flickering" POMDP setting.
# Scripts are written to `job_sub_dir`; their SLURM logs are directed to `job_out_dir`.

# Environments to sweep over.
tasks = ['HalfCheetahMuJoCoEnv-v0', 'AntMuJoCoEnv-v0', 'Walker2DMuJoCoEnv-v0', 'HopperMuJoCoEnv-v0',
         'InvertedPendulumMuJoCoEnv-v0', 'InvertedDoublePendulumMuJoCoEnv-v0',
         'HalfCheetahPyBulletEnv-v0', 'AntPyBulletEnv-v0', 'Walker2DPyBulletEnv-v0', 'HopperPyBulletEnv-v0',
         'InvertedPendulumPyBulletEnv-v0', 'InvertedDoublePendulumPyBulletEnv-v0', 'ReacherPyBulletEnv-v0']

# Alternative subsets (uncomment one to restrict the sweep):
# tasks = ['HalfCheetahPyBulletEnv-v0', 'AntPyBulletEnv-v0', 'Walker2DPyBulletEnv-v0', 'HopperPyBulletEnv-v0',
#          'InvertedPendulumPyBulletEnv-v0', 'InvertedDoublePendulumPyBulletEnv-v0', 'ReacherPyBulletEnv-v0']

# tasks = ['HalfCheetahMuJoCoEnv-v0', 'AntMuJoCoEnv-v0', 'Walker2DMuJoCoEnv-v0', 'HopperMuJoCoEnv-v0',
#          'InvertedPendulumMuJoCoEnv-v0', 'InvertedDoublePendulumMuJoCoEnv-v0']

partial_observable = [True]
pomdp_type = ['flickering']
flicker_prob = [0.05, 0.1, 0.5, 0.8]
# seeds = ['0', '1', '2', '3', '4']
seeds = ['0', '1']

alg_names = ['td3', 'sac', 'ddpg']

for alg_name in alg_names:
    for task in tasks:
        for p_obs in partial_observable:
            # The label depends only on p_obs, so derive it once per setting.
            p_obs_str = 'POMDP' if p_obs else 'MDP'
            for p_type in pomdp_type:
                for f_p in flicker_prob:
                    for s in seeds:
                        # Stem shared by the script file name and the SLURM log file name.
                        job_name = '{alg}_job_{task}_{seed}_{obs}_{p_type}_{prob}'.format(
                            alg=alg_name, task=task, seed=s, obs=p_obs_str, p_type=p_type, prob=f_p)
                        job_filename = job_name + '.sh'
                        print(job_filename)
                        # Named fields replace the positional {0}..{7} indices of the command template.
                        fields = dict(n_proc=CPU_NUM, alg=alg_name, task=task, p_obs=p_obs, seed=s,
                                      obs=p_obs_str, p_type=p_type, prob=f_p)
                        script_lines = [
                            '#!/bin/bash',
                            '#SBATCH --account=def-rgorbet',
                            '#SBATCH --cpus-per-task={}    #Maximum of CPU cores per GPU request: 6 on Cedar, 16 on Graham.'.format(CPU_NUM),
                            '#SBATCH --mem={}               # memory per node'.format(JOB_MEMORY),
                            '#SBATCH --time={}            # time (DD-HH:MM)'.format(JOB_TIME),
                            '#SBATCH --output={}/{}_%N-%j.out        # %N for node name, %j for jobID'.format(job_out_dir, job_name),
                            '## Main processing command',
                            'module load mpi4py/3.0.3 ',
                            'source ~/torch_env/bin/activate',
                            'mpirun --oversubscribe -np {n_proc} python ~/projects/def-rgorbet/lingheng/spinningup_new/spinup/algos/pytorch/{alg}/{alg}.py  --env {task} --partially_observable {p_obs} --seed {seed} --pomdp_type {p_type} --flicker_prob {prob} --epochs 200 --data_dir spinup_POMDP_Flickering --exp_name {alg}_{task}_{seed}_{obs}_{p_type}_Flickering_{prob}'.format(**fields),
                        ]
                        with open(os.path.join(job_sub_dir, job_filename), 'w') as job_file:
                            # Terminate the last line as well, so the script is a well-formed
                            # text file (the mpirun line previously had no trailing newline).
                            job_file.write('\n'.join(script_lines) + '\n')

td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_flickering_0.05.sh
td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_flickering_0.05.sh
td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_flickering_0.1.sh
td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_flickering_0.1.sh
td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_flickering_0.5.sh
td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_flickering_0.5.sh
td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_flickering_0.8.sh
td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_flickering_0.8.sh
td3_job_AntMuJoCoEnv-v0_0_POMDP_flickering_0.05.sh
td3_job_AntMuJoCoEnv-v0_1_POMDP_flickering_0.05.sh
td3_job_AntMuJoCoEnv-v0_0_POMDP_flickering_0.1.sh
td3_job_AntMuJoCoEnv-v0_1_POMDP_flickering_0.1.sh
td3_job_AntMuJoCoEnv-v0_0_POMDP_flickering_0.5.sh
td3_job_AntMuJoCoEnv-v0_1_POMDP_flickering_0.5.sh
td3_job_AntMuJoCoEnv-v0_0_POMDP_flickering_0.8.sh
td3_job_AntMuJoCoEnv-v0_1_POMDP_flickering_0.8.sh
td3_job_Walker2DMuJoCoEnv-v0_0_POMDP_flickering_0.05.sh
td3_job_Walker2DMuJoCoEnv-v0_1_POMDP_flickering_0.05.sh
td3_job_Walker2DMuJo

sac_job_InvertedDoublePendulumMuJoCoEnv-v0_1_POMDP_flickering_0.1.sh
sac_job_InvertedDoublePendulumMuJoCoEnv-v0_0_POMDP_flickering_0.5.sh
sac_job_InvertedDoublePendulumMuJoCoEnv-v0_1_POMDP_flickering_0.5.sh
sac_job_InvertedDoublePendulumMuJoCoEnv-v0_0_POMDP_flickering_0.8.sh
sac_job_InvertedDoublePendulumMuJoCoEnv-v0_1_POMDP_flickering_0.8.sh
sac_job_HalfCheetahPyBulletEnv-v0_0_POMDP_flickering_0.05.sh
sac_job_HalfCheetahPyBulletEnv-v0_1_POMDP_flickering_0.05.sh
sac_job_HalfCheetahPyBulletEnv-v0_0_POMDP_flickering_0.1.sh
sac_job_HalfCheetahPyBulletEnv-v0_1_POMDP_flickering_0.1.sh
sac_job_HalfCheetahPyBulletEnv-v0_0_POMDP_flickering_0.5.sh
sac_job_HalfCheetahPyBulletEnv-v0_1_POMDP_flickering_0.5.sh
sac_job_HalfCheetahPyBulletEnv-v0_0_POMDP_flickering_0.8.sh
sac_job_HalfCheetahPyBulletEnv-v0_1_POMDP_flickering_0.8.sh
sac_job_AntPyBulletEnv-v0_0_POMDP_flickering_0.05.sh
sac_job_AntPyBulletEnv-v0_1_POMDP_flickering_0.05.sh
sac_job_AntPyBulletEnv-v0_0_POMDP_flickering_0.1.sh
sac_job_AntPyBu

ddpg_job_InvertedDoublePendulumPyBulletEnv-v0_0_POMDP_flickering_0.8.sh
ddpg_job_InvertedDoublePendulumPyBulletEnv-v0_1_POMDP_flickering_0.8.sh
ddpg_job_ReacherPyBulletEnv-v0_0_POMDP_flickering_0.05.sh
ddpg_job_ReacherPyBulletEnv-v0_1_POMDP_flickering_0.05.sh
ddpg_job_ReacherPyBulletEnv-v0_0_POMDP_flickering_0.1.sh
ddpg_job_ReacherPyBulletEnv-v0_1_POMDP_flickering_0.1.sh
ddpg_job_ReacherPyBulletEnv-v0_0_POMDP_flickering_0.5.sh
ddpg_job_ReacherPyBulletEnv-v0_1_POMDP_flickering_0.5.sh
ddpg_job_ReacherPyBulletEnv-v0_0_POMDP_flickering_0.8.sh
ddpg_job_ReacherPyBulletEnv-v0_1_POMDP_flickering_0.8.sh


In [4]:
# Generate one SLURM job script per (task, flicker-probability, seed, history length)
# combination for lstm_td3 under the "flickering" POMDP setting.
# Scripts are written to `job_sub_dir`; their SLURM logs are directed to `job_out_dir`.

# Environments to sweep over.
tasks = ['HalfCheetahMuJoCoEnv-v0', 'AntMuJoCoEnv-v0', 'Walker2DMuJoCoEnv-v0', 'HopperMuJoCoEnv-v0',
         'InvertedPendulumMuJoCoEnv-v0', 'InvertedDoublePendulumMuJoCoEnv-v0',
         'HalfCheetahPyBulletEnv-v0', 'AntPyBulletEnv-v0', 'Walker2DPyBulletEnv-v0', 'HopperPyBulletEnv-v0',
         'InvertedPendulumPyBulletEnv-v0', 'InvertedDoublePendulumPyBulletEnv-v0', 'ReacherPyBulletEnv-v0']

# Alternative subsets (uncomment one to restrict the sweep):
# tasks = ['HalfCheetahPyBulletEnv-v0', 'AntPyBulletEnv-v0', 'Walker2DPyBulletEnv-v0', 'HopperPyBulletEnv-v0',
#          'InvertedPendulumPyBulletEnv-v0', 'InvertedDoublePendulumPyBulletEnv-v0', 'ReacherPyBulletEnv-v0']

# tasks = ['HalfCheetahMuJoCoEnv-v0', 'AntMuJoCoEnv-v0', 'Walker2DMuJoCoEnv-v0', 'HopperMuJoCoEnv-v0',
#          'InvertedPendulumMuJoCoEnv-v0', 'InvertedDoublePendulumMuJoCoEnv-v0']

partial_observable = [True]
pomdp_type = ['flickering']
flicker_prob = [0.05, 0.1, 0.5, 0.8]
# seeds = ['0', '1', '2', '3', '4']
seeds = ['0', '1']
# max_hist_len = [10, 50, 100]
max_hist_len = [0, 1, 3, 5]
alg_name = 'lstm_td3'

for task in tasks:
    for p_obs in partial_observable:
        # The label depends only on p_obs, so derive it once per setting.
        p_obs_str = 'POMDP' if p_obs else 'MDP'
        for p_type in pomdp_type:
            for f_p in flicker_prob:
                for s in seeds:
                    for m_h_l in max_hist_len:
                        # Stem shared by the script file name and the SLURM log file name.
                        job_name = '{alg}_job_{task}_{seed}_{obs}_{hist}_{p_type}_{prob}'.format(
                            alg=alg_name, task=task, seed=s, obs=p_obs_str, hist=m_h_l,
                            p_type=p_type, prob=f_p)
                        job_filename = job_name + '.sh'
                        print(job_filename)
                        # Named fields replace the positional {0}..{8} indices of the command template.
                        fields = dict(n_proc=CPU_NUM, alg=alg_name, task=task, p_obs=p_obs, seed=s,
                                      obs=p_obs_str, hist=m_h_l, p_type=p_type, prob=f_p)
                        script_lines = [
                            '#!/bin/bash',
                            '#SBATCH --account=def-rgorbet',
                            '#SBATCH --cpus-per-task={}    #Maximum of CPU cores per GPU request: 6 on Cedar, 16 on Graham.'.format(CPU_NUM),
                            '#SBATCH --mem={}               # memory per node'.format(JOB_MEMORY),
                            '#SBATCH --time={}            # time (DD-HH:MM)'.format(JOB_TIME),
                            '#SBATCH --output={}/{}_%N-%j.out        # %N for node name, %j for jobID'.format(job_out_dir, job_name),
                            '## Main processing command',
                            'module load mpi4py/3.0.3 ',
                            'source ~/torch_env/bin/activate',
                            'mpirun --oversubscribe -np {n_proc} python ~/projects/def-rgorbet/lingheng/spinningup_new/spinup/algos/pytorch/{alg}/{alg}.py  --env {task} --partially_observable {p_obs} --seed {seed} --pomdp_type {p_type} --flicker_prob {prob} --epochs 200 --max_hist_len {hist}  --data_dir spinup_POMDP_Flickering --exp_name {alg}_{task}_{seed}_{obs}_{hist}_{p_type}_Flickering_{prob}'.format(**fields),
                        ]
                        with open(os.path.join(job_sub_dir, job_filename), 'w') as job_file:
                            # Terminate the last line as well, so the script is a well-formed
                            # text file (the mpirun line previously had no trailing newline).
                            job_file.write('\n'.join(script_lines) + '\n')

lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_0_flickering_0.05.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_1_flickering_0.05.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_3_flickering_0.05.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_5_flickering_0.05.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_0_flickering_0.05.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_1_flickering_0.05.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_3_flickering_0.05.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_5_flickering_0.05.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_0_flickering_0.1.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_1_flickering_0.1.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_3_flickering_0.1.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_POMDP_5_flickering_0.1.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_0_flickering_0.1.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_1_flickering_0.1.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_POMDP_3_flickering_0.1.sh
lstm_td3_job_Half

lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_1_POMDP_5_flickering_0.05.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_0_POMDP_0_flickering_0.1.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_0_POMDP_1_flickering_0.1.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_0_POMDP_3_flickering_0.1.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_0_POMDP_5_flickering_0.1.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_1_POMDP_0_flickering_0.1.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_1_POMDP_1_flickering_0.1.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_1_POMDP_3_flickering_0.1.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_1_POMDP_5_flickering_0.1.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_0_POMDP_0_flickering_0.5.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_0_POMDP_1_flickering_0.5.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_0_POMDP_3_flickering_0.5.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_0_POMDP_5_flickering_0.5.sh
lstm_td3_job_InvertedPendulumMuJoCoEnv-v0_1_POMDP_0_flickering_0.5.sh
lstm_td3_job_Invert

lstm_td3_job_Walker2DPyBulletEnv-v0_0_POMDP_5_flickering_0.1.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_1_POMDP_0_flickering_0.1.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_1_POMDP_1_flickering_0.1.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_1_POMDP_3_flickering_0.1.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_1_POMDP_5_flickering_0.1.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_0_POMDP_0_flickering_0.5.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_0_POMDP_1_flickering_0.5.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_0_POMDP_3_flickering_0.5.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_0_POMDP_5_flickering_0.5.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_1_POMDP_0_flickering_0.5.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_1_POMDP_1_flickering_0.5.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_1_POMDP_3_flickering_0.5.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_1_POMDP_5_flickering_0.5.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_0_POMDP_0_flickering_0.8.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_0_POMDP_1_flickering_0.8.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_0_PO

lstm_td3_job_ReacherPyBulletEnv-v0_0_POMDP_1_flickering_0.05.sh
lstm_td3_job_ReacherPyBulletEnv-v0_0_POMDP_3_flickering_0.05.sh
lstm_td3_job_ReacherPyBulletEnv-v0_0_POMDP_5_flickering_0.05.sh
lstm_td3_job_ReacherPyBulletEnv-v0_1_POMDP_0_flickering_0.05.sh
lstm_td3_job_ReacherPyBulletEnv-v0_1_POMDP_1_flickering_0.05.sh
lstm_td3_job_ReacherPyBulletEnv-v0_1_POMDP_3_flickering_0.05.sh
lstm_td3_job_ReacherPyBulletEnv-v0_1_POMDP_5_flickering_0.05.sh
lstm_td3_job_ReacherPyBulletEnv-v0_0_POMDP_0_flickering_0.1.sh
lstm_td3_job_ReacherPyBulletEnv-v0_0_POMDP_1_flickering_0.1.sh
lstm_td3_job_ReacherPyBulletEnv-v0_0_POMDP_3_flickering_0.1.sh
lstm_td3_job_ReacherPyBulletEnv-v0_0_POMDP_5_flickering_0.1.sh
lstm_td3_job_ReacherPyBulletEnv-v0_1_POMDP_0_flickering_0.1.sh
lstm_td3_job_ReacherPyBulletEnv-v0_1_POMDP_1_flickering_0.1.sh
lstm_td3_job_ReacherPyBulletEnv-v0_1_POMDP_3_flickering_0.1.sh
lstm_td3_job_ReacherPyBulletEnv-v0_1_POMDP_5_flickering_0.1.sh
lstm_td3_job_ReacherPyBulletEnv-v0_0_POMDP_0_fli

### POMDP-Combined

In [1]:
# Names of the combined POMDP settings to generate jobs for. The generation loop
# further down passes each name to `--pomdp_type` and also uses it to build the
# per-setting script directory, log directory and `--data_dir` value.
pomdp_types = ['remove_velocity_and_flickering', 'remove_velocity_and_random_noise',
               'remove_velocity_and_random_sensor_missing', 'flickering_and_random_noise',
               'random_noise_and_random_sensor_missing', 'random_sensor_missing_and_random_noise']

In [2]:
import numpy as np
import os
# SLURM resource requests shared by every generated job script.
JOB_MEMORY = '12000M'   # value for `#SBATCH --mem`
JOB_TIME = '0-12:00'    # value for `#SBATCH --time`, formatted DD-HH:MM
CPU_NUM = 4             # used for `#SBATCH --cpus-per-task` and for `mpirun -np`


In [4]:
# Environments to sweep over, passed to the training script via `--env`.
tasks = [
    # MuJoCo-backed environments
    'HalfCheetahMuJoCoEnv-v0',
    'AntMuJoCoEnv-v0',
    'Walker2DMuJoCoEnv-v0',
    'HopperMuJoCoEnv-v0',
    'InvertedDoublePendulumMuJoCoEnv-v0',
    # PyBullet-backed environments
    'HalfCheetahPyBulletEnv-v0',
    'AntPyBulletEnv-v0',
    'Walker2DPyBulletEnv-v0',
    'HopperPyBulletEnv-v0',
    'InvertedPendulumPyBulletEnv-v0',
    'InvertedDoublePendulumPyBulletEnv-v0',
    'ReacherPyBulletEnv-v0',
]

# Algorithms to run; each name selects both the sub-package and the script
# file (`.../pytorch/<name>/<name>.py`) in the generated command.
alg_names = ['td3', 'sac', 'ddpg']
# Random seeds, kept as strings because they are only ever formatted into text.
seeds = ['0', '1']

# Generate one SLURM batch script per (POMDP type, algorithm, task, seed).
# Script filenames do not contain the POMDP type; they stay unique because
# every POMDP type gets its own script directory and its own log directory.
# After the loop, job_sub_dir / job_out_dir hold the paths of the LAST
# POMDP type in pomdp_types.
for pomdp_t in pomdp_types:
    job_sub_dir = './job_scripts_{}'.format(pomdp_t)
    job_out_dir = './job_scripts_output_{}'.format(pomdp_t)
    # exist_ok=True makes re-running the cell safe without a separate
    # (race-prone) os.path.exists() check.
    os.makedirs(job_sub_dir, exist_ok=True)
    os.makedirs(job_out_dir, exist_ok=True)

    for alg_name in alg_names:
        for task in tasks:
            for s in seeds:
                job_filename = '{2}_job_{0}_{1}.sh'.format(task, s, alg_name)
                print(job_filename)
                script_lines = [
                    '#!/bin/bash\n',
                    '#SBATCH --account=def-rgorbet\n',
                    '#SBATCH --cpus-per-task={}    #Maximum of CPU cores per GPU request: 6 on Cedar, 16 on Graham.\n'.format(CPU_NUM),
                    '#SBATCH --mem={}               # memory per node\n'.format(JOB_MEMORY),
                    '#SBATCH --time={}            # time (DD-HH:MM)\n'.format(JOB_TIME),
                    '#SBATCH --output={0}/{3}_job_{1}_{2}_%N-%j.out        # %N for node name, %j for jobID\n'.format(job_out_dir, task, s, alg_name),
                    '## Main processing command\n',
                    'module load mpi4py/3.0.3 \n',
                    'source ~/torch_env/bin/activate\n',
                    # Terminated with '\n' so the script ends in a complete text line.
                    'mpirun --oversubscribe -np {2} python ~/projects/def-rgorbet/lingheng/spinningup_new/spinup/algos/pytorch/{3}/{3}.py  --env {0} --partially_observable True --seed {1} --pomdp_type {4} --epochs 200 --data_dir spinup_POMDP_{4} --exp_name {3}_{0}_{1}\n'.format(task, s, CPU_NUM, alg_name, pomdp_t),
                ]
                # Existing scripts with the same name are overwritten.
                with open(os.path.join(job_sub_dir, job_filename), 'w') as job_file:
                    job_file.writelines(script_lines)

td3_job_HalfCheetahMuJoCoEnv-v0_0.sh
td3_job_HalfCheetahMuJoCoEnv-v0_1.sh
td3_job_AntMuJoCoEnv-v0_0.sh
td3_job_AntMuJoCoEnv-v0_1.sh
td3_job_Walker2DMuJoCoEnv-v0_0.sh
td3_job_Walker2DMuJoCoEnv-v0_1.sh
td3_job_HopperMuJoCoEnv-v0_0.sh
td3_job_HopperMuJoCoEnv-v0_1.sh
td3_job_InvertedDoublePendulumMuJoCoEnv-v0_0.sh
td3_job_InvertedDoublePendulumMuJoCoEnv-v0_1.sh
td3_job_HalfCheetahPyBulletEnv-v0_0.sh
td3_job_HalfCheetahPyBulletEnv-v0_1.sh
td3_job_AntPyBulletEnv-v0_0.sh
td3_job_AntPyBulletEnv-v0_1.sh
td3_job_Walker2DPyBulletEnv-v0_0.sh
td3_job_Walker2DPyBulletEnv-v0_1.sh
td3_job_HopperPyBulletEnv-v0_0.sh
td3_job_HopperPyBulletEnv-v0_1.sh
td3_job_InvertedPendulumPyBulletEnv-v0_0.sh
td3_job_InvertedPendulumPyBulletEnv-v0_1.sh
td3_job_InvertedDoublePendulumPyBulletEnv-v0_0.sh
td3_job_InvertedDoublePendulumPyBulletEnv-v0_1.sh
td3_job_ReacherPyBulletEnv-v0_0.sh
td3_job_ReacherPyBulletEnv-v0_1.sh
sac_job_HalfCheetahMuJoCoEnv-v0_0.sh
sac_job_HalfCheetahMuJoCoEnv-v0_1.sh
sac_job_AntMuJoCoEnv-v0_0.sh

sac_job_AntMuJoCoEnv-v0_0.sh
sac_job_AntMuJoCoEnv-v0_1.sh
sac_job_Walker2DMuJoCoEnv-v0_0.sh
sac_job_Walker2DMuJoCoEnv-v0_1.sh
sac_job_HopperMuJoCoEnv-v0_0.sh
sac_job_HopperMuJoCoEnv-v0_1.sh
sac_job_InvertedDoublePendulumMuJoCoEnv-v0_0.sh
sac_job_InvertedDoublePendulumMuJoCoEnv-v0_1.sh
sac_job_HalfCheetahPyBulletEnv-v0_0.sh
sac_job_HalfCheetahPyBulletEnv-v0_1.sh
sac_job_AntPyBulletEnv-v0_0.sh
sac_job_AntPyBulletEnv-v0_1.sh
sac_job_Walker2DPyBulletEnv-v0_0.sh
sac_job_Walker2DPyBulletEnv-v0_1.sh
sac_job_HopperPyBulletEnv-v0_0.sh
sac_job_HopperPyBulletEnv-v0_1.sh
sac_job_InvertedPendulumPyBulletEnv-v0_0.sh
sac_job_InvertedPendulumPyBulletEnv-v0_1.sh
sac_job_InvertedDoublePendulumPyBulletEnv-v0_0.sh
sac_job_InvertedDoublePendulumPyBulletEnv-v0_1.sh
sac_job_ReacherPyBulletEnv-v0_0.sh
sac_job_ReacherPyBulletEnv-v0_1.sh
ddpg_job_HalfCheetahMuJoCoEnv-v0_0.sh
ddpg_job_HalfCheetahMuJoCoEnv-v0_1.sh
ddpg_job_AntMuJoCoEnv-v0_0.sh
ddpg_job_AntMuJoCoEnv-v0_1.sh
ddpg_job_Walker2DMuJoCoEnv-v0_0.sh
ddpg_

In [9]:

# Sweep configuration for the LSTM-TD3 runs on the combined POMDP variants.
seeds = ['0', '1']
max_hist_len = [0, 1, 3, 5]   # values passed to the script's --max_hist_len flag
alg_name = 'lstm_td3'

# Generate one SLURM batch script per (POMDP type, task, seed, history length).
# Script filenames do not contain the POMDP type; they stay unique because
# every POMDP type gets its own script directory and its own log directory.
# After the loop, job_sub_dir / job_out_dir hold the paths of the LAST
# POMDP type in pomdp_types.
for pomdp_t in pomdp_types:
    job_sub_dir = './job_scripts_lstm_td3_{}'.format(pomdp_t)
    job_out_dir = './job_scripts_lstm_td3_output_{}'.format(pomdp_t)
    # exist_ok=True makes re-running the cell safe without a separate
    # (race-prone) os.path.exists() check.
    os.makedirs(job_sub_dir, exist_ok=True)
    os.makedirs(job_out_dir, exist_ok=True)

    for task in tasks:
        for s in seeds:
            for m_h_l in max_hist_len:
                job_filename = '{2}_job_{0}_{1}_{3}.sh'.format(task, s, alg_name, m_h_l)
                print(job_filename)
                script_lines = [
                    '#!/bin/bash\n',
                    '#SBATCH --account=def-rgorbet\n',
                    '#SBATCH --cpus-per-task={}    #Maximum of CPU cores per GPU request: 6 on Cedar, 16 on Graham.\n'.format(CPU_NUM),
                    '#SBATCH --mem={}               # memory per node\n'.format(JOB_MEMORY),
                    '#SBATCH --time={}            # time (DD-HH:MM)\n'.format(JOB_TIME),
                    '#SBATCH --output={0}/{3}_job_{1}_{2}_{4}_%N-%j.out        # %N for node name, %j for jobID\n'.format(job_out_dir, task, s, alg_name, m_h_l),
                    '## Main processing command\n',
                    'module load mpi4py/3.0.3 \n',
                    'source ~/torch_env/bin/activate\n',
                    # Terminated with '\n' so the script ends in a complete text line.
                    'mpirun --oversubscribe -np {2} python ~/projects/def-rgorbet/lingheng/spinningup_new/spinup/algos/pytorch/{3}/{3}.py  --env {0} --partially_observable True --seed {1} --pomdp_type {5} --epochs 200 --max_hist_len {4}  --data_dir spinup_POMDP_{5} --exp_name {3}_{0}_{1}_{4}\n'.format(task, s, CPU_NUM, alg_name, m_h_l, pomdp_t),
                ]
                # Existing scripts with the same name are overwritten.
                with open(os.path.join(job_sub_dir, job_filename), 'w') as job_file:
                    job_file.writelines(script_lines)

lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_0.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_1.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_3.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_5.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_0.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_1.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_3.sh
lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_5.sh
lstm_td3_job_AntMuJoCoEnv-v0_0_0.sh
lstm_td3_job_AntMuJoCoEnv-v0_0_1.sh
lstm_td3_job_AntMuJoCoEnv-v0_0_3.sh
lstm_td3_job_AntMuJoCoEnv-v0_0_5.sh
lstm_td3_job_AntMuJoCoEnv-v0_1_0.sh
lstm_td3_job_AntMuJoCoEnv-v0_1_1.sh
lstm_td3_job_AntMuJoCoEnv-v0_1_3.sh
lstm_td3_job_AntMuJoCoEnv-v0_1_5.sh
lstm_td3_job_Walker2DMuJoCoEnv-v0_0_0.sh
lstm_td3_job_Walker2DMuJoCoEnv-v0_0_1.sh
lstm_td3_job_Walker2DMuJoCoEnv-v0_0_3.sh
lstm_td3_job_Walker2DMuJoCoEnv-v0_0_5.sh
lstm_td3_job_Walker2DMuJoCoEnv-v0_1_0.sh
lstm_td3_job_Walker2DMuJoCoEnv-v0_1_1.sh
lstm_td3_job_Walker2DMuJoCoEnv-v0_1_3.sh
lstm_td3_job_Walker2DMuJoCoEnv-v0_1_5.sh
lstm_td3_job_HopperMuJoCoEnv-v0_

lstm_td3_job_HalfCheetahPyBulletEnv-v0_0_0.sh
lstm_td3_job_HalfCheetahPyBulletEnv-v0_0_1.sh
lstm_td3_job_HalfCheetahPyBulletEnv-v0_0_3.sh
lstm_td3_job_HalfCheetahPyBulletEnv-v0_0_5.sh
lstm_td3_job_HalfCheetahPyBulletEnv-v0_1_0.sh
lstm_td3_job_HalfCheetahPyBulletEnv-v0_1_1.sh
lstm_td3_job_HalfCheetahPyBulletEnv-v0_1_3.sh
lstm_td3_job_HalfCheetahPyBulletEnv-v0_1_5.sh
lstm_td3_job_AntPyBulletEnv-v0_0_0.sh
lstm_td3_job_AntPyBulletEnv-v0_0_1.sh
lstm_td3_job_AntPyBulletEnv-v0_0_3.sh
lstm_td3_job_AntPyBulletEnv-v0_0_5.sh
lstm_td3_job_AntPyBulletEnv-v0_1_0.sh
lstm_td3_job_AntPyBulletEnv-v0_1_1.sh
lstm_td3_job_AntPyBulletEnv-v0_1_3.sh
lstm_td3_job_AntPyBulletEnv-v0_1_5.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_0_0.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_0_1.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_0_3.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_0_5.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_1_0.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_1_1.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_1_3.sh
lstm_td3_job_Walker2DPyBull

lstm_td3_job_HalfCheetahPyBulletEnv-v0_1_5.sh
lstm_td3_job_AntPyBulletEnv-v0_0_0.sh
lstm_td3_job_AntPyBulletEnv-v0_0_1.sh
lstm_td3_job_AntPyBulletEnv-v0_0_3.sh
lstm_td3_job_AntPyBulletEnv-v0_0_5.sh
lstm_td3_job_AntPyBulletEnv-v0_1_0.sh
lstm_td3_job_AntPyBulletEnv-v0_1_1.sh
lstm_td3_job_AntPyBulletEnv-v0_1_3.sh
lstm_td3_job_AntPyBulletEnv-v0_1_5.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_0_0.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_0_1.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_0_3.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_0_5.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_1_0.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_1_1.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_1_3.sh
lstm_td3_job_Walker2DPyBulletEnv-v0_1_5.sh
lstm_td3_job_HopperPyBulletEnv-v0_0_0.sh
lstm_td3_job_HopperPyBulletEnv-v0_0_1.sh
lstm_td3_job_HopperPyBulletEnv-v0_0_3.sh
lstm_td3_job_HopperPyBulletEnv-v0_0_5.sh
lstm_td3_job_HopperPyBulletEnv-v0_1_0.sh
lstm_td3_job_HopperPyBulletEnv-v0_1_1.sh
lstm_td3_job_HopperPyBulletEnv-v0_1_3.sh
lstm_td3_job_Hopper

In [10]:
# Submit every generated LSTM-TD3 job script with `sbatch`, one POMDP type
# (i.e. one script directory) at a time.
for pomdp_t in pomdp_types:
    job_sub_dir = './job_scripts_lstm_td3_{}'.format(pomdp_t)
    job_out_dir = './job_scripts_lstm_td3_output_{}'.format(pomdp_t)

    # Keep only the .sh scripts so the reported count matches what is submitted.
    jobs = sorted(name for name in os.listdir(job_sub_dir) if name.endswith(".sh"))
    print('number of job: {}'.format(len(jobs)))

    failed_jobs = []
    for i, job in enumerate(jobs, start=1):
        # os.system returns the command's status; anything other than 0 means
        # sbatch did not exit cleanly (on Unix this is the raw wait status).
        code = os.system('sbatch {}'.format(os.path.join(job_sub_dir, job)))
        print('{} ---- {}: {}'.format(i, job, code))
        if code != 0:
            failed_jobs.append(job)

    # Surface failures explicitly; a single non-zero status is easy to miss
    # in a long listing of per-job lines.
    if failed_jobs:
        print('{} submission(s) failed in {}: {}'.format(len(failed_jobs), job_sub_dir, failed_jobs))

number of job: 96
1 ---- lstm_td3_job_AntMuJoCoEnv-v0_0_0.sh: 0
2 ---- lstm_td3_job_AntMuJoCoEnv-v0_0_1.sh: 0
3 ---- lstm_td3_job_AntMuJoCoEnv-v0_0_3.sh: 0
4 ---- lstm_td3_job_AntMuJoCoEnv-v0_0_5.sh: 0
5 ---- lstm_td3_job_AntMuJoCoEnv-v0_1_0.sh: 0
6 ---- lstm_td3_job_AntMuJoCoEnv-v0_1_1.sh: 0
7 ---- lstm_td3_job_AntMuJoCoEnv-v0_1_3.sh: 0
8 ---- lstm_td3_job_AntMuJoCoEnv-v0_1_5.sh: 0
9 ---- lstm_td3_job_AntPyBulletEnv-v0_0_0.sh: 0
10 ---- lstm_td3_job_AntPyBulletEnv-v0_0_1.sh: 0
11 ---- lstm_td3_job_AntPyBulletEnv-v0_0_3.sh: 0
12 ---- lstm_td3_job_AntPyBulletEnv-v0_0_5.sh: 0
13 ---- lstm_td3_job_AntPyBulletEnv-v0_1_0.sh: 0
14 ---- lstm_td3_job_AntPyBulletEnv-v0_1_1.sh: 0
15 ---- lstm_td3_job_AntPyBulletEnv-v0_1_3.sh: 0
16 ---- lstm_td3_job_AntPyBulletEnv-v0_1_5.sh: 0
17 ---- lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_0.sh: 0
18 ---- lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_1.sh: 0
19 ---- lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_3.sh: 0
20 ---- lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_5.sh: 0
21 

55 ---- lstm_td3_job_InvertedDoublePendulumMuJoCoEnv-v0_1_3.sh: 0
56 ---- lstm_td3_job_InvertedDoublePendulumMuJoCoEnv-v0_1_5.sh: 0
57 ---- lstm_td3_job_InvertedDoublePendulumPyBulletEnv-v0_0_0.sh: 0
58 ---- lstm_td3_job_InvertedDoublePendulumPyBulletEnv-v0_0_1.sh: 0
59 ---- lstm_td3_job_InvertedDoublePendulumPyBulletEnv-v0_0_3.sh: 0
60 ---- lstm_td3_job_InvertedDoublePendulumPyBulletEnv-v0_0_5.sh: 0
61 ---- lstm_td3_job_InvertedDoublePendulumPyBulletEnv-v0_1_0.sh: 0
62 ---- lstm_td3_job_InvertedDoublePendulumPyBulletEnv-v0_1_1.sh: 0
63 ---- lstm_td3_job_InvertedDoublePendulumPyBulletEnv-v0_1_3.sh: 0
64 ---- lstm_td3_job_InvertedDoublePendulumPyBulletEnv-v0_1_5.sh: 0
65 ---- lstm_td3_job_InvertedPendulumPyBulletEnv-v0_0_0.sh: 0
66 ---- lstm_td3_job_InvertedPendulumPyBulletEnv-v0_0_1.sh: 0
67 ---- lstm_td3_job_InvertedPendulumPyBulletEnv-v0_0_3.sh: 0
68 ---- lstm_td3_job_InvertedPendulumPyBulletEnv-v0_0_5.sh: 0
69 ---- lstm_td3_job_InvertedPendulumPyBulletEnv-v0_1_0.sh: 256
70 ---- lstm

11 ---- lstm_td3_job_AntPyBulletEnv-v0_0_3.sh: 0
12 ---- lstm_td3_job_AntPyBulletEnv-v0_0_5.sh: 0
13 ---- lstm_td3_job_AntPyBulletEnv-v0_1_0.sh: 0
14 ---- lstm_td3_job_AntPyBulletEnv-v0_1_1.sh: 0
15 ---- lstm_td3_job_AntPyBulletEnv-v0_1_3.sh: 0
16 ---- lstm_td3_job_AntPyBulletEnv-v0_1_5.sh: 0
17 ---- lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_0.sh: 0
18 ---- lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_1.sh: 0
19 ---- lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_3.sh: 0
20 ---- lstm_td3_job_HalfCheetahMuJoCoEnv-v0_0_5.sh: 0
21 ---- lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_0.sh: 0
22 ---- lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_1.sh: 0
23 ---- lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_3.sh: 0
24 ---- lstm_td3_job_HalfCheetahMuJoCoEnv-v0_1_5.sh: 0
25 ---- lstm_td3_job_HalfCheetahPyBulletEnv-v0_0_0.sh: 0
26 ---- lstm_td3_job_HalfCheetahPyBulletEnv-v0_0_1.sh: 0
27 ---- lstm_td3_job_HalfCheetahPyBulletEnv-v0_0_3.sh: 0
28 ---- lstm_td3_job_HalfCheetahPyBulletEnv-v0_0_5.sh: 0
29 ---- lstm_td3_job_HalfCheetahPyBull

62 ---- lstm_td3_job_InvertedDoublePendulumPyBulletEnv-v0_1_1.sh: 0
63 ---- lstm_td3_job_InvertedDoublePendulumPyBulletEnv-v0_1_3.sh: 0
64 ---- lstm_td3_job_InvertedDoublePendulumPyBulletEnv-v0_1_5.sh: 0
65 ---- lstm_td3_job_InvertedPendulumPyBulletEnv-v0_0_0.sh: 0
66 ---- lstm_td3_job_InvertedPendulumPyBulletEnv-v0_0_1.sh: 0
67 ---- lstm_td3_job_InvertedPendulumPyBulletEnv-v0_0_3.sh: 0
68 ---- lstm_td3_job_InvertedPendulumPyBulletEnv-v0_0_5.sh: 0
69 ---- lstm_td3_job_InvertedPendulumPyBulletEnv-v0_1_0.sh: 0
70 ---- lstm_td3_job_InvertedPendulumPyBulletEnv-v0_1_1.sh: 0
71 ---- lstm_td3_job_InvertedPendulumPyBulletEnv-v0_1_3.sh: 0
72 ---- lstm_td3_job_InvertedPendulumPyBulletEnv-v0_1_5.sh: 0
73 ---- lstm_td3_job_ReacherPyBulletEnv-v0_0_0.sh: 0
74 ---- lstm_td3_job_ReacherPyBulletEnv-v0_0_1.sh: 0
75 ---- lstm_td3_job_ReacherPyBulletEnv-v0_0_3.sh: 0
76 ---- lstm_td3_job_ReacherPyBulletEnv-v0_0_5.sh: 0
77 ---- lstm_td3_job_ReacherPyBulletEnv-v0_1_0.sh: 0
78 ---- lstm_td3_job_ReacherPyBulle

## Submit jobs

In [8]:
# Sorted listing of the currently selected job-script directory; the cell
# displays how many entries it contains.
jobs = sorted(os.listdir(job_sub_dir))
len(jobs)

960

In [9]:
# Submit every .sh script found in job_sub_dir with `sbatch`.
# NOTE(review): job_sub_dir is whatever an earlier cell last assigned, so the
# directory submitted here depends on execution order; uncomment the line
# below to pin it explicitly.
# job_sub_dir = './job_scripts_new'
jobs = sorted(name for name in os.listdir(job_sub_dir) if name.endswith(".sh"))

failed_jobs = []
for i, job in enumerate(jobs, start=1):
    # os.system returns the command's status; anything other than 0 means
    # sbatch did not exit cleanly (on Unix this is the raw wait status).
    code = os.system('sbatch {}'.format(os.path.join(job_sub_dir, job)))
    print('{} ---- {}: {}'.format(i, job, code))
    if code != 0:
        failed_jobs.append(job)

# Surface failures explicitly; a single non-zero status is easy to miss in a
# long listing of per-job lines.
if failed_jobs:
    print('{} submission(s) failed in {}: {}'.format(len(failed_jobs), job_sub_dir, failed_jobs))

1 ---- job_AntMuJoCoEnv-v0_0.02_0.1_0.sh: 0
2 ---- job_AntMuJoCoEnv-v0_0.02_0.1_1.sh: 0
3 ---- job_AntMuJoCoEnv-v0_0.02_0.2_0.sh: 0
4 ---- job_AntMuJoCoEnv-v0_0.02_0.2_1.sh: 0
5 ---- job_AntMuJoCoEnv-v0_0.02_0.4_0.sh: 0
6 ---- job_AntMuJoCoEnv-v0_0.02_0.4_1.sh: 0
7 ---- job_AntMuJoCoEnv-v0_0.02_0.6_0.sh: 0
8 ---- job_AntMuJoCoEnv-v0_0.02_0.6_1.sh: 0
9 ---- job_AntMuJoCoEnv-v0_0.02_0.8_0.sh: 0
10 ---- job_AntMuJoCoEnv-v0_0.02_0.8_1.sh: 0
11 ---- job_AntMuJoCoEnv-v0_0.05_0.1_0.sh: 0
12 ---- job_AntMuJoCoEnv-v0_0.05_0.1_1.sh: 0
13 ---- job_AntMuJoCoEnv-v0_0.05_0.2_0.sh: 0
14 ---- job_AntMuJoCoEnv-v0_0.05_0.2_1.sh: 0
15 ---- job_AntMuJoCoEnv-v0_0.05_0.4_0.sh: 0
16 ---- job_AntMuJoCoEnv-v0_0.05_0.4_1.sh: 0
17 ---- job_AntMuJoCoEnv-v0_0.05_0.6_0.sh: 0
18 ---- job_AntMuJoCoEnv-v0_0.05_0.6_1.sh: 0
19 ---- job_AntMuJoCoEnv-v0_0.05_0.8_0.sh: 0
20 ---- job_AntMuJoCoEnv-v0_0.05_0.8_1.sh: 0
21 ---- job_AntMuJoCoEnv-v0_0.1_0.1_0.sh: 0
22 ---- job_AntMuJoCoEnv-v0_0.1_0.1_1.sh: 0
23 ---- job_AntMuJoCo

180 ---- job_HalfCheetahMuJoCoEnv-v0_0.05_0.8_1.sh: 0
181 ---- job_HalfCheetahMuJoCoEnv-v0_0.1_0.1_0.sh: 0
182 ---- job_HalfCheetahMuJoCoEnv-v0_0.1_0.1_1.sh: 0
183 ---- job_HalfCheetahMuJoCoEnv-v0_0.1_0.2_0.sh: 0
184 ---- job_HalfCheetahMuJoCoEnv-v0_0.1_0.2_1.sh: 0
185 ---- job_HalfCheetahMuJoCoEnv-v0_0.1_0.4_0.sh: 0
186 ---- job_HalfCheetahMuJoCoEnv-v0_0.1_0.4_1.sh: 0
187 ---- job_HalfCheetahMuJoCoEnv-v0_0.1_0.6_0.sh: 0
188 ---- job_HalfCheetahMuJoCoEnv-v0_0.1_0.6_1.sh: 0
189 ---- job_HalfCheetahMuJoCoEnv-v0_0.1_0.8_0.sh: 0
190 ---- job_HalfCheetahMuJoCoEnv-v0_0.1_0.8_1.sh: 0
191 ---- job_HalfCheetahMuJoCoEnv-v0_0.2_0.1_0.sh: 0
192 ---- job_HalfCheetahMuJoCoEnv-v0_0.2_0.1_1.sh: 0
193 ---- job_HalfCheetahMuJoCoEnv-v0_0.2_0.2_0.sh: 0
194 ---- job_HalfCheetahMuJoCoEnv-v0_0.2_0.2_1.sh: 0
195 ---- job_HalfCheetahMuJoCoEnv-v0_0.2_0.4_0.sh: 0
196 ---- job_HalfCheetahMuJoCoEnv-v0_0.2_0.4_1.sh: 0
197 ---- job_HalfCheetahMuJoCoEnv-v0_0.2_0.6_0.sh: 0
198 ---- job_HalfCheetahMuJoCoEnv-v0_0.2_0.6_

333 ---- job_HopperMuJoCoEnv-v0_0.05_0.2_0.sh: 0
334 ---- job_HopperMuJoCoEnv-v0_0.05_0.2_1.sh: 0
335 ---- job_HopperMuJoCoEnv-v0_0.05_0.4_0.sh: 0
336 ---- job_HopperMuJoCoEnv-v0_0.05_0.4_1.sh: 0
337 ---- job_HopperMuJoCoEnv-v0_0.05_0.6_0.sh: 0
338 ---- job_HopperMuJoCoEnv-v0_0.05_0.6_1.sh: 0
339 ---- job_HopperMuJoCoEnv-v0_0.05_0.8_0.sh: 0
340 ---- job_HopperMuJoCoEnv-v0_0.05_0.8_1.sh: 0
341 ---- job_HopperMuJoCoEnv-v0_0.1_0.1_0.sh: 0
342 ---- job_HopperMuJoCoEnv-v0_0.1_0.1_1.sh: 0
343 ---- job_HopperMuJoCoEnv-v0_0.1_0.2_0.sh: 0
344 ---- job_HopperMuJoCoEnv-v0_0.1_0.2_1.sh: 0
345 ---- job_HopperMuJoCoEnv-v0_0.1_0.4_0.sh: 0
346 ---- job_HopperMuJoCoEnv-v0_0.1_0.4_1.sh: 0
347 ---- job_HopperMuJoCoEnv-v0_0.1_0.6_0.sh: 0
348 ---- job_HopperMuJoCoEnv-v0_0.1_0.6_1.sh: 0
349 ---- job_HopperMuJoCoEnv-v0_0.1_0.8_0.sh: 0
350 ---- job_HopperMuJoCoEnv-v0_0.1_0.8_1.sh: 0
351 ---- job_HopperMuJoCoEnv-v0_0.2_0.1_0.sh: 0
352 ---- job_HopperMuJoCoEnv-v0_0.2_0.1_1.sh: 0
353 ---- job_HopperMuJoCoEnv-v0_

497 ---- job_InvertedDoublePendulumMuJoCoEnv-v0_0.05_0.6_0.sh: 0
498 ---- job_InvertedDoublePendulumMuJoCoEnv-v0_0.05_0.6_1.sh: 0
499 ---- job_InvertedDoublePendulumMuJoCoEnv-v0_0.05_0.8_0.sh: 0
500 ---- job_InvertedDoublePendulumMuJoCoEnv-v0_0.05_0.8_1.sh: 0
501 ---- job_InvertedDoublePendulumMuJoCoEnv-v0_0.1_0.1_0.sh: 0
502 ---- job_InvertedDoublePendulumMuJoCoEnv-v0_0.1_0.1_1.sh: 0
503 ---- job_InvertedDoublePendulumMuJoCoEnv-v0_0.1_0.2_0.sh: 0
504 ---- job_InvertedDoublePendulumMuJoCoEnv-v0_0.1_0.2_1.sh: 0
505 ---- job_InvertedDoublePendulumMuJoCoEnv-v0_0.1_0.4_0.sh: 0
506 ---- job_InvertedDoublePendulumMuJoCoEnv-v0_0.1_0.4_1.sh: 0
507 ---- job_InvertedDoublePendulumMuJoCoEnv-v0_0.1_0.6_0.sh: 0
508 ---- job_InvertedDoublePendulumMuJoCoEnv-v0_0.1_0.6_1.sh: 0
509 ---- job_InvertedDoublePendulumMuJoCoEnv-v0_0.1_0.8_0.sh: 0
510 ---- job_InvertedDoublePendulumMuJoCoEnv-v0_0.1_0.8_1.sh: 0
511 ---- job_InvertedDoublePendulumMuJoCoEnv-v0_0.2_0.1_0.sh: 0
512 ---- job_InvertedDoublePendulumM

625 ---- job_InvertedDoublePendulumPyBulletEnv-v0_0.8_0.4_0.sh: 0
626 ---- job_InvertedDoublePendulumPyBulletEnv-v0_0.8_0.4_1.sh: 0
627 ---- job_InvertedDoublePendulumPyBulletEnv-v0_0.8_0.6_0.sh: 0
628 ---- job_InvertedDoublePendulumPyBulletEnv-v0_0.8_0.6_1.sh: 0
629 ---- job_InvertedDoublePendulumPyBulletEnv-v0_0.8_0.8_0.sh: 0
630 ---- job_InvertedDoublePendulumPyBulletEnv-v0_0.8_0.8_1.sh: 0
631 ---- job_InvertedDoublePendulumPyBulletEnv-v0_1_0.1_0.sh: 0
632 ---- job_InvertedDoublePendulumPyBulletEnv-v0_1_0.1_1.sh: 0
633 ---- job_InvertedDoublePendulumPyBulletEnv-v0_1_0.2_0.sh: 0
634 ---- job_InvertedDoublePendulumPyBulletEnv-v0_1_0.2_1.sh: 0
635 ---- job_InvertedDoublePendulumPyBulletEnv-v0_1_0.4_0.sh: 0
636 ---- job_InvertedDoublePendulumPyBulletEnv-v0_1_0.4_1.sh: 0
637 ---- job_InvertedDoublePendulumPyBulletEnv-v0_1_0.6_0.sh: 0
638 ---- job_InvertedDoublePendulumPyBulletEnv-v0_1_0.6_1.sh: 0
639 ---- job_InvertedDoublePendulumPyBulletEnv-v0_1_0.8_0.sh: 0
640 ---- job_InvertedDoubleP

768 ---- job_ReacherPyBulletEnv-v0_0.4_0.6_1.sh: 0
769 ---- job_ReacherPyBulletEnv-v0_0.4_0.8_0.sh: 0
770 ---- job_ReacherPyBulletEnv-v0_0.4_0.8_1.sh: 0
771 ---- job_ReacherPyBulletEnv-v0_0.6_0.1_0.sh: 0
772 ---- job_ReacherPyBulletEnv-v0_0.6_0.1_1.sh: 0
773 ---- job_ReacherPyBulletEnv-v0_0.6_0.2_0.sh: 0
774 ---- job_ReacherPyBulletEnv-v0_0.6_0.2_1.sh: 0
775 ---- job_ReacherPyBulletEnv-v0_0.6_0.4_0.sh: 0
776 ---- job_ReacherPyBulletEnv-v0_0.6_0.4_1.sh: 0
777 ---- job_ReacherPyBulletEnv-v0_0.6_0.6_0.sh: 0
778 ---- job_ReacherPyBulletEnv-v0_0.6_0.6_1.sh: 0
779 ---- job_ReacherPyBulletEnv-v0_0.6_0.8_0.sh: 0
780 ---- job_ReacherPyBulletEnv-v0_0.6_0.8_1.sh: 0
781 ---- job_ReacherPyBulletEnv-v0_0.8_0.1_0.sh: 0
782 ---- job_ReacherPyBulletEnv-v0_0.8_0.1_1.sh: 0
783 ---- job_ReacherPyBulletEnv-v0_0.8_0.2_0.sh: 0
784 ---- job_ReacherPyBulletEnv-v0_0.8_0.2_1.sh: 0
785 ---- job_ReacherPyBulletEnv-v0_0.8_0.4_0.sh: 0
786 ---- job_ReacherPyBulletEnv-v0_0.8_0.4_1.sh: 0
787 ---- job_ReacherPyBulletEnv

930 ---- job_Walker2DPyBulletEnv-v0_0.4_0.8_1.sh: 0
931 ---- job_Walker2DPyBulletEnv-v0_0.6_0.1_0.sh: 0
932 ---- job_Walker2DPyBulletEnv-v0_0.6_0.1_1.sh: 0
933 ---- job_Walker2DPyBulletEnv-v0_0.6_0.2_0.sh: 0
934 ---- job_Walker2DPyBulletEnv-v0_0.6_0.2_1.sh: 0
935 ---- job_Walker2DPyBulletEnv-v0_0.6_0.4_0.sh: 0
936 ---- job_Walker2DPyBulletEnv-v0_0.6_0.4_1.sh: 0
937 ---- job_Walker2DPyBulletEnv-v0_0.6_0.6_0.sh: 0
938 ---- job_Walker2DPyBulletEnv-v0_0.6_0.6_1.sh: 0
939 ---- job_Walker2DPyBulletEnv-v0_0.6_0.8_0.sh: 0
940 ---- job_Walker2DPyBulletEnv-v0_0.6_0.8_1.sh: 0
941 ---- job_Walker2DPyBulletEnv-v0_0.8_0.1_0.sh: 0
942 ---- job_Walker2DPyBulletEnv-v0_0.8_0.1_1.sh: 0
943 ---- job_Walker2DPyBulletEnv-v0_0.8_0.2_0.sh: 0
944 ---- job_Walker2DPyBulletEnv-v0_0.8_0.2_1.sh: 0
945 ---- job_Walker2DPyBulletEnv-v0_0.8_0.4_0.sh: 0
946 ---- job_Walker2DPyBulletEnv-v0_0.8_0.4_1.sh: 0
947 ---- job_Walker2DPyBulletEnv-v0_0.8_0.6_0.sh: 0
948 ---- job_Walker2DPyBulletEnv-v0_0.8_0.6_1.sh: 0
949 ---- job

In [4]:
# Disabled cell, kept for reference: when uncommented it submits, via sbatch,
# only the .sh scripts in job_sub_dir whose filename contains alg_name.
# The output saved below this cell comes from a run with alg_name = 'td3_ow'.
# import os
# # job_sub_dir = './job_scripts'
# jobs = os.listdir(job_sub_dir)
# jobs.sort()
# i=1
# alg_name = 'td3_ow'#'vpg'
# for job in jobs:
#     if job.endswith(".sh"):
#         if alg_name in job:
#             code = os.system('sbatch {}'.format(os.path.join(job_sub_dir, job)))
#             print('{} ---- {}: {}'.format(i, job, code))
#             i += 1
# #         break

1 ---- td3_ow_job_AntMuJoCoEnv-v0_0_MDP.sh: 0
2 ---- td3_ow_job_AntMuJoCoEnv-v0_0_POMDP.sh: 0
3 ---- td3_ow_job_AntMuJoCoEnv-v0_1_MDP.sh: 0
4 ---- td3_ow_job_AntMuJoCoEnv-v0_1_POMDP.sh: 0
5 ---- td3_ow_job_AntPyBulletEnv-v0_0_MDP.sh: 0
6 ---- td3_ow_job_AntPyBulletEnv-v0_0_POMDP.sh: 0
7 ---- td3_ow_job_AntPyBulletEnv-v0_1_MDP.sh: 0
8 ---- td3_ow_job_AntPyBulletEnv-v0_1_POMDP.sh: 0
9 ---- td3_ow_job_HalfCheetahMuJoCoEnv-v0_0_MDP.sh: 0
10 ---- td3_ow_job_HalfCheetahMuJoCoEnv-v0_0_POMDP.sh: 0
11 ---- td3_ow_job_HalfCheetahMuJoCoEnv-v0_1_MDP.sh: 0
12 ---- td3_ow_job_HalfCheetahMuJoCoEnv-v0_1_POMDP.sh: 0
13 ---- td3_ow_job_HalfCheetahPyBulletEnv-v0_0_MDP.sh: 0
14 ---- td3_ow_job_HalfCheetahPyBulletEnv-v0_0_POMDP.sh: 0
15 ---- td3_ow_job_HalfCheetahPyBulletEnv-v0_1_MDP.sh: 0
16 ---- td3_ow_job_HalfCheetahPyBulletEnv-v0_1_POMDP.sh: 0
17 ---- td3_ow_job_HopperMuJoCoEnv-v0_0_MDP.sh: 0
18 ---- td3_ow_job_HopperMuJoCoEnv-v0_0_POMDP.sh: 0
19 ---- td3_ow_job_HopperMuJoCoEnv-v0_1_MDP.sh: 0
20 ---- t

In [6]:
# Display the job-script directory currently held in job_sub_dir (its value
# depends on which cell assigned it last).
job_sub_dir

'./job_scripts_ow'