# DDPG-n-step Generate Jobs

In [1]:
import numpy as np
import os
import time
from datetime import datetime
# Slurm resource requests substituted into every job script generated below.
CPU_NUM = 4  # value written after '#SBATCH --cpus-per-task='
JOB_TIME = '0-10:00'  # value written after '#SBATCH --time=' (DD-HH:MM)
JOB_MEMORY = '16G'  # value written after '#SBATCH --mem='

In [2]:
# Tag this batch with the current local time (YYYY_MM_DD_HH_MM_SS) so every
# run of this cell gets its own script, log and data directory names.
timestample = datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
# Directory that receives the generated .sh submission scripts.
job_sub_dir = './{}_ddpg_n_step_new_job_scripts'.format(timestample)
# Directory referenced by the '#SBATCH --output' line of each script.
job_out_dir = './{}_ddpg_n_step_new_job_scripts_output'.format(timestample)
# Name handed to the training script through --data_dir.
data_out_dir = '{}_ddpg_n_step_new_PyBulletGym'.format(timestample)

# exist_ok=True replaces the racy "check, then create" pattern and still
# raises if the path exists but is not a directory.
for batch_dir in (job_sub_dir, job_out_dir):
    os.makedirs(batch_dir, exist_ok=True)

In [3]:
# Environments covered by the DDPG-n-step sweep, one job family per entry.
tasks = [
    'AntPyBulletEnv-v0',
    'HalfCheetahPyBulletEnv-v0',
    'Walker2DPyBulletEnv-v0',
    'HopperPyBulletEnv-v0',
    'ReacherPyBulletEnv-v0',
    'InvertedPendulumPyBulletEnv-v0',
    'InvertedPendulumSwingupPyBulletEnv-v0',
    'InvertedDoublePendulumPyBulletEnv-v0',
    'HumanoidPyBulletEnv-v0',
    'HumanoidFlagrunPyBulletEnv-v0',
    'HumanoidFlagrunHarderPyBulletEnv-v0',
    'InvertedDoublePendulumMuJoCoEnv-v0',
    'Walker2DMuJoCoEnv-v0',
    'HalfCheetahMuJoCoEnv-v0',
    'AntMuJoCoEnv-v0',
    'HopperMuJoCoEnv-v0',
    'HumanoidMuJoCoEnv-v0',
]

# Values passed to the training script as --replay_size.
# replay_size = ['1000000', '500000']
replay_size = ['1000000']
# Values passed as --seed (kept as strings, exactly as they appear in filenames).
seeds = [str(seed_id) for seed_id in range(5)]

# First sweep: every --n_step value is combined with every --backup_method.
n_step = [5, 8]
backup_method = [
    'avg_n_step',
    'min_n_step',
    'avg_n_step_exclude_1',
]

import itertools


def _write_job_script(job_filename, task, s, b_s, n_s, b_m):
    """Write one Slurm submission script into ``job_sub_dir``.

    The script requests ``CPU_NUM`` cores, ``JOB_MEMORY`` memory and
    ``JOB_TIME`` wall time, sends its log to ``job_out_dir``, loads the
    cuda/cudnn modules, sources ``~/tf_gpu/bin/activate`` and launches
    ``ddpg_n_step_new.py`` with the given settings.

    Args:
        job_filename: File name of the script inside ``job_sub_dir``.
        task: Environment id passed as ``--env``.
        s: Seed passed as ``--seed``.
        b_s: Replay buffer size passed as ``--replay_size``.
        n_s: Value passed as ``--n_step``.
        b_m: Value passed as ``--backup_method``.
    """
    script_lines = [
        '#!/bin/bash\n',
        '#SBATCH --account=def-dkulic\n',
        '#SBATCH --cpus-per-task={}    #Maximum of CPU cores per GPU request: 6 on Cedar, 16 on Graham.\n'.format(CPU_NUM),
        '#SBATCH --mem={}               # memory per node\n'.format(JOB_MEMORY),
        '#SBATCH --time={}            # time (DD-HH:MM)\n'.format(JOB_TIME),
        '#SBATCH --output={0}/ddpg_new_step_new_{1}_{2}_{3}_{4}_{5}_%N-%j.out        # %N for node name, %j for jobID\n'.format(job_out_dir, task, s, b_s, n_s, b_m),
        '## Main processing command\n',
        'module load cuda cudnn \n',
        'source ~/tf_gpu/bin/activate\n',
        'python ../algos/ddpg_n_step_new/ddpg_n_step_new.py  --env {0} --seed {1} --replay_size {2} --n_step {3} --backup_method {4} --data_dir {5} --log_n_step_online_expansion_and_boostrapping --exp_name ddpg_n_step_new_{0}_{2}_{3}_{4}'.format(task, s, b_s, n_s, b_m, data_out_dir),
    ]
    with open(os.path.join(job_sub_dir, job_filename), 'w') as job_file:
        job_file.writelines(script_lines)


def _generate_sweep(n_steps, backup_methods, first_job_number):
    """Write one script per (task, replay size, n_step, backup method, seed).

    Args:
        n_steps: ``--n_step`` values to sweep over.
        backup_methods: ``--backup_method`` values to sweep over.
        first_job_number: Number printed for the first job of this sweep.

    Returns:
        The number of job scripts written by this sweep.
    """
    written = 0
    # product() iterates with the rightmost argument varying fastest, i.e. in
    # the same order as the equivalent nested for-loops.
    for task, b_s, n_s, b_m, s in itertools.product(
            tasks, replay_size, n_steps, backup_methods, seeds):
        job_filename = 'job_{0}_{1}_{2}_{3}_{4}.sh'.format(task, b_s, n_s, b_m, s)
        print('Job {}: {}'.format(first_job_number + written, job_filename))
        _write_job_script(job_filename, task, s, b_s, n_s, b_m)
        written += 1
    return written


# Sweep 1: the n_step / backup_method combinations configured above.
i = _generate_sweep(n_step, backup_method, 1)


# Sweep 2: fixed n_step with the '<k>_step' backup methods; the printed job
# numbers continue where the first sweep stopped.
n_step = [8]
backup_method = ['1_step', '2_step', '3_step', '4_step', '5_step']
j = _generate_sweep(n_step, backup_method, i + 1)

# n_step = [8]
# backup_method = ['6_step', '7_step', '8_step']
# for task in tasks:
#     for b_s in replay_size:
#         for n_s in n_step:
#             for b_m in backup_method:
#                 for s in seeds:
#                     job_filename = 'job_{0}_{1}_{2}_{3}.sh'.format(task, s, b_s, b_m)
#                     i += 1
#                     print('Job {}: {}'.format(i, job_filename))
                    
#                     with open(os.path.join(job_sub_dir, job_filename), 'w') as job_file:
#                         job_file.write('#!/bin/bash\n')
#                         job_file.write('#SBATCH --account=def-dkulic\n')
#                         job_file.write('#SBATCH --cpus-per-task={}    #Maximum of CPU cores per GPU request: 6 on Cedar, 16 on Graham.\n'.format(CPU_NUM))
#                         job_file.write('#SBATCH --mem={}               # memory per node\n'.format(JOB_MEMORY))
#                         job_file.write('#SBATCH --time={}            # time (DD-HH:MM)\n'.format(JOB_TIME))
#                         job_file.write('#SBATCH --output={0}/ddpg_new_step_new_{1}_{2}_{3}_{4}_{5}_%N-%j.out        # %N for node name, %j for jobID\n'.format(job_out_dir, task, s, b_s, n_s, b_m))
#                         job_file.write('## Main processing command\n')
#                         job_file.write('module load cuda cudnn \n')
#                         job_file.write('source ~/tf_gpu/bin/activate\n')
#                         job_file.write('python ../algos/ddpg_n_step_new/ddpg_n_step_new.py  --env {0} --seed {1} --replay_size {2} --n_step {3} --backup_method {4} --data_dir {5} --exp_name ddpg_n_step_new_{0}_{2}_{3}_{4}'.format(task, s, b_s, n_s, b_m, data_out_dir))



Job 1: job_AntPyBulletEnv-v0_1000000_5_avg_n_step_0.sh
Job 2: job_AntPyBulletEnv-v0_1000000_5_avg_n_step_1.sh
Job 3: job_AntPyBulletEnv-v0_1000000_5_avg_n_step_2.sh
Job 4: job_AntPyBulletEnv-v0_1000000_5_avg_n_step_3.sh
Job 5: job_AntPyBulletEnv-v0_1000000_5_avg_n_step_4.sh
Job 6: job_AntPyBulletEnv-v0_1000000_5_min_n_step_0.sh
Job 7: job_AntPyBulletEnv-v0_1000000_5_min_n_step_1.sh
Job 8: job_AntPyBulletEnv-v0_1000000_5_min_n_step_2.sh
Job 9: job_AntPyBulletEnv-v0_1000000_5_min_n_step_3.sh
Job 10: job_AntPyBulletEnv-v0_1000000_5_min_n_step_4.sh
Job 11: job_AntPyBulletEnv-v0_1000000_5_avg_n_step_exclude_1_0.sh
Job 12: job_AntPyBulletEnv-v0_1000000_5_avg_n_step_exclude_1_1.sh
Job 13: job_AntPyBulletEnv-v0_1000000_5_avg_n_step_exclude_1_2.sh
Job 14: job_AntPyBulletEnv-v0_1000000_5_avg_n_step_exclude_1_3.sh
Job 15: job_AntPyBulletEnv-v0_1000000_5_avg_n_step_exclude_1_4.sh
Job 16: job_AntPyBulletEnv-v0_1000000_8_avg_n_step_0.sh
Job 17: job_AntPyBulletEnv-v0_1000000_8_avg_n_step_1.sh
Job 18:

Job 194: job_InvertedPendulumSwingupPyBulletEnv-v0_1000000_5_avg_n_step_exclude_1_3.sh
Job 195: job_InvertedPendulumSwingupPyBulletEnv-v0_1000000_5_avg_n_step_exclude_1_4.sh
Job 196: job_InvertedPendulumSwingupPyBulletEnv-v0_1000000_8_avg_n_step_0.sh
Job 197: job_InvertedPendulumSwingupPyBulletEnv-v0_1000000_8_avg_n_step_1.sh
Job 198: job_InvertedPendulumSwingupPyBulletEnv-v0_1000000_8_avg_n_step_2.sh
Job 199: job_InvertedPendulumSwingupPyBulletEnv-v0_1000000_8_avg_n_step_3.sh
Job 200: job_InvertedPendulumSwingupPyBulletEnv-v0_1000000_8_avg_n_step_4.sh
Job 201: job_InvertedPendulumSwingupPyBulletEnv-v0_1000000_8_min_n_step_0.sh
Job 202: job_InvertedPendulumSwingupPyBulletEnv-v0_1000000_8_min_n_step_1.sh
Job 203: job_InvertedPendulumSwingupPyBulletEnv-v0_1000000_8_min_n_step_2.sh
Job 204: job_InvertedPendulumSwingupPyBulletEnv-v0_1000000_8_min_n_step_3.sh
Job 205: job_InvertedPendulumSwingupPyBulletEnv-v0_1000000_8_min_n_step_4.sh
Job 206: job_InvertedPendulumSwingupPyBulletEnv-v0_10000

Job 351: job_InvertedDoublePendulumMuJoCoEnv-v0_1000000_8_min_n_step_0.sh
Job 352: job_InvertedDoublePendulumMuJoCoEnv-v0_1000000_8_min_n_step_1.sh
Job 353: job_InvertedDoublePendulumMuJoCoEnv-v0_1000000_8_min_n_step_2.sh
Job 354: job_InvertedDoublePendulumMuJoCoEnv-v0_1000000_8_min_n_step_3.sh
Job 355: job_InvertedDoublePendulumMuJoCoEnv-v0_1000000_8_min_n_step_4.sh
Job 356: job_InvertedDoublePendulumMuJoCoEnv-v0_1000000_8_avg_n_step_exclude_1_0.sh
Job 357: job_InvertedDoublePendulumMuJoCoEnv-v0_1000000_8_avg_n_step_exclude_1_1.sh
Job 358: job_InvertedDoublePendulumMuJoCoEnv-v0_1000000_8_avg_n_step_exclude_1_2.sh
Job 359: job_InvertedDoublePendulumMuJoCoEnv-v0_1000000_8_avg_n_step_exclude_1_3.sh
Job 360: job_InvertedDoublePendulumMuJoCoEnv-v0_1000000_8_avg_n_step_exclude_1_4.sh
Job 361: job_Walker2DMuJoCoEnv-v0_1000000_5_avg_n_step_0.sh
Job 362: job_Walker2DMuJoCoEnv-v0_1000000_5_avg_n_step_1.sh
Job 363: job_Walker2DMuJoCoEnv-v0_1000000_5_avg_n_step_2.sh
Job 364: job_Walker2DMuJoCoEnv

Job 542: job_HalfCheetahPyBulletEnv-v0_1000000_8_2_step_1.sh
Job 543: job_HalfCheetahPyBulletEnv-v0_1000000_8_2_step_2.sh
Job 544: job_HalfCheetahPyBulletEnv-v0_1000000_8_2_step_3.sh
Job 545: job_HalfCheetahPyBulletEnv-v0_1000000_8_2_step_4.sh
Job 546: job_HalfCheetahPyBulletEnv-v0_1000000_8_3_step_0.sh
Job 547: job_HalfCheetahPyBulletEnv-v0_1000000_8_3_step_1.sh
Job 548: job_HalfCheetahPyBulletEnv-v0_1000000_8_3_step_2.sh
Job 549: job_HalfCheetahPyBulletEnv-v0_1000000_8_3_step_3.sh
Job 550: job_HalfCheetahPyBulletEnv-v0_1000000_8_3_step_4.sh
Job 551: job_HalfCheetahPyBulletEnv-v0_1000000_8_4_step_0.sh
Job 552: job_HalfCheetahPyBulletEnv-v0_1000000_8_4_step_1.sh
Job 553: job_HalfCheetahPyBulletEnv-v0_1000000_8_4_step_2.sh
Job 554: job_HalfCheetahPyBulletEnv-v0_1000000_8_4_step_3.sh
Job 555: job_HalfCheetahPyBulletEnv-v0_1000000_8_4_step_4.sh
Job 556: job_HalfCheetahPyBulletEnv-v0_1000000_8_5_step_0.sh
Job 557: job_HalfCheetahPyBulletEnv-v0_1000000_8_5_step_1.sh
Job 558: job_HalfCheetah

Job 692: job_InvertedDoublePendulumPyBulletEnv-v0_1000000_8_2_step_1.sh
Job 693: job_InvertedDoublePendulumPyBulletEnv-v0_1000000_8_2_step_2.sh
Job 694: job_InvertedDoublePendulumPyBulletEnv-v0_1000000_8_2_step_3.sh
Job 695: job_InvertedDoublePendulumPyBulletEnv-v0_1000000_8_2_step_4.sh
Job 696: job_InvertedDoublePendulumPyBulletEnv-v0_1000000_8_3_step_0.sh
Job 697: job_InvertedDoublePendulumPyBulletEnv-v0_1000000_8_3_step_1.sh
Job 698: job_InvertedDoublePendulumPyBulletEnv-v0_1000000_8_3_step_2.sh
Job 699: job_InvertedDoublePendulumPyBulletEnv-v0_1000000_8_3_step_3.sh
Job 700: job_InvertedDoublePendulumPyBulletEnv-v0_1000000_8_3_step_4.sh
Job 701: job_InvertedDoublePendulumPyBulletEnv-v0_1000000_8_4_step_0.sh
Job 702: job_InvertedDoublePendulumPyBulletEnv-v0_1000000_8_4_step_1.sh
Job 703: job_InvertedDoublePendulumPyBulletEnv-v0_1000000_8_4_step_2.sh
Job 704: job_InvertedDoublePendulumPyBulletEnv-v0_1000000_8_4_step_3.sh
Job 705: job_InvertedDoublePendulumPyBulletEnv-v0_1000000_8_4_st

Job 864: job_AntMuJoCoEnv-v0_1000000_8_1_step_3.sh
Job 865: job_AntMuJoCoEnv-v0_1000000_8_1_step_4.sh
Job 866: job_AntMuJoCoEnv-v0_1000000_8_2_step_0.sh
Job 867: job_AntMuJoCoEnv-v0_1000000_8_2_step_1.sh
Job 868: job_AntMuJoCoEnv-v0_1000000_8_2_step_2.sh
Job 869: job_AntMuJoCoEnv-v0_1000000_8_2_step_3.sh
Job 870: job_AntMuJoCoEnv-v0_1000000_8_2_step_4.sh
Job 871: job_AntMuJoCoEnv-v0_1000000_8_3_step_0.sh
Job 872: job_AntMuJoCoEnv-v0_1000000_8_3_step_1.sh
Job 873: job_AntMuJoCoEnv-v0_1000000_8_3_step_2.sh
Job 874: job_AntMuJoCoEnv-v0_1000000_8_3_step_3.sh
Job 875: job_AntMuJoCoEnv-v0_1000000_8_3_step_4.sh
Job 876: job_AntMuJoCoEnv-v0_1000000_8_4_step_0.sh
Job 877: job_AntMuJoCoEnv-v0_1000000_8_4_step_1.sh
Job 878: job_AntMuJoCoEnv-v0_1000000_8_4_step_2.sh
Job 879: job_AntMuJoCoEnv-v0_1000000_8_4_step_3.sh
Job 880: job_AntMuJoCoEnv-v0_1000000_8_4_step_4.sh
Job 881: job_AntMuJoCoEnv-v0_1000000_8_5_step_0.sh
Job 882: job_AntMuJoCoEnv-v0_1000000_8_5_step_1.sh
Job 883: job_AntMuJoCoEnv-v0_10

In [4]:
# Number of entries in the script directory; should equal the last job number printed above.
len(os.listdir(job_sub_dir))

935

In [6]:
import os
# job_sub_dir = './2020_01_12_00_44_38_td3_job_scripts'
# Hand every entry of job_sub_dir to sbatch in filename order and echo a
# running index, the script name and the status returned by os.system.
jobs = sorted(os.listdir(job_sub_dir))
for i, job in enumerate(jobs, start=1):
    script_path = os.path.join(job_sub_dir, job)
    code = os.system('sbatch {}'.format(script_path))
    print('{} ---- {}: {}'.format(i, job, code))

1 ---- job_AntMuJoCoEnv-v0_1000000_5_avg_n_step_0.sh: 0
2 ---- job_AntMuJoCoEnv-v0_1000000_5_avg_n_step_1.sh: 0
3 ---- job_AntMuJoCoEnv-v0_1000000_5_avg_n_step_2.sh: 0
4 ---- job_AntMuJoCoEnv-v0_1000000_5_avg_n_step_3.sh: 0
5 ---- job_AntMuJoCoEnv-v0_1000000_5_avg_n_step_4.sh: 0
6 ---- job_AntMuJoCoEnv-v0_1000000_5_avg_n_step_exclude_1_0.sh: 0
7 ---- job_AntMuJoCoEnv-v0_1000000_5_avg_n_step_exclude_1_1.sh: 0
8 ---- job_AntMuJoCoEnv-v0_1000000_5_avg_n_step_exclude_1_2.sh: 0
9 ---- job_AntMuJoCoEnv-v0_1000000_5_avg_n_step_exclude_1_3.sh: 0
10 ---- job_AntMuJoCoEnv-v0_1000000_5_avg_n_step_exclude_1_4.sh: 0
11 ---- job_AntMuJoCoEnv-v0_1000000_5_min_n_step_0.sh: 0
12 ---- job_AntMuJoCoEnv-v0_1000000_5_min_n_step_1.sh: 0
13 ---- job_AntMuJoCoEnv-v0_1000000_5_min_n_step_2.sh: 0
14 ---- job_AntMuJoCoEnv-v0_1000000_5_min_n_step_3.sh: 0
15 ---- job_AntMuJoCoEnv-v0_1000000_5_min_n_step_4.sh: 0
16 ---- job_AntMuJoCoEnv-v0_1000000_8_1_step_0.sh: 0
17 ---- job_AntMuJoCoEnv-v0_1000000_8_1_step_1.sh: 

139 ---- job_HalfCheetahMuJoCoEnv-v0_1000000_8_3_step_3.sh: 0
140 ---- job_HalfCheetahMuJoCoEnv-v0_1000000_8_3_step_4.sh: 0
141 ---- job_HalfCheetahMuJoCoEnv-v0_1000000_8_4_step_0.sh: 0
142 ---- job_HalfCheetahMuJoCoEnv-v0_1000000_8_4_step_1.sh: 0
143 ---- job_HalfCheetahMuJoCoEnv-v0_1000000_8_4_step_2.sh: 0
144 ---- job_HalfCheetahMuJoCoEnv-v0_1000000_8_4_step_3.sh: 0
145 ---- job_HalfCheetahMuJoCoEnv-v0_1000000_8_4_step_4.sh: 0
146 ---- job_HalfCheetahMuJoCoEnv-v0_1000000_8_5_step_0.sh: 0
147 ---- job_HalfCheetahMuJoCoEnv-v0_1000000_8_5_step_1.sh: 0
148 ---- job_HalfCheetahMuJoCoEnv-v0_1000000_8_5_step_2.sh: 0
149 ---- job_HalfCheetahMuJoCoEnv-v0_1000000_8_5_step_3.sh: 0
150 ---- job_HalfCheetahMuJoCoEnv-v0_1000000_8_5_step_4.sh: 0
151 ---- job_HalfCheetahMuJoCoEnv-v0_1000000_8_avg_n_step_0.sh: 0
152 ---- job_HalfCheetahMuJoCoEnv-v0_1000000_8_avg_n_step_1.sh: 0
153 ---- job_HalfCheetahMuJoCoEnv-v0_1000000_8_avg_n_step_2.sh: 0
154 ---- job_HalfCheetahMuJoCoEnv-v0_1000000_8_avg_n_step_

267 ---- job_HopperMuJoCoEnv-v0_1000000_8_avg_n_step_exclude_1_1.sh: 0
268 ---- job_HopperMuJoCoEnv-v0_1000000_8_avg_n_step_exclude_1_2.sh: 0
269 ---- job_HopperMuJoCoEnv-v0_1000000_8_avg_n_step_exclude_1_3.sh: 0
270 ---- job_HopperMuJoCoEnv-v0_1000000_8_avg_n_step_exclude_1_4.sh: 0
271 ---- job_HopperMuJoCoEnv-v0_1000000_8_min_n_step_0.sh: 0
272 ---- job_HopperMuJoCoEnv-v0_1000000_8_min_n_step_1.sh: 0
273 ---- job_HopperMuJoCoEnv-v0_1000000_8_min_n_step_2.sh: 0
274 ---- job_HopperMuJoCoEnv-v0_1000000_8_min_n_step_3.sh: 0
275 ---- job_HopperMuJoCoEnv-v0_1000000_8_min_n_step_4.sh: 0
276 ---- job_HopperPyBulletEnv-v0_1000000_5_avg_n_step_0.sh: 0
277 ---- job_HopperPyBulletEnv-v0_1000000_5_avg_n_step_1.sh: 0
278 ---- job_HopperPyBulletEnv-v0_1000000_5_avg_n_step_2.sh: 0
279 ---- job_HopperPyBulletEnv-v0_1000000_5_avg_n_step_3.sh: 0
280 ---- job_HopperPyBulletEnv-v0_1000000_5_avg_n_step_4.sh: 0
281 ---- job_HopperPyBulletEnv-v0_1000000_5_avg_n_step_exclude_1_0.sh: 0
282 ---- job_HopperPyBu

385 ---- job_HumanoidFlagrunHarderPyBulletEnv-v0_1000000_8_min_n_step_4.sh: 0
386 ---- job_HumanoidFlagrunPyBulletEnv-v0_1000000_5_avg_n_step_0.sh: 0
387 ---- job_HumanoidFlagrunPyBulletEnv-v0_1000000_5_avg_n_step_1.sh: 0
388 ---- job_HumanoidFlagrunPyBulletEnv-v0_1000000_5_avg_n_step_2.sh: 0
389 ---- job_HumanoidFlagrunPyBulletEnv-v0_1000000_5_avg_n_step_3.sh: 0
390 ---- job_HumanoidFlagrunPyBulletEnv-v0_1000000_5_avg_n_step_4.sh: 0
391 ---- job_HumanoidFlagrunPyBulletEnv-v0_1000000_5_avg_n_step_exclude_1_0.sh: 0
392 ---- job_HumanoidFlagrunPyBulletEnv-v0_1000000_5_avg_n_step_exclude_1_1.sh: 0
393 ---- job_HumanoidFlagrunPyBulletEnv-v0_1000000_5_avg_n_step_exclude_1_2.sh: 0
394 ---- job_HumanoidFlagrunPyBulletEnv-v0_1000000_5_avg_n_step_exclude_1_3.sh: 0
395 ---- job_HumanoidFlagrunPyBulletEnv-v0_1000000_5_avg_n_step_exclude_1_4.sh: 0
396 ---- job_HumanoidFlagrunPyBulletEnv-v0_1000000_5_min_n_step_0.sh: 0
397 ---- job_HumanoidFlagrunPyBulletEnv-v0_1000000_5_min_n_step_1.sh: 0
398 ----

506 ---- job_HumanoidPyBulletEnv-v0_1000000_5_min_n_step_0.sh: 0
507 ---- job_HumanoidPyBulletEnv-v0_1000000_5_min_n_step_1.sh: 0
508 ---- job_HumanoidPyBulletEnv-v0_1000000_5_min_n_step_2.sh: 0
509 ---- job_HumanoidPyBulletEnv-v0_1000000_5_min_n_step_3.sh: 0
510 ---- job_HumanoidPyBulletEnv-v0_1000000_5_min_n_step_4.sh: 0
511 ---- job_HumanoidPyBulletEnv-v0_1000000_8_1_step_0.sh: 0
512 ---- job_HumanoidPyBulletEnv-v0_1000000_8_1_step_1.sh: 0
513 ---- job_HumanoidPyBulletEnv-v0_1000000_8_1_step_2.sh: 0
514 ---- job_HumanoidPyBulletEnv-v0_1000000_8_1_step_3.sh: 0
515 ---- job_HumanoidPyBulletEnv-v0_1000000_8_1_step_4.sh: 0
516 ---- job_HumanoidPyBulletEnv-v0_1000000_8_2_step_0.sh: 0
517 ---- job_HumanoidPyBulletEnv-v0_1000000_8_2_step_1.sh: 0
518 ---- job_HumanoidPyBulletEnv-v0_1000000_8_2_step_2.sh: 0
519 ---- job_HumanoidPyBulletEnv-v0_1000000_8_2_step_3.sh: 0
520 ---- job_HumanoidPyBulletEnv-v0_1000000_8_2_step_4.sh: 0
521 ---- job_HumanoidPyBulletEnv-v0_1000000_8_3_step_0.sh: 0
522 

620 ---- job_InvertedDoublePendulumPyBulletEnv-v0_1000000_5_min_n_step_4.sh: 0
621 ---- job_InvertedDoublePendulumPyBulletEnv-v0_1000000_8_1_step_0.sh: 0
622 ---- job_InvertedDoublePendulumPyBulletEnv-v0_1000000_8_1_step_1.sh: 0
623 ---- job_InvertedDoublePendulumPyBulletEnv-v0_1000000_8_1_step_2.sh: 0
624 ---- job_InvertedDoublePendulumPyBulletEnv-v0_1000000_8_1_step_3.sh: 0
625 ---- job_InvertedDoublePendulumPyBulletEnv-v0_1000000_8_1_step_4.sh: 0
626 ---- job_InvertedDoublePendulumPyBulletEnv-v0_1000000_8_2_step_0.sh: 0
627 ---- job_InvertedDoublePendulumPyBulletEnv-v0_1000000_8_2_step_1.sh: 0
628 ---- job_InvertedDoublePendulumPyBulletEnv-v0_1000000_8_2_step_2.sh: 0
629 ---- job_InvertedDoublePendulumPyBulletEnv-v0_1000000_8_2_step_3.sh: 0
630 ---- job_InvertedDoublePendulumPyBulletEnv-v0_1000000_8_2_step_4.sh: 0
631 ---- job_InvertedDoublePendulumPyBulletEnv-v0_1000000_8_3_step_0.sh: 0
632 ---- job_InvertedDoublePendulumPyBulletEnv-v0_1000000_8_3_step_1.sh: 0
633 ---- job_Inverted

728 ---- job_InvertedPendulumSwingupPyBulletEnv-v0_1000000_5_min_n_step_2.sh: 0
729 ---- job_InvertedPendulumSwingupPyBulletEnv-v0_1000000_5_min_n_step_3.sh: 0
730 ---- job_InvertedPendulumSwingupPyBulletEnv-v0_1000000_5_min_n_step_4.sh: 0
731 ---- job_InvertedPendulumSwingupPyBulletEnv-v0_1000000_8_1_step_0.sh: 0
732 ---- job_InvertedPendulumSwingupPyBulletEnv-v0_1000000_8_1_step_1.sh: 0
733 ---- job_InvertedPendulumSwingupPyBulletEnv-v0_1000000_8_1_step_2.sh: 0
734 ---- job_InvertedPendulumSwingupPyBulletEnv-v0_1000000_8_1_step_3.sh: 0
735 ---- job_InvertedPendulumSwingupPyBulletEnv-v0_1000000_8_1_step_4.sh: 0
736 ---- job_InvertedPendulumSwingupPyBulletEnv-v0_1000000_8_2_step_0.sh: 0
737 ---- job_InvertedPendulumSwingupPyBulletEnv-v0_1000000_8_2_step_1.sh: 0
738 ---- job_InvertedPendulumSwingupPyBulletEnv-v0_1000000_8_2_step_2.sh: 0
739 ---- job_InvertedPendulumSwingupPyBulletEnv-v0_1000000_8_2_step_3.sh: 0
740 ---- job_InvertedPendulumSwingupPyBulletEnv-v0_1000000_8_2_step_4.sh: 0


848 ---- job_Walker2DMuJoCoEnv-v0_1000000_8_2_step_2.sh: 0
849 ---- job_Walker2DMuJoCoEnv-v0_1000000_8_2_step_3.sh: 0
850 ---- job_Walker2DMuJoCoEnv-v0_1000000_8_2_step_4.sh: 0
851 ---- job_Walker2DMuJoCoEnv-v0_1000000_8_3_step_0.sh: 0
852 ---- job_Walker2DMuJoCoEnv-v0_1000000_8_3_step_1.sh: 0
853 ---- job_Walker2DMuJoCoEnv-v0_1000000_8_3_step_2.sh: 0
854 ---- job_Walker2DMuJoCoEnv-v0_1000000_8_3_step_3.sh: 0
855 ---- job_Walker2DMuJoCoEnv-v0_1000000_8_3_step_4.sh: 0
856 ---- job_Walker2DMuJoCoEnv-v0_1000000_8_4_step_0.sh: 0
857 ---- job_Walker2DMuJoCoEnv-v0_1000000_8_4_step_1.sh: 0
858 ---- job_Walker2DMuJoCoEnv-v0_1000000_8_4_step_2.sh: 0
859 ---- job_Walker2DMuJoCoEnv-v0_1000000_8_4_step_3.sh: 0
860 ---- job_Walker2DMuJoCoEnv-v0_1000000_8_4_step_4.sh: 0
861 ---- job_Walker2DMuJoCoEnv-v0_1000000_8_5_step_0.sh: 0
862 ---- job_Walker2DMuJoCoEnv-v0_1000000_8_5_step_1.sh: 0
863 ---- job_Walker2DMuJoCoEnv-v0_1000000_8_5_step_2.sh: 0
864 ---- job_Walker2DMuJoCoEnv-v0_1000000_8_5_step_3.sh:

# Baseline

In [1]:
import numpy as np
import os
import time
from datetime import datetime
# Slurm resource requests substituted into the baseline job scripts generated below.
CPU_NUM = 4  # value written after '#SBATCH --cpus-per-task='
JOB_TIME = '0-10:00'  # value written after '#SBATCH --time=' (DD-HH:MM)
JOB_MEMORY = '12000M'  # value written after '#SBATCH --mem='

In [2]:
# Environments every baseline algorithm below is run on.
tasks = [
    'AntPyBulletEnv-v0',
    'HalfCheetahPyBulletEnv-v0',
    'Walker2DPyBulletEnv-v0',
    'HopperPyBulletEnv-v0',
    'ReacherPyBulletEnv-v0',
    'InvertedPendulumPyBulletEnv-v0',
    'InvertedPendulumSwingupPyBulletEnv-v0',
    'InvertedDoublePendulumPyBulletEnv-v0',
    'HumanoidPyBulletEnv-v0',
    'HumanoidFlagrunPyBulletEnv-v0',
    'HumanoidFlagrunHarderPyBulletEnv-v0',
    'InvertedPendulumMuJoCoEnv-v0',
    'InvertedDoublePendulumMuJoCoEnv-v0',
    'Walker2DMuJoCoEnv-v0',
    'HalfCheetahMuJoCoEnv-v0',
    'AntMuJoCoEnv-v0',
    'HopperMuJoCoEnv-v0',
    'HumanoidMuJoCoEnv-v0',
]

## DDPG

In [3]:
# Tag this batch with the current local time (YYYY_MM_DD_HH_MM_SS) so every
# run of this cell gets its own script, log and data directory names.
timestample = datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
# Directory that receives the generated .sh submission scripts.
job_sub_dir = './{}_ddpg_job_scripts'.format(timestample)
# Directory referenced by the '#SBATCH --output' line of each script.
job_out_dir = './{}_ddpg_job_scripts_output'.format(timestample)
# Name handed to the training script through --data_dir.
data_out_dir = '{}_ddpg_PyBulletGym'.format(timestample)

# exist_ok=True replaces the racy "check, then create" pattern and still
# raises if the path exists but is not a directory.
for batch_dir in (job_sub_dir, job_out_dir):
    os.makedirs(batch_dir, exist_ok=True)

In [4]:

seeds = [str(seed_id) for seed_id in range(5)]
# Emit one Slurm script per (environment, seed) pair for the DDPG baseline.
for task in tasks:
    for s in seeds:
        job_filename = 'job_{0}_{1}.sh'.format(task, s)
        print(job_filename)
        # Assemble the whole script first, then write it in a single call.
        script_lines = [
            '#!/bin/bash\n',
            '#SBATCH --account=def-dkulic\n',
            '#SBATCH --cpus-per-task={}    #Maximum of CPU cores per GPU request: 6 on Cedar, 16 on Graham.\n'.format(CPU_NUM),
            '#SBATCH --mem={}               # memory per node\n'.format(JOB_MEMORY),
            '#SBATCH --time={}            # time (DD-HH:MM)\n'.format(JOB_TIME),
            '#SBATCH --output={0}/{1}_{2}_%N-%j.out        # %N for node name, %j for jobID\n'.format(job_out_dir, task, s),
            '## Main processing command\n',
            'module load cuda cudnn \n',
            'source ~/tf_gpu/bin/activate\n',
            'python ../algos_original/ddpg/ddpg.py  --env {0} --seed {1} --data_dir {2} --exp_name ddpg_{0}'.format(task, s, data_out_dir),
        ]
        with open(os.path.join(job_sub_dir, job_filename), 'w') as job_file:
            job_file.writelines(script_lines)


job_AntPyBulletEnv-v0_0.sh
job_AntPyBulletEnv-v0_1.sh
job_AntPyBulletEnv-v0_2.sh
job_AntPyBulletEnv-v0_3.sh
job_AntPyBulletEnv-v0_4.sh
job_HalfCheetahPyBulletEnv-v0_0.sh
job_HalfCheetahPyBulletEnv-v0_1.sh
job_HalfCheetahPyBulletEnv-v0_2.sh
job_HalfCheetahPyBulletEnv-v0_3.sh
job_HalfCheetahPyBulletEnv-v0_4.sh
job_Walker2DPyBulletEnv-v0_0.sh
job_Walker2DPyBulletEnv-v0_1.sh
job_Walker2DPyBulletEnv-v0_2.sh
job_Walker2DPyBulletEnv-v0_3.sh
job_Walker2DPyBulletEnv-v0_4.sh
job_HopperPyBulletEnv-v0_0.sh
job_HopperPyBulletEnv-v0_1.sh
job_HopperPyBulletEnv-v0_2.sh
job_HopperPyBulletEnv-v0_3.sh
job_HopperPyBulletEnv-v0_4.sh
job_ReacherPyBulletEnv-v0_0.sh
job_ReacherPyBulletEnv-v0_1.sh
job_ReacherPyBulletEnv-v0_2.sh
job_ReacherPyBulletEnv-v0_3.sh
job_ReacherPyBulletEnv-v0_4.sh
job_InvertedPendulumPyBulletEnv-v0_0.sh
job_InvertedPendulumPyBulletEnv-v0_1.sh
job_InvertedPendulumPyBulletEnv-v0_2.sh
job_InvertedPendulumPyBulletEnv-v0_3.sh
job_InvertedPendulumPyBulletEnv-v0_4.sh
job_InvertedPendulumSwing

In [5]:
import os
# job_sub_dir = './2020_01_12_00_44_38_td3_job_scripts'
# Hand every entry of job_sub_dir to sbatch in filename order and echo a
# running index, the script name and the status returned by os.system.
jobs = sorted(os.listdir(job_sub_dir))
for i, job in enumerate(jobs, start=1):
    script_path = os.path.join(job_sub_dir, job)
    code = os.system('sbatch {}'.format(script_path))
    print('{} ---- {}: {}'.format(i, job, code))


1 ---- job_AntMuJoCoEnv-v0_0.sh: 0
2 ---- job_AntMuJoCoEnv-v0_1.sh: 0
3 ---- job_AntMuJoCoEnv-v0_2.sh: 0
4 ---- job_AntMuJoCoEnv-v0_3.sh: 0
5 ---- job_AntMuJoCoEnv-v0_4.sh: 0
6 ---- job_AntPyBulletEnv-v0_0.sh: 0
7 ---- job_AntPyBulletEnv-v0_1.sh: 0
8 ---- job_AntPyBulletEnv-v0_2.sh: 0
9 ---- job_AntPyBulletEnv-v0_3.sh: 0
10 ---- job_AntPyBulletEnv-v0_4.sh: 0
11 ---- job_HalfCheetahMuJoCoEnv-v0_0.sh: 0
12 ---- job_HalfCheetahMuJoCoEnv-v0_1.sh: 0
13 ---- job_HalfCheetahMuJoCoEnv-v0_2.sh: 0
14 ---- job_HalfCheetahMuJoCoEnv-v0_3.sh: 0
15 ---- job_HalfCheetahMuJoCoEnv-v0_4.sh: 0
16 ---- job_HalfCheetahPyBulletEnv-v0_0.sh: 0
17 ---- job_HalfCheetahPyBulletEnv-v0_1.sh: 0
18 ---- job_HalfCheetahPyBulletEnv-v0_2.sh: 0
19 ---- job_HalfCheetahPyBulletEnv-v0_3.sh: 0
20 ---- job_HalfCheetahPyBulletEnv-v0_4.sh: 0
21 ---- job_HopperMuJoCoEnv-v0_0.sh: 0
22 ---- job_HopperMuJoCoEnv-v0_1.sh: 0
23 ---- job_HopperMuJoCoEnv-v0_2.sh: 0
24 ---- job_HopperMuJoCoEnv-v0_3.sh: 0
25 ---- job_HopperMuJoCoEnv-v0_4.

## TD3

In [16]:
# Tag this batch with the current local time (YYYY_MM_DD_HH_MM_SS) so every
# run of this cell gets its own script, log and data directory names.
timestample = datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
# Directory that receives the generated .sh submission scripts.
job_sub_dir = './{}_td3_job_scripts'.format(timestample)
# Directory referenced by the '#SBATCH --output' line of each script.
job_out_dir = './{}_td3_job_scripts_output'.format(timestample)
# Name handed to the training script through --data_dir.
data_out_dir = '{}_td3_PyBulletGym'.format(timestample)

# exist_ok=True replaces the racy "check, then create" pattern and still
# raises if the path exists but is not a directory.
for batch_dir in (job_sub_dir, job_out_dir):
    os.makedirs(batch_dir, exist_ok=True)

In [17]:
# os.getcwd()

In [18]:
# os.path.abspath('../algos_original/td3/td3.py')

In [19]:

seeds = [str(seed_id) for seed_id in range(5)]
# Emit one Slurm script per (environment, seed) pair for the TD3 baseline.
for task in tasks:
    for s in seeds:
        job_filename = 'job_{0}_{1}.sh'.format(task, s)
        print(job_filename)
        # Assemble the whole script first, then write it in a single call.
        script_lines = [
            '#!/bin/bash\n',
            '#SBATCH --account=def-dkulic\n',
            '#SBATCH --cpus-per-task={}    #Maximum of CPU cores per GPU request: 6 on Cedar, 16 on Graham.\n'.format(CPU_NUM),
            '#SBATCH --mem={}               # memory per node\n'.format(JOB_MEMORY),
            '#SBATCH --time={}            # time (DD-HH:MM)\n'.format(JOB_TIME),
            '#SBATCH --output={0}/{1}_{2}_%N-%j.out        # %N for node name, %j for jobID\n'.format(job_out_dir, task, s),
            '## Main processing command\n',
            'module load cuda cudnn \n',
            'source ~/tf_gpu/bin/activate\n',
            'python ../algos_original/td3/td3.py  --env {0} --seed {1} --data_dir {2} --exp_name td3_{0}'.format(task, s, data_out_dir),
        ]
        with open(os.path.join(job_sub_dir, job_filename), 'w') as job_file:
            job_file.writelines(script_lines)


job_AntPyBulletEnv-v0_0.sh
job_AntPyBulletEnv-v0_1.sh
job_AntPyBulletEnv-v0_2.sh
job_AntPyBulletEnv-v0_3.sh
job_AntPyBulletEnv-v0_4.sh
job_HalfCheetahPyBulletEnv-v0_0.sh
job_HalfCheetahPyBulletEnv-v0_1.sh
job_HalfCheetahPyBulletEnv-v0_2.sh
job_HalfCheetahPyBulletEnv-v0_3.sh
job_HalfCheetahPyBulletEnv-v0_4.sh
job_Walker2DPyBulletEnv-v0_0.sh
job_Walker2DPyBulletEnv-v0_1.sh
job_Walker2DPyBulletEnv-v0_2.sh
job_Walker2DPyBulletEnv-v0_3.sh
job_Walker2DPyBulletEnv-v0_4.sh
job_HopperPyBulletEnv-v0_0.sh
job_HopperPyBulletEnv-v0_1.sh
job_HopperPyBulletEnv-v0_2.sh
job_HopperPyBulletEnv-v0_3.sh
job_HopperPyBulletEnv-v0_4.sh
job_ReacherPyBulletEnv-v0_0.sh
job_ReacherPyBulletEnv-v0_1.sh
job_ReacherPyBulletEnv-v0_2.sh
job_ReacherPyBulletEnv-v0_3.sh
job_ReacherPyBulletEnv-v0_4.sh
job_InvertedPendulumPyBulletEnv-v0_0.sh
job_InvertedPendulumPyBulletEnv-v0_1.sh
job_InvertedPendulumPyBulletEnv-v0_2.sh
job_InvertedPendulumPyBulletEnv-v0_3.sh
job_InvertedPendulumPyBulletEnv-v0_4.sh
job_InvertedPendulumSwing

In [20]:
import os
# job_sub_dir = './2020_01_12_00_44_38_td3_job_scripts'
# Hand every entry of job_sub_dir to sbatch in filename order and echo a
# running index, the script name and the status returned by os.system.
jobs = sorted(os.listdir(job_sub_dir))
for i, job in enumerate(jobs, start=1):
    script_path = os.path.join(job_sub_dir, job)
    code = os.system('sbatch {}'.format(script_path))
    print('{} ---- {}: {}'.format(i, job, code))


1 ---- job_AntMuJoCoEnv-v0_0.sh: 0
2 ---- job_AntMuJoCoEnv-v0_1.sh: 0
3 ---- job_AntMuJoCoEnv-v0_2.sh: 0
4 ---- job_AntMuJoCoEnv-v0_3.sh: 0
5 ---- job_AntMuJoCoEnv-v0_4.sh: 0
6 ---- job_AntPyBulletEnv-v0_0.sh: 0
7 ---- job_AntPyBulletEnv-v0_1.sh: 0
8 ---- job_AntPyBulletEnv-v0_2.sh: 0
9 ---- job_AntPyBulletEnv-v0_3.sh: 0
10 ---- job_AntPyBulletEnv-v0_4.sh: 0
11 ---- job_HalfCheetahMuJoCoEnv-v0_0.sh: 0
12 ---- job_HalfCheetahMuJoCoEnv-v0_1.sh: 0
13 ---- job_HalfCheetahMuJoCoEnv-v0_2.sh: 0
14 ---- job_HalfCheetahMuJoCoEnv-v0_3.sh: 0
15 ---- job_HalfCheetahMuJoCoEnv-v0_4.sh: 0
16 ---- job_HalfCheetahPyBulletEnv-v0_0.sh: 0
17 ---- job_HalfCheetahPyBulletEnv-v0_1.sh: 0
18 ---- job_HalfCheetahPyBulletEnv-v0_2.sh: 0
19 ---- job_HalfCheetahPyBulletEnv-v0_3.sh: 0
20 ---- job_HalfCheetahPyBulletEnv-v0_4.sh: 0
21 ---- job_HopperMuJoCoEnv-v0_0.sh: 0
22 ---- job_HopperMuJoCoEnv-v0_1.sh: 0
23 ---- job_HopperMuJoCoEnv-v0_2.sh: 0
24 ---- job_HopperMuJoCoEnv-v0_3.sh: 0
25 ---- job_HopperMuJoCoEnv-v0_4.

## SAC

In [10]:
# Tag this batch with the current local time (YYYY_MM_DD_HH_MM_SS) so every
# run of this cell gets its own script, log and data directory names.
timestample = datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
# Directory that receives the generated .sh submission scripts.
job_sub_dir = './{}_sac_job_scripts'.format(timestample)
# Directory referenced by the '#SBATCH --output' line of each script.
job_out_dir = './{}_sac_job_scripts_output'.format(timestample)
# Name handed to the training script through --data_dir.
data_out_dir = '{}_sac_PyBulletGym'.format(timestample)

# exist_ok=True replaces the racy "check, then create" pattern and still
# raises if the path exists but is not a directory.
for batch_dir in (job_sub_dir, job_out_dir):
    os.makedirs(batch_dir, exist_ok=True)

In [11]:

seeds = [str(seed_id) for seed_id in range(5)]
# Emit one Slurm script per (environment, seed) pair for the SAC baseline.
for task in tasks:
    for s in seeds:
        job_filename = 'job_{0}_{1}.sh'.format(task, s)
        print(job_filename)
        # Assemble the whole script first, then write it in a single call.
        script_lines = [
            '#!/bin/bash\n',
            '#SBATCH --account=def-dkulic\n',
            '#SBATCH --cpus-per-task={}    #Maximum of CPU cores per GPU request: 6 on Cedar, 16 on Graham.\n'.format(CPU_NUM),
            '#SBATCH --mem={}               # memory per node\n'.format(JOB_MEMORY),
            '#SBATCH --time={}            # time (DD-HH:MM)\n'.format(JOB_TIME),
            '#SBATCH --output={0}/{1}_{2}_%N-%j.out        # %N for node name, %j for jobID\n'.format(job_out_dir, task, s),
            '## Main processing command\n',
            'module load cuda cudnn \n',
            'source ~/tf_gpu/bin/activate\n',
            'python ../algos_original/sac/sac.py  --env {0} --seed {1} --data_dir {2} --exp_name sac_{0}'.format(task, s, data_out_dir),
        ]
        with open(os.path.join(job_sub_dir, job_filename), 'w') as job_file:
            job_file.writelines(script_lines)


job_AntPyBulletEnv-v0_0.sh
job_AntPyBulletEnv-v0_1.sh
job_AntPyBulletEnv-v0_2.sh
job_AntPyBulletEnv-v0_3.sh
job_AntPyBulletEnv-v0_4.sh
job_HalfCheetahPyBulletEnv-v0_0.sh
job_HalfCheetahPyBulletEnv-v0_1.sh
job_HalfCheetahPyBulletEnv-v0_2.sh
job_HalfCheetahPyBulletEnv-v0_3.sh
job_HalfCheetahPyBulletEnv-v0_4.sh
job_Walker2DPyBulletEnv-v0_0.sh
job_Walker2DPyBulletEnv-v0_1.sh
job_Walker2DPyBulletEnv-v0_2.sh
job_Walker2DPyBulletEnv-v0_3.sh
job_Walker2DPyBulletEnv-v0_4.sh
job_HopperPyBulletEnv-v0_0.sh
job_HopperPyBulletEnv-v0_1.sh
job_HopperPyBulletEnv-v0_2.sh
job_HopperPyBulletEnv-v0_3.sh
job_HopperPyBulletEnv-v0_4.sh
job_ReacherPyBulletEnv-v0_0.sh
job_ReacherPyBulletEnv-v0_1.sh
job_ReacherPyBulletEnv-v0_2.sh
job_ReacherPyBulletEnv-v0_3.sh
job_ReacherPyBulletEnv-v0_4.sh
job_InvertedPendulumPyBulletEnv-v0_0.sh
job_InvertedPendulumPyBulletEnv-v0_1.sh
job_InvertedPendulumPyBulletEnv-v0_2.sh
job_InvertedPendulumPyBulletEnv-v0_3.sh
job_InvertedPendulumPyBulletEnv-v0_4.sh
job_InvertedPendulumSwing

In [12]:
import os
import subprocess

# job_sub_dir = './2020_01_12_00_44_38_td3_job_scripts'
# Submit every generated job script in job_sub_dir to Slurm, in sorted
# filename order, and print sbatch's exit status for each one.
# Only '*.sh' entries are submitted, so any other directory entry that ends up
# in job_sub_dir is never handed to sbatch.
jobs = sorted(name for name in os.listdir(job_sub_dir) if name.endswith('.sh'))
for i, job in enumerate(jobs, start=1):
    # Argument-list form: the path is not parsed by a shell, and the value
    # returned is the command's real exit status (os.system returns an encoded
    # wait status on Unix). Raises OSError if sbatch is not on PATH.
    code = subprocess.call(['sbatch', os.path.join(job_sub_dir, job)])
    print('{} ---- {}: {}'.format(i, job, code))


1 ---- job_AntMuJoCoEnv-v0_0.sh: 0
2 ---- job_AntMuJoCoEnv-v0_1.sh: 0
3 ---- job_AntMuJoCoEnv-v0_2.sh: 0
4 ---- job_AntMuJoCoEnv-v0_3.sh: 0
5 ---- job_AntMuJoCoEnv-v0_4.sh: 0
6 ---- job_AntPyBulletEnv-v0_0.sh: 0
7 ---- job_AntPyBulletEnv-v0_1.sh: 0
8 ---- job_AntPyBulletEnv-v0_2.sh: 0
9 ---- job_AntPyBulletEnv-v0_3.sh: 0
10 ---- job_AntPyBulletEnv-v0_4.sh: 0
11 ---- job_HalfCheetahMuJoCoEnv-v0_0.sh: 0
12 ---- job_HalfCheetahMuJoCoEnv-v0_1.sh: 0
13 ---- job_HalfCheetahMuJoCoEnv-v0_2.sh: 0
14 ---- job_HalfCheetahMuJoCoEnv-v0_3.sh: 0
15 ---- job_HalfCheetahMuJoCoEnv-v0_4.sh: 0
16 ---- job_HalfCheetahPyBulletEnv-v0_0.sh: 0
17 ---- job_HalfCheetahPyBulletEnv-v0_1.sh: 0
18 ---- job_HalfCheetahPyBulletEnv-v0_2.sh: 0
19 ---- job_HalfCheetahPyBulletEnv-v0_3.sh: 0
20 ---- job_HalfCheetahPyBulletEnv-v0_4.sh: 0
21 ---- job_HopperMuJoCoEnv-v0_0.sh: 0
22 ---- job_HopperMuJoCoEnv-v0_1.sh: 0
23 ---- job_HopperMuJoCoEnv-v0_2.sh: 0
24 ---- job_HopperMuJoCoEnv-v0_3.sh: 0
25 ---- job_HopperMuJoCoEnv-v0_4.

## PPO

In [21]:
# Create timestamped directories for the PPO job scripts and their Slurm logs.
# Timestamp format: YYYY_MM_DD_HH_MM_SS, taken from datetime.now().
timestample = datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
job_sub_dir = './{}_ppo_job_scripts'.format(timestample)         # generated job_*.sh files
job_out_dir = './{}_ppo_job_scripts_output'.format(timestample)  # target of #SBATCH --output
data_out_dir = '{}_ppo_PyBulletGym'.format(timestample)          # passed to ppo.py as --data_dir

# exist_ok=True replaces the separate os.path.exists() test, which leaves a
# window between the check and the creation.
for directory in (job_sub_dir, job_out_dir):
    os.makedirs(directory, exist_ok=True)

In [22]:
# Slurm directives this cell is modelled on:
#SBATCH --ntasks=4               # number of MPI processes
#SBATCH --mem-per-cpu=1024M      # memory; default unit is megabytes

# Write one Slurm batch script per (environment, seed) pair into job_sub_dir.
# Each script requests CPU_NUM tasks and starts ppo.py under `mpirun -np CPU_NUM`.

# NOTE: this rebinds the notebook-wide JOB_MEMORY (set to '16G' in the first
# cell). Here it is a number that gets an 'M' suffix in --mem-per-cpu.
JOB_MEMORY = 4096
seeds = ['0', '1', '2', '3', '4']
for task, s in ((env_name, seed) for env_name in tasks for seed in seeds):
    job_filename = 'job_{0}_{1}.sh'.format(task, s)
    print(job_filename)

    # Assemble the whole script first, then write it in one call.
    script_lines = [
        '#!/bin/bash\n',
        '#SBATCH --account=def-dkulic\n',
        '#SBATCH --ntasks={} #Maximum of CPU cores per GPU request: 6 on Cedar, 16 on Graham.\n'.format(CPU_NUM),
        '#SBATCH --mem-per-cpu={}M           # memory per node\n'.format(JOB_MEMORY),
        '#SBATCH --time={}          # time (DD-HH:MM)\n'.format(JOB_TIME),
        '#SBATCH --output={0}/{1}_{2}_%N-%j.out        # %N for node name, %j for jobID\n'.format(job_out_dir, task, s),
        '## Main processing command\n',
        'module load cuda cudnn \n',
        'source ~/tf_gpu/bin/activate\n',
        # The final line is written without a trailing newline.
        'mpirun -np {0} python ../algos_original/ppo/ppo.py  --env {1} --seed {2} --data_dir {3} --exp_name ppo_{1}'.format(CPU_NUM, task, s, data_out_dir),
    ]
    with open(os.path.join(job_sub_dir, job_filename), 'w') as job_file:
        job_file.writelines(script_lines)


job_AntPyBulletEnv-v0_0.sh
job_AntPyBulletEnv-v0_1.sh
job_AntPyBulletEnv-v0_2.sh
job_AntPyBulletEnv-v0_3.sh
job_AntPyBulletEnv-v0_4.sh
job_HalfCheetahPyBulletEnv-v0_0.sh
job_HalfCheetahPyBulletEnv-v0_1.sh
job_HalfCheetahPyBulletEnv-v0_2.sh
job_HalfCheetahPyBulletEnv-v0_3.sh
job_HalfCheetahPyBulletEnv-v0_4.sh
job_Walker2DPyBulletEnv-v0_0.sh
job_Walker2DPyBulletEnv-v0_1.sh
job_Walker2DPyBulletEnv-v0_2.sh
job_Walker2DPyBulletEnv-v0_3.sh
job_Walker2DPyBulletEnv-v0_4.sh
job_HopperPyBulletEnv-v0_0.sh
job_HopperPyBulletEnv-v0_1.sh
job_HopperPyBulletEnv-v0_2.sh
job_HopperPyBulletEnv-v0_3.sh
job_HopperPyBulletEnv-v0_4.sh
job_ReacherPyBulletEnv-v0_0.sh
job_ReacherPyBulletEnv-v0_1.sh
job_ReacherPyBulletEnv-v0_2.sh
job_ReacherPyBulletEnv-v0_3.sh
job_ReacherPyBulletEnv-v0_4.sh
job_InvertedPendulumPyBulletEnv-v0_0.sh
job_InvertedPendulumPyBulletEnv-v0_1.sh
job_InvertedPendulumPyBulletEnv-v0_2.sh
job_InvertedPendulumPyBulletEnv-v0_3.sh
job_InvertedPendulumPyBulletEnv-v0_4.sh
job_InvertedPendulumSwing

In [23]:
import os
import subprocess

# job_sub_dir = './2020_01_12_00_44_38_td3_job_scripts'
# Submit every generated job script in job_sub_dir to Slurm, in sorted
# filename order, and print sbatch's exit status for each one.
# Only '*.sh' entries are submitted, so any other directory entry that ends up
# in job_sub_dir is never handed to sbatch.
jobs = sorted(name for name in os.listdir(job_sub_dir) if name.endswith('.sh'))
for i, job in enumerate(jobs, start=1):
    # Argument-list form: the path is not parsed by a shell, and the value
    # returned is the command's real exit status (os.system returns an encoded
    # wait status on Unix). Raises OSError if sbatch is not on PATH.
    code = subprocess.call(['sbatch', os.path.join(job_sub_dir, job)])
    print('{} ---- {}: {}'.format(i, job, code))


1 ---- job_AntMuJoCoEnv-v0_0.sh: 0
2 ---- job_AntMuJoCoEnv-v0_1.sh: 0
3 ---- job_AntMuJoCoEnv-v0_2.sh: 0
4 ---- job_AntMuJoCoEnv-v0_3.sh: 0
5 ---- job_AntMuJoCoEnv-v0_4.sh: 0
6 ---- job_AntPyBulletEnv-v0_0.sh: 0
7 ---- job_AntPyBulletEnv-v0_1.sh: 0
8 ---- job_AntPyBulletEnv-v0_2.sh: 0
9 ---- job_AntPyBulletEnv-v0_3.sh: 0
10 ---- job_AntPyBulletEnv-v0_4.sh: 0
11 ---- job_HalfCheetahMuJoCoEnv-v0_0.sh: 0
12 ---- job_HalfCheetahMuJoCoEnv-v0_1.sh: 0
13 ---- job_HalfCheetahMuJoCoEnv-v0_2.sh: 0
14 ---- job_HalfCheetahMuJoCoEnv-v0_3.sh: 0
15 ---- job_HalfCheetahMuJoCoEnv-v0_4.sh: 0
16 ---- job_HalfCheetahPyBulletEnv-v0_0.sh: 0
17 ---- job_HalfCheetahPyBulletEnv-v0_1.sh: 0
18 ---- job_HalfCheetahPyBulletEnv-v0_2.sh: 0
19 ---- job_HalfCheetahPyBulletEnv-v0_3.sh: 0
20 ---- job_HalfCheetahPyBulletEnv-v0_4.sh: 0
21 ---- job_HopperMuJoCoEnv-v0_0.sh: 0
22 ---- job_HopperMuJoCoEnv-v0_1.sh: 0
23 ---- job_HopperMuJoCoEnv-v0_2.sh: 0
24 ---- job_HopperMuJoCoEnv-v0_3.sh: 0
25 ---- job_HopperMuJoCoEnv-v0_4.