# DDPG-n-step Generate Jobs

In [None]:
import numpy as np
import os
# --- Directories, relative to the notebook's working directory ---
job_out_dir = './job_scripts_output'   # created below; meant to hold job output logs
job_sub_dir = './job_scripts'          # generated job scripts (.sh) are written here

# --- Resource requests substituted into each generated job script ---
JOB_MEMORY = '12000M'   # value for `#SBATCH --mem`
JOB_TIME = '0-10:00'    # value for `#SBATCH --time`, format DD-HH:MM
CPU_NUM = 4             # value for `#SBATCH --cpus-per-task`

In [None]:
# Create the script and log directories. exist_ok=True keeps this cell
# re-runnable: without it os.makedirs raises FileExistsError on a second run.
os.makedirs(job_sub_dir, exist_ok=True)
os.makedirs(job_out_dir, exist_ok=True)

In [None]:
# Generate one job script per combination of
# (seed, environment, replay size, n-step value, backup method).

# Environment ids passed to the training script via --env.
tasks = ['AntPyBulletEnv-v0', 'HalfCheetahPyBulletEnv-v0', 'Walker2DPyBulletEnv-v0', 'HopperPyBulletEnv-v0',
         'ReacherPyBulletEnv-v0', 'InvertedPendulumPyBulletEnv-v0', 'InvertedPendulumSwingupPyBulletEnv-v0',
         'InvertedDoublePendulumPyBulletEnv-v0',
         'HumanoidPyBulletEnv-v0', 'HumanoidFlagrunPyBulletEnv-v0', 'HumanoidFlagrunHarderPyBulletEnv-v0',
         'InvertedPendulumMuJoCoEnv-v0', 'InvertedDoublePendulumMuJoCoEnv-v0',
         'Walker2DMuJoCoEnv-v0', 'HalfCheetahMuJoCoEnv-v0', 'AntMuJoCoEnv-v0', 'HopperMuJoCoEnv-v0',
         'HumanoidMuJoCoEnv-v0']

# Swept values for --replay_size, --seed, --n_step and --backup_method.
replay_size = ['1000000', '500000']
seeds = ['0', '1', '2', '3', '4']
n_step = [5, 8]
backup_method = ['avg_n_step', 'min_n_step', 'avg_n_step_exclude_1', '1_step', '2_step', '3_step', '4_step',
                 '5_step', '6_step', '7_step', '8_step']
for s in seeds:
    for task in tasks:
        for b_s in replay_size:
            for n_s in n_step:
                for b_m in backup_method:
                    # The filename must include n_s: without it the n_step=5 and
                    # n_step=8 scripts share one path and the later write silently
                    # overwrites the earlier, dropping half of the sweep.
                    job_filename = 'job_{0}_{1}_{2}_{3}_{4}.sh'.format(task, s, b_s, n_s, b_m)
                    print(job_filename)
                    with open(os.path.join(job_sub_dir, job_filename), 'w') as job_file:
                        # Scheduler directives.
                        job_file.write('#!/bin/bash\n')
                        job_file.write('#SBATCH --account=def-dkulic\n')
                        job_file.write('#SBATCH --cpus-per-task={}    #Maximum of CPU cores per GPU request: 6 on Cedar, 16 on Graham.\n'.format(CPU_NUM))
                        job_file.write('#SBATCH --mem={}               # memory per node\n'.format(JOB_MEMORY))
                        job_file.write('#SBATCH --time={}            # time (DD-HH:MM)\n'.format(JOB_TIME))
                        job_file.write('#SBATCH --output=./job_scripts_output/ddpg_new_step_new_{0}_{1}_{2}_{3}_{4}_%N-%j.out        # %N for node name, %j for jobID\n'.format(task, s, b_s, n_s, b_m))
                        # Environment setup and the training command itself.
                        job_file.write('## Main processing command\n')
                        job_file.write('module load cuda cudnn \n')
                        job_file.write('source ~/tf_gpu/bin/activate\n')
                        # NOTE(review): --exp_name only encodes task, seed and replay size.
                        # If the training script derives its output directory from
                        # exp_name, runs differing only in n_step / backup_method would
                        # share a directory -- confirm against ddpg_n_step_new.py.
                        job_file.write('python ./ddpg_n_step_new.py  --env {0} --seed {1} --replay_size {2} --n_step {3} --backup_method {4} --data_dir spinup_data_ddpg_n_step_new_PyBulletGym --exp_name ddpg_n_step_new_{0}_{1}_{2}'.format(task, s, b_s, n_s, b_m))


In [None]:
import os

# Directory holding the generated job scripts.
job_sub_dir = './job_scripts'

# Submit only the generated ``.sh`` scripts, in sorted order, so that stray
# entries in the directory (e.g. editor checkpoint folders) are not handed
# to sbatch.
jobs = sorted(name for name in os.listdir(job_sub_dir) if name.endswith('.sh'))

for i, job in enumerate(jobs, start=1):
    # `code` is whatever os.system returns for the sbatch invocation; it is
    # printed next to a running counter so failed submissions can be spotted.
    code = os.system('sbatch {}'.format(os.path.join(job_sub_dir, job)))
    print('{} ---- {}: {}'.format(i, job, code))


# Baseline

In [19]:
import numpy as np
import os
import time
from datetime import datetime
# Resource requests substituted into each generated baseline job script.
JOB_MEMORY = '12000M'   # value for `#SBATCH --mem`
JOB_TIME = '0-10:00'    # value for `#SBATCH --time`, format DD-HH:MM
CPU_NUM = 4             # value for `#SBATCH --cpus-per-task`

In [22]:
# Timestamp of the form YYYY_MM_DD_HH_MM_SS. It prefixes every directory name
# below so each generation run gets its own script, log and data directories.
timestample = datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
job_sub_dir = './{}_td3_job_scripts'.format(timestample)         # generated .sh files
job_out_dir = './{}_td3_job_scripts_output'.format(timestample)  # target of #SBATCH --output
data_out_dir = '{}_td3_PyBulletGym'.format(timestample)          # passed to td3.py via --data_dir

# exist_ok=True replaces the check-then-create pattern, which can still fail
# if the directory appears between the existence check and the makedirs call.
os.makedirs(job_sub_dir, exist_ok=True)
os.makedirs(job_out_dir, exist_ok=True)

In [23]:
# Generate one TD3 baseline job script per (environment, seed) pair.

# Environment ids passed to td3.py via --env.
tasks = [
    'AntPyBulletEnv-v0',
    'HalfCheetahPyBulletEnv-v0',
    'Walker2DPyBulletEnv-v0',
    'HopperPyBulletEnv-v0',
    'ReacherPyBulletEnv-v0',
    'InvertedPendulumPyBulletEnv-v0',
    'InvertedPendulumSwingupPyBulletEnv-v0',
    'InvertedDoublePendulumPyBulletEnv-v0',
    'HumanoidPyBulletEnv-v0',
    'HumanoidFlagrunPyBulletEnv-v0',
    'HumanoidFlagrunHarderPyBulletEnv-v0',
    'InvertedPendulumMuJoCoEnv-v0',
    'InvertedDoublePendulumMuJoCoEnv-v0',
    'Walker2DMuJoCoEnv-v0',
    'HalfCheetahMuJoCoEnv-v0',
    'AntMuJoCoEnv-v0',
    'HopperMuJoCoEnv-v0',
    'HumanoidMuJoCoEnv-v0',
]

# Values passed to td3.py via --seed.
seeds = ['0', '1', '2', '3', '4']

for task in tasks:
    for s in seeds:
        job_filename = 'job_{0}_{1}.sh'.format(task, s)
        print(job_filename)
        with open(os.path.join(job_sub_dir, job_filename), 'w') as job_file:
            # The whole script is assembled as a list and written in one call.
            job_file.writelines([
                # Scheduler directives.
                '#!/bin/bash\n',
                '#SBATCH --account=def-dkulic\n',
                '#SBATCH --cpus-per-task={}    #Maximum of CPU cores per GPU request: 6 on Cedar, 16 on Graham.\n'.format(CPU_NUM),
                '#SBATCH --mem={}               # memory per node\n'.format(JOB_MEMORY),
                '#SBATCH --time={}            # time (DD-HH:MM)\n'.format(JOB_TIME),
                '#SBATCH --output={0}/{1}_{2}_%N-%j.out        # %N for node name, %j for jobID\n'.format(job_out_dir, task, s),
                # Environment setup and the training command itself.
                '## Main processing command\n',
                'module load cuda cudnn \n',
                'source ~/tf_gpu/bin/activate\n',
                'python ../algos_original/td3/td3.py  --env {0} --seed {1} --data_dir {2} --exp_name td3_{0}'.format(task, s, data_out_dir),
            ])


job_AntPyBulletEnv-v0_0.sh
job_AntPyBulletEnv-v0_1.sh
job_AntPyBulletEnv-v0_2.sh
job_AntPyBulletEnv-v0_3.sh
job_AntPyBulletEnv-v0_4.sh
job_HalfCheetahPyBulletEnv-v0_0.sh
job_HalfCheetahPyBulletEnv-v0_1.sh
job_HalfCheetahPyBulletEnv-v0_2.sh
job_HalfCheetahPyBulletEnv-v0_3.sh
job_HalfCheetahPyBulletEnv-v0_4.sh
job_Walker2DPyBulletEnv-v0_0.sh
job_Walker2DPyBulletEnv-v0_1.sh
job_Walker2DPyBulletEnv-v0_2.sh
job_Walker2DPyBulletEnv-v0_3.sh
job_Walker2DPyBulletEnv-v0_4.sh
job_HopperPyBulletEnv-v0_0.sh
job_HopperPyBulletEnv-v0_1.sh
job_HopperPyBulletEnv-v0_2.sh
job_HopperPyBulletEnv-v0_3.sh
job_HopperPyBulletEnv-v0_4.sh
job_ReacherPyBulletEnv-v0_0.sh
job_ReacherPyBulletEnv-v0_1.sh
job_ReacherPyBulletEnv-v0_2.sh
job_ReacherPyBulletEnv-v0_3.sh
job_ReacherPyBulletEnv-v0_4.sh
job_InvertedPendulumPyBulletEnv-v0_0.sh
job_InvertedPendulumPyBulletEnv-v0_1.sh
job_InvertedPendulumPyBulletEnv-v0_2.sh
job_InvertedPendulumPyBulletEnv-v0_3.sh
job_InvertedPendulumPyBulletEnv-v0_4.sh
job_InvertedPendulumSwing