In [1]:
import numpy as np
import os
import stat

# Root directory for generated job files: each experiment cell below creates a
# sub-directory here and writes its .py, .sbatch and runall.bat files into it.
BATCH_DIR = '/home/ci411/volume_estimation/batched_jobs'

In [2]:
# Template for the per-job training script. It is rendered with str.format,
# so braces that must survive into the generated code are doubled ({{}}).
#
# Required format keys:
#   feature_set, model_name, model_notes, experiment_name,
#   epochs, batch_size, lr_init, l2_reg, log, sched_thres
#
# model_notes is free text, so it is inserted with the !r conversion: the
# generated script receives a correctly quoted and escaped string literal even
# when the note contains quotes or backslashes. The other string keys are
# spliced between hard-coded single quotes and must not contain a "'".
train_script = '''
import sys
sys.path.append("/home/ci411/volume_estimation/")

import model_funcs
import torch
import os

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()

MODELS_DIR = '/scratch/ci411/sonos_rirs/models/'
FEATURES_DIR = '/scratch/ci411/sonos_rirs/features/'

feature_set = '{feature_set}'

model_dict = {{}}
model_dict['name'] = '{model_name}'
model_dict['notes'] = {model_notes!r}
model_dict['data_path'] = os.path.join(FEATURES_DIR, feature_set, 'feature_df.csv')
model_dict['model_path'] = os.path.join(MODELS_DIR, '{experiment_name}', model_dict['name'])

model_funcs.train_model(model_funcs.Baseline_Model, model_dict, epochs={epochs}, batch_size={batch_size}, lr_init={lr_init}, l2_reg={l2_reg}, overwrite=True, log={log}, sched_thres={sched_thres})
'''

# Render the template once with placeholder values so the generated script can
# be inspected by eye before any real jobs are written.
train_script_example = train_script.format(
    feature_set="cool_features",
    model_name="cool_model",
    model_notes="testing the formatting",
    experiment_name='cool_experiment',
    epochs=3000,
    batch_size=64,
    lr_init=1e-4,
    l2_reg=1e-4,
    log=False,
    sched_thres=1e-3,
)
print(train_script_example)


import sys
sys.path.append("/home/ci411/volume_estimation/")

import model_funcs
import torch
import os

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()

MODELS_DIR = '/scratch/ci411/sonos_rirs/models/'
FEATURES_DIR = '/scratch/ci411/sonos_rirs/features/'

feature_set = 'cool_features'

model_dict = {}
model_dict['name'] = 'cool_model'
model_dict['notes'] = 'testing the formatting'
model_dict['data_path'] = os.path.join(FEATURES_DIR, feature_set, 'feature_df.csv')
model_dict['model_path'] = os.path.join(MODELS_DIR, 'cool_experiment', model_dict['name'])

model_funcs.train_model(model_funcs.Baseline_Model, model_dict, epochs=3000, batch_size=64, lr_init=0.0001, l2_reg=0.0001, overwrite=True, log=False, sched_thres=0.001)



In [3]:
# Slurm submission script template, rendered with str.format.
# Format keys: job_name (job name, log file name and script file name) and
# experiment_name (sub-directory that holds the job's .py file).
# The rendered script requests one node, 4 CPUs per task, 16GB of memory and
# one GPU, then activates the s3d_env conda environment and runs the job's
# generated Python script.
batch_script = '''#!/bin/bash

#SBATCH --job-name={job_name}
#SBATCH --nodes=1
#SBATCH --cpus-per-task=4
#SBATCH --mem=16GB
#SBATCH --time=47:59:59
#SBATCH --mail-user=chris.ick@nyu.edu
#SBATCH --export=NONE
#SBATCH --output="o_{job_name}-%j.out"
#SBATCH --gres=gpu:1

module purge
module load anaconda3/2020.07

source /home/ci411/.bashrc
conda activate s3d_env
python /home/ci411/volume_estimation/batched_jobs/{experiment_name}/{job_name}.py
'''

# Fill the template with throwaway names to check the rendered script by eye.
_example_fields = {"job_name": "jobbyjob", "experiment_name": "science_time"}
test_batch = batch_script.format(**_example_fields)
print(test_batch)

#!/bin/bash

#SBATCH --job-name=jobbyjob
#SBATCH --nodes=1
#SBATCH --cpus-per-task=4
#SBATCH --mem=16GB
#SBATCH --time=47:59:59
#SBATCH --mail-user=chris.ick@nyu.edu
#SBATCH --export=NONE
#SBATCH --output="o_jobbyjob-%j.out"
#SBATCH --gres=gpu:1

module purge
module load anaconda3/2020.07

source /home/ci411/.bashrc
conda activate s3d_env
python /home/ci411/volume_estimation/batched_jobs/science_time/jobbyjob.py



In [9]:
# Default hyperparameters shared by the sweep cells below; each sweep replaces
# the one value it is varying and reads the rest from here.
# NOTE(review): the sweep cells pass their own literal `epochs` values rather
# than reading def_hyp['epochs'] -- confirm whether that is intended.
def_hyp = {
    'epochs': 1000,
    'batch_size': 512,
    'lr_init': 1e-4,
    'l2_reg': 1e-4,
    'log': True,
    'sched_thres': 1e-4,
}

In [10]:
# Sweep: vary the batch size on the baseline model.
# For every batch size this writes a training script (<job>.py) and a Slurm
# submission script (<job>.sbatch) into the experiment directory, then writes
# an executable runall.bat that submits every job with `sbatch`.
experiment_name = 'log_batch_size'
run_template = 'log_bs{}'

experiment_dir = os.path.join(BATCH_DIR, experiment_name)
os.makedirs(experiment_dir, exist_ok=True)
runall_path = os.path.join(experiment_dir, 'runall.bat')

# Powers of two; the previous list contained the typos 126 and 1028.
batch_sizes = [16, 32, 64, 128, 256, 512, 1024]
feature_set = '080322_10k_baseline'
runall = ""

for i, bs in enumerate(batch_sizes):
    # Jobs are named by their index in the sweep, not by the batch size itself;
    # the actual value is recorded in the model notes.
    job_name = run_template.format(i)
    notes = "varying batch size on baseline w/ batch_size {}".format(bs)
    # NOTE(review): epochs is fixed at 100 here rather than taken from def_hyp.
    train = train_script.format(
        feature_set=feature_set,
        model_name=job_name,
        model_notes=notes,
        epochs=100,
        batch_size=bs,
        lr_init=def_hyp['lr_init'],
        l2_reg=def_hyp['l2_reg'],
        log=def_hyp['log'],
        sched_thres=def_hyp['sched_thres'],
        experiment_name=experiment_name,
    )

    batch = batch_script.format(job_name=job_name, experiment_name=experiment_name)

    train_path = os.path.join(experiment_dir, job_name + '.py')
    batch_path = os.path.join(experiment_dir, job_name + '.sbatch')

    runall += "sbatch {}\n".format(batch_path)

    with open(train_path, 'w') as f:
        f.write(train)

    with open(batch_path, 'w') as f:
        f.write(batch)

# Write the submit-all script once, after every job has been generated, and
# add the owner-execute bit so it can be run directly.
with open(runall_path, 'w') as f:
    f.write(runall)

st = os.stat(runall_path)
os.chmod(runall_path, st.st_mode | stat.S_IEXEC)

In [5]:
# Sweep: vary the initial learning rate on the baseline model.
# For every learning rate this writes a training script (<job>.py) and a Slurm
# submission script (<job>.sbatch) into the experiment directory, then writes
# an executable runall.bat that submits every job with `sbatch`.
experiment_name = 'log_learning_rate'
run_template = 'log_lr{}'

experiment_dir = os.path.join(BATCH_DIR, experiment_name)
os.makedirs(experiment_dir, exist_ok=True)
runall_path = os.path.join(experiment_dir, 'runall.bat')

feature_set = '080322_10k_baseline'
runall = ""

# 16 log-spaced learning rates from 1e-1 down to 1e-8
learning_rates = np.logspace(-1, -8, 16)

for i, lr in enumerate(learning_rates):
    # Jobs are named by their index in the sweep; the actual learning rate is
    # recorded in the model notes.
    job_name = run_template.format(i)
    notes = "varying initial learning rate w/ lr = {}".format(lr)
    # Only lr_init is varied; the remaining hyperparameters come from def_hyp
    # (epochs is fixed at 500 for this sweep).
    train = train_script.format(
        feature_set=feature_set,
        model_name=job_name,
        model_notes=notes,
        epochs=500,
        batch_size=def_hyp['batch_size'],
        lr_init=lr,
        l2_reg=def_hyp['l2_reg'],
        log=def_hyp['log'],
        sched_thres=def_hyp['sched_thres'],
        experiment_name=experiment_name,
    )

    batch = batch_script.format(job_name=job_name, experiment_name=experiment_name)

    train_path = os.path.join(experiment_dir, job_name + '.py')
    batch_path = os.path.join(experiment_dir, job_name + '.sbatch')

    runall += "sbatch {}\n".format(batch_path)

    with open(train_path, 'w') as f:
        f.write(train)

    with open(batch_path, 'w') as f:
        f.write(batch)

# Write the submit-all script once, after every job has been generated, and
# add the owner-execute bit so it can be run directly.
with open(runall_path, 'w') as f:
    f.write(runall)

st = os.stat(runall_path)
os.chmod(runall_path, st.st_mode | stat.S_IEXEC)

In [7]:
# Sweep: vary the L2 regularisation strength on the baseline model.
# For every lambda this writes a training script (<job>.py) and a Slurm
# submission script (<job>.sbatch) into the experiment directory, then writes
# an executable runall.bat that submits every job with `sbatch`.
experiment_name = 'log_l2_reg'
run_template = 'log_l2{}'

experiment_dir = os.path.join(BATCH_DIR, experiment_name)
os.makedirs(experiment_dir, exist_ok=True)
runall_path = os.path.join(experiment_dir, 'runall.bat')

feature_set = '080322_10k_baseline'
runall = ""

# 10 log-spaced regularisation strengths from 1e-1 down to 1e-5
reg_lambdas = np.logspace(-1, -5, 10)

for i, l2 in enumerate(reg_lambdas):
    # Jobs are named by their index in the sweep; the actual lambda is
    # recorded in the model notes.
    job_name = run_template.format(i)
    notes = "varying l2 reg parameter w/ lambda {}".format(l2)
    # Only l2_reg is varied; the remaining hyperparameters come from def_hyp
    # (epochs is fixed at 500 for this sweep).
    train = train_script.format(
        feature_set=feature_set,
        model_name=job_name,
        model_notes=notes,
        epochs=500,
        batch_size=def_hyp['batch_size'],
        lr_init=def_hyp['lr_init'],
        l2_reg=l2,
        log=def_hyp['log'],
        sched_thres=def_hyp['sched_thres'],
        experiment_name=experiment_name,
    )

    batch = batch_script.format(job_name=job_name, experiment_name=experiment_name)

    train_path = os.path.join(experiment_dir, job_name + '.py')
    batch_path = os.path.join(experiment_dir, job_name + '.sbatch')

    runall += "sbatch {}\n".format(batch_path)

    with open(train_path, 'w') as f:
        f.write(train)

    with open(batch_path, 'w') as f:
        f.write(batch)

# Write the submit-all script once, after every job has been generated, and
# add the owner-execute bit so it can be run directly.
with open(runall_path, 'w') as f:
    f.write(runall)

st = os.stat(runall_path)
os.chmod(runall_path, st.st_mode | stat.S_IEXEC)

In [8]:
# Sweep: vary the scheduler threshold on the baseline model.
# For every threshold this writes a training script (<job>.py) and a Slurm
# submission script (<job>.sbatch) into the experiment directory, then writes
# an executable runall.bat that submits every job with `sbatch`.
experiment_name = 'log_sched_thres'
run_template = 'log_st{}'

experiment_dir = os.path.join(BATCH_DIR, experiment_name)
os.makedirs(experiment_dir, exist_ok=True)
runall_path = os.path.join(experiment_dir, 'runall.bat')

feature_set = '080322_10k_baseline'
runall = ""

# 5 log-spaced thresholds from 1e-3 down to 1e-8
thresholds = np.logspace(-3, -8, 5)

for i, th in enumerate(thresholds):
    # Jobs are named by their index in the sweep; the actual threshold is
    # recorded in the model notes.
    job_name = run_template.format(i)
    notes = "varying scheduler thresholds with threshold {}".format(th)
    # Only sched_thres is varied; the remaining hyperparameters come from
    # def_hyp (epochs is fixed at 500 for this sweep).
    train = train_script.format(
        feature_set=feature_set,
        model_name=job_name,
        model_notes=notes,
        epochs=500,
        batch_size=def_hyp['batch_size'],
        lr_init=def_hyp['lr_init'],
        l2_reg=def_hyp['l2_reg'],
        log=def_hyp['log'],
        sched_thres=th,
        experiment_name=experiment_name,
    )

    batch = batch_script.format(job_name=job_name, experiment_name=experiment_name)

    train_path = os.path.join(experiment_dir, job_name + '.py')
    batch_path = os.path.join(experiment_dir, job_name + '.sbatch')

    runall += "sbatch {}\n".format(batch_path)

    with open(train_path, 'w') as f:
        f.write(train)

    with open(batch_path, 'w') as f:
        f.write(batch)

# Write the submit-all script once, after every job has been generated, and
# add the owner-execute bit so it can be run directly.
with open(runall_path, 'w') as f:
    f.write(runall)

st = os.stat(runall_path)
os.chmod(runall_path, st.st_mode | stat.S_IEXEC)