# C33_200

In [12]:
import sys
from pathlib import Path
# Put the ai-x "core" folder first on the import path so that `misc` resolves to it.
home = f"{Path.home()}"
core_dir = "/".join([home, "repositories", "ai-x", "core"])
sys.path.insert(0, core_dir)

from misc import check_output_dir

# ---- Run configuration: edit the values below, then re-run the cell ----

# Run identity; both values appear in the folder names built further down
# (models_{chembl_version}_{n} and {chembl_version}_{n}).
chembl_version = 'C33'
n = 200
n_models = 10            # number of model_<i> folders / job scripts generated per model set

# Root folders: generated scripts go under pre_modeldir, datasets are read from pre_datadir.
pre_modeldir = "/data/Shilab/sjwon3789/selectivity_models"
pre_datadir = "/data/Shilab/sjwon3789/selectivity_datasets"

# Resources written into the #SBATCH header of each model-building job.
cpu, memory = 24, 120    # --cpus-per-task={cpu}, --mem={memory}g
days, hours = 1, 0       # --time={days}-{hours}:00:00

# Options forwarded to run_buildmodel.py.
mode = "class"           # -m; "class" also appends "_class" to the dataset names
methods = ["xgb", "rf"]  # -x; one set of scripts is generated per entry
stage = "same_buildmodel"  # -s for the build job (alternative used by the author: "prediction")


####################################### CODE ###################################################
# Resolved locations for this run: where scripts are written and where datasets live.
datadir = f"{pre_datadir}/{chembl_version}_{n}"
model_dir = f'{pre_modeldir}/models_{chembl_version}_{n}'

# One row per model set: (model folder, dataset folder, short tag used in SLURM job names).
model_table = [
    ('models_D2',          'dataset_D2_antagonist_Ki',                         'D2'),
    ('models_D3',          'dataset_D3_antagonist_Ki',                         'D3'),
    ('models_D2_overlap',  'dataset_overlap_D2_antagonist_Ki',                 'D2O'),
    ('models_D3_overlap',  'dataset_overlap_D3_antagonist_Ki',                 'D3O'),
    ('models__ratio_D2D3', 'dataset__ratio_D3_antagonist_Ki_D2_antagonist_Ki', 'DR'),
]

# Views of the table under the names the rest of the cell uses.
models = [row[0] for row in model_table]
dict_dataset = {folder: dataset_name for folder, dataset_name, _tag in model_table}
dict_job_name = {folder: tag for folder, _dataset_name, tag in model_table}

# Generate every SLURM script for this run.
#
# For each model folder and each learner in `methods` this writes:
#   {model_dir}/{model}/all_{method}.sh                 sbatch do_{method}.sh in every model_<i>/
#   {model_dir}/{model}/all_{method}_pred.sh            sbatch do_{method}_pred.sh in every model_<i>/
#   {model_dir}/{model}/model_<i>/do_{method}.sh        model-building job for dataset <i>
#   {model_dir}/{model}/model_<i>/do_{method}_pred.sh   prediction job for dataset <i>
#
# Folders are prepared exactly once per model, before any learner's scripts are written.
# check_output_dir(..., keep_old=False) presumably resets an existing folder
# (TODO confirm in misc.check_output_dir); preparing the same folder again for each
# learner could then discard the scripts already generated for the previous learner.

# Interpreter + entry point shared by every generated command line.
runner = ("/data/Shilab/apps/anaconda3-2020.02-py3.7.6/bin/python "
          "$HOME/repositories/ai-x/core/run_buildmodel.py")
filename = 'pubdata'
val_filename = 'val'
# Dataset names carry a "_class" suffix when mode is "class".
class_suffix = "_class" if mode == "class" else ""

for model in models:
    model_root = f"{model_dir}/{model}"
    check_output_dir(model_root, keep_old = False)

    job_tag = dict_job_name[model]
    dataset = f"{datadir}/{dict_dataset[model]}/{filename}{class_suffix}"
    val_dataset = f"{datadir}/{dict_dataset[model]}/{val_filename}{class_suffix}"

    # Submit-all helpers: kind "" submits the build jobs, "_pred" the prediction jobs.
    for method in methods:
        for kind in ("", "_pred"):
            with open(f"{model_root}/all_{method}{kind}.sh", "w") as f:
                f.write("#!/bin/bash\n")
                # The brace range follows n_models so it always matches the
                # model_<i> folders created below ({0..9} for n_models = 10).
                f.write(f"for i in {{0..{n_models - 1}}}\n")
                f.write("do\n")
                f.write("echo $i\n")
                f.write("cd model_$i\n")
                f.write(f"sbatch do_{method}{kind}.sh\n")
                f.write("cd ..\n")
                f.write("done\n")

    for i in range(n_models):
        path = f"{model_root}/model_{i}"
        check_output_dir(path, keep_old = False)

        for method in methods:
            # Model-building job for dataset <i>.
            with open(f"{path}/do_{method}.sh", "w") as f:
                f.write("#!/bin/bash\n")
                f.write(f"#SBATCH --job-name={job_tag}{i}_{method}\n")
                f.write("#SBATCH --partition=norm\n")
                f.write(f"#SBATCH --time={days}-{hours}:00:00\n")
                f.write(f"#SBATCH --cpus-per-task={cpu}\n")
                f.write(f"#SBATCH --mem={memory}g\n\n")
                f.write(f"{runner} -s {stage} -m {mode} -x {method} -t 0.15 -r 1 -n 1 -e 0:1 -i {dataset}{i}\n")
                # Alternative invocations are emitted commented out ("#" prefix) so they
                # can be enabled by hand in the generated script.
                f.write(f"#{runner} -s {stage} -m {mode} -x {method} -t 0   -r 1 -n 1 -e 0:1 -i {dataset}{i}\n")
                f.write(f"#{runner} -s shap -m {mode} -x {method} -t 0   -r 1 -n 1 -e 0:1 -i {dataset}{i}\n")

            # Prediction job for dataset <i>: first the training dataset, then the validation one.
            with open(f"{path}/do_{method}_pred.sh", "w") as f:
                f.write("#!/bin/bash\n")
                f.write(f"#SBATCH --job-name=P{job_tag}{i}_{method}\n")
                f.write("#SBATCH --mem=40g\n")
                f.write("#SBATCH --partition=quick\n")
                f.write(f"{runner} -s prediction -m {mode} -x {method} -t 0.15 -r 1 -n 1 -e 0:10 -d {dataset}{i}\n")
                f.write(f"{runner} -s prediction -m {mode} -x {method} -t 0    -r 1 -n 1 -e 0:10 -d {val_dataset}{i}\n")

# Top-level driver: runs the all_{method}_pred.sh submit script of the D2, D3 and
# ratio model folders for both learners. The *_overlap folders are not included.
prediction_folders = ["models_D2", "models_D3", "models__ratio_D2D3"]
with open(f'{model_dir}/chembl_predictions.sh', 'w') as f:
    # The shebang is only meaningful as the first line, so it is written once
    # rather than once per learner.
    f.write('#!/bin/bash\n')
    for method in ["xgb", "rf"]:
        for folder in prediction_folders:
            f.write(f"cd {folder}\n")
            f.write(f"bash all_{method}_pred.sh\n")
            f.write("cd ..\n")
        

# Top-level driver for model building: in each of the D2, D3 and ratio model
# folders, run the xgb submit script and then the rf submit script.
script_lines = ['#!/bin/bash\n']
for folder in ("models_D2", "models_D3", "models__ratio_D2D3"):
    script_lines += [
        f"cd {folder}\n",
        "bash all_xgb.sh\n",
        "bash all_rf.sh\n",
        "cd ..\n",
    ]

with open(f'{model_dir}/xgb_rf_models.sh', 'w') as f:
    f.writelines(script_lines)

# C33_0

In [11]:
import sys
from pathlib import Path
# Put the ai-x "core" folder first on the import path so that `misc` resolves to it.
home = f"{Path.home()}"
core_dir = "/".join([home, "repositories", "ai-x", "core"])
sys.path.insert(0, core_dir)

from misc import check_output_dir

# ---- Run configuration: edit the values below, then re-run the cell ----

# Run identity; both values appear in the folder names built from them.
chembl_version = 'C33'
n = 0                    # how much is reserved as validation dataset
n_models = 10            # number of do_{method}<i>.sh job scripts generated per model set

# Root folders: generated scripts go under pre_modeldir, datasets are read from pre_datadir.
pre_modeldir = "/data/Shilab/sjwon3789/selectivity_models"
pre_datadir = "/data/Shilab/sjwon3789/selectivity_datasets"

# Output directory for this run (note the trailing slash).
model_dir = f"{pre_modeldir}/models_{chembl_version}_{n}/"

# Resources written into the #SBATCH header of each model-building job.
cpu, memory = 24, 120    # --cpus-per-task={cpu}, --mem={memory}g
days, hours = 2, 0       # --time={days}-{hours}:00:00

# Options forwarded to run_buildmodel.py.
mode = "class"           # -m; "class" also appends "_class" to the dataset names
methods = ["xgb", "rf"]  # -x; one set of scripts is generated per entry
stage = "same_buildmodel"  # -s for the build job (alternative used by the author: "prediction")




####################################### CODE ###################################################
# One row per model set: (model folder, dataset folder, short tag used in SLURM job names).
model_table = [
    ('models_D2',          'dataset_D2_antagonist_Ki',                         'D2'),
    ('models_D3',          'dataset_D3_antagonist_Ki',                         'D3'),
    ('models__ratio_D2D3', 'dataset__ratio_D3_antagonist_Ki_D2_antagonist_Ki', 'DR'),
]

# Views of the table under the names the rest of the cell uses.
models = [row[0] for row in model_table]
dict_dataset = {folder: dataset_name for folder, dataset_name, _tag in model_table}
dict_job_name = {folder: tag for folder, _dataset_name, tag in model_table}

# Generate every SLURM script for this run.
#
# For each model folder and each learner in `methods` this writes, all directly
# inside {model_dir}/{model}/:
#   all_{method}.sh        sbatch do_{method}<i>.sh for every index <i>
#   all_{method}_pred.sh   sbatch do_{method}_pred.sh (the name chembl_predictions.sh calls)
#   do_{method}<i>.sh      model-building job for index <i> (-e <i>:<i+1>)
#   do_{method}_pred.sh    one prediction job covering -e 0:10
#
# Folders are prepared exactly once per model, before any learner's scripts are written.
# check_output_dir(..., keep_old=False) presumably resets an existing folder
# (TODO confirm in misc.check_output_dir); preparing the same folder again for each
# learner could then discard the scripts already generated for the previous learner.

# Output directory for this run; it does not depend on the loop variables.
model_dir = f'{pre_modeldir}/models_{chembl_version}_{n}'

# Interpreter + entry point shared by every generated command line.
runner = ("/data/Shilab/apps/anaconda3-2020.02-py3.7.6/bin/python "
          "$HOME/repositories/ai-x/core/run_buildmodel.py")
filename = 'pubdata'
val_filename = 'val'
# Dataset names carry a "_class" suffix when mode is "class".
class_suffix = "_class" if mode == "class" else ""

for model in models:
    path = f"{model_dir}/{model}"
    check_output_dir(path, keep_old = False)

    job_tag = dict_job_name[model]
    datadir = f"{pre_datadir}/{chembl_version}_{n}/{dict_dataset[model]}"
    dataset = f"{datadir}/{filename}{class_suffix}"
    val_dataset = f"{datadir}/{val_filename}{class_suffix}"

    for method in methods:
        # Submit every build job of this learner.
        with open(f"{path}/all_{method}.sh", "w") as f:
            f.write("#!/bin/bash\n")
            # The brace range follows n_models so it always matches the
            # do_{method}<i>.sh files written below ({0..9} for n_models = 10).
            f.write(f"for i in {{0..{n_models - 1}}}\n")
            f.write("do\n")
            f.write("echo $i\n")
            f.write(f"sbatch do_{method}$i.sh\n")
            f.write("done\n")

        # chembl_predictions.sh runs "bash all_{method}_pred.sh" in this folder,
        # so that script has to exist; it submits the single prediction job.
        with open(f"{path}/all_{method}_pred.sh", "w") as f:
            f.write("#!/bin/bash\n")
            f.write(f"sbatch do_{method}_pred.sh\n")

        # The prediction script has no per-index content (same path, -e 0:10, dataset
        # suffix 0), so it is written once per learner instead of once per index, and
        # its job name carries no index.
        with open(f"{path}/do_{method}_pred.sh", "w") as f:
            f.write("#!/bin/bash\n")
            f.write(f"#SBATCH --job-name=P{job_tag}_{method}\n")
            f.write("#SBATCH --partition=quick\n")
            f.write(f"{runner} -s prediction -m {mode} -x {method} -t 0.15 -r 1 -n 10 -e 0:10 -d {dataset}0\n")
            f.write(f"{runner} -s prediction -m {mode} -x {method} -t 0    -r 10 -n 1 -e 0:10 -d {val_dataset}0\n")

        for i in range(n_models):
            # Model-building job for index <i>.
            with open(f"{path}/do_{method}{i}.sh", "w") as f:
                f.write("#!/bin/bash\n")
                f.write(f"#SBATCH --job-name={job_tag}{i}_{method}\n")
                f.write("#SBATCH --partition=norm\n")
                f.write(f"#SBATCH --time={days}-{hours}:00:00\n")
                f.write(f"#SBATCH --cpus-per-task={cpu}\n")
                f.write(f"#SBATCH --mem={memory}g\n\n")
                # NOTE(review): all three invocations below are emitted commented out
                # ("#" prefix), so the generated job runs no command as written.
                # Enable the intended line before submitting.
                f.write(f"#{runner} -s {stage} -m {mode} -x {method} -t 0.15 -r 1 -n 10 -e {i}:{i+1} -i {dataset}0\n")
                f.write(f"#{runner} -s {stage} -m {mode} -x {method} -t 0   -r 10 -n 1 -e {i}:{i+1} -i {dataset}0\n")
                f.write(f"#{runner} -s shap -m {mode} -x {method} -t 0   -r 10 -n 1 -e {i}:{i+1} -i {dataset}0\n")


# Top-level driver: runs the all_{method}_pred.sh submit script of the D2, D3 and
# ratio model folders for both learners.
prediction_folders = ["models_D2", "models_D3", "models__ratio_D2D3"]
with open(f'{model_dir}/chembl_predictions.sh', 'w') as f:
    # The shebang is only meaningful as the first line, so it is written once
    # rather than once per learner.
    f.write('#!/bin/bash\n')
    for method in ["xgb", "rf"]:
        for folder in prediction_folders:
            f.write(f"cd {folder}\n")
            f.write(f"bash all_{method}_pred.sh\n")
            f.write("cd ..\n")

# Top-level driver for model building: in each of the D2, D3 and ratio model
# folders, run the xgb submit script and then the rf submit script.
script_lines = ['#!/bin/bash\n']
for folder in ("models_D2", "models_D3", "models__ratio_D2D3"):
    script_lines += [
        f"cd {folder}\n",
        "bash all_xgb.sh\n",
        "bash all_rf.sh\n",
        "cd ..\n",
    ]

with open(f'{model_dir}/xgb_rf_models.sh', 'w') as f:
    f.writelines(script_lines)