### Configuration Tutorial
We will show you how to create your own configuration file from the command line (bash).

In [1]:
import argparse

def _format_dim_list(spec):
    """Render a comma-separated string of integers as a YAML flow list.

    For example ``'200,50,20'`` becomes ``'[200,50,20]'``.

    Raises:
        ValueError: if any comma-separated item is not a valid integer.
    """
    return '[%s]' % ','.join(str(int(item)) for item in spec.split(','))


def main(argv=None):
    """Parse command-line options and write them out as a YAML config file.

    The generated file has four sections (DATASET, MODEL, TRAIN, TEST) plus a
    top-level SEED entry, and is written to the path given by ``--ymlname``.

    Args:
        argv: optional list of argument strings. When None, argparse reads
            the arguments from ``sys.argv[1:]``.

    Raises:
        ValueError: if ``--mlp_hid_dims`` or ``--graph_hid_dims`` contains a
            non-integer item.
    """
    parser = argparse.ArgumentParser()
    # dataset
    parser.add_argument('--dataname', type=str, default='TripletData')
    parser.add_argument('--trainroot', type=str, default='data_IO/triplet.csv')
    parser.add_argument('--testroot', type=str, default='data_IO/test_pairs.csv')
    parser.add_argument('--predroot', type=str, default='data_IO/test_lr_pairs.csv')
    parser.add_argument('--matrixroot', type=str, default='data_IO/exp_data_LR.csv')
    parser.add_argument('--adjroot', type=str, default='data_IO/spatial_graph.csv')

    # model (MODEL.SAVE_PATH reuses --save_path from the train options)
    parser.add_argument('--modelname', type=str, default='TripletGraphModel')
    parser.add_argument('--input_dim', type=int, default=4000)
    parser.add_argument('--graph_dim', type=int, default=4000)
    parser.add_argument('--mlp_hid_dims', type=str, default='200,50,20')
    parser.add_argument('--graph_hid_dims', type=str, default='200,50,20')

    # train
    parser.add_argument('--lr', type=float, default=1e-4)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--save_path', type=str, default='checkpoint/triplet/')
    parser.add_argument('--batch_size', type=int, default=2048)

    # test (TEST.BATCH_SIZE reuses --batch_size from the train options)
    parser.add_argument('--test_save_path', type=str, default='checkpoint/triplet/best_f1.pth')
    parser.add_argument('--pred', type=str, default='results/predict.csv')
    parser.add_argument('--emb1', type=str, default='results/embed_ligand.csv')
    parser.add_argument('--emb2', type=str, default='results/embed_receptor.csv')
    parser.add_argument('--threshold', type=float, default=0.5)

    # seed
    parser.add_argument('--seed', type=int, default=10)

    # yml name
    parser.add_argument('--ymlname', type=str, default='configure_gen.yml')

    opt = parser.parse_args(argv)

    # Validate the dimension lists before touching the output file so that a
    # malformed value does not leave a truncated config behind.
    mlp_hid_dims = _format_dim_list(opt.mlp_hid_dims)
    graph_hid_dims = _format_dim_list(opt.graph_hid_dims)

    lines = [
        'DATASET:\n',
        f'  NAME: {opt.dataname}\n',
        f'  TRAIN_ROOT: {opt.trainroot}\n',
        f'  TEST_ROOT: {opt.testroot}\n',
        f'  PRED_ROOT: {opt.predroot}\n',
        f'  MATRIX_ROOT: {opt.matrixroot}\n',
        f'  ADJ_ROOT: {opt.adjroot}\n',
        'MODEL:\n',
        f'  NAME: {opt.modelname}\n',
        f'  INPUT_DIM: {opt.input_dim:d}\n',
        # GRAPH_DIM comes from --graph_dim, not --input_dim.
        f'  GRAPH_DIM: {opt.graph_dim:d}\n',
        f'  MLP_HID_DIMS: {mlp_hid_dims}\n',
        f'  GRAPH_HID_DIMS: {graph_hid_dims}\n',
        f'  SAVE_PATH: {opt.save_path}\n',
        'TRAIN:\n',
        # Fixed-point with six decimals: values far below 1e-6 are written
        # as 0.000000.
        f'  LR: {opt.lr:f}\n',
        f'  EPOCHS: {opt.epochs:d}\n',
        f'  SAVE_PATH: {opt.save_path}\n',
        f'  BATCH_SIZE: {opt.batch_size:d}\n',
        'TEST:\n',
        f'  SAVE_PATH: {opt.test_save_path}\n',
        f'  BATCH_SIZE: {opt.batch_size:d}\n',
        f'  PRED: {opt.pred}\n',
        f'  EMB1: {opt.emb1}\n',
        f'  EMB2: {opt.emb2}\n',
        f'  THRESHOLD: {opt.threshold:f}\n',
        f'SEED: {opt.seed:d}\n',
    ]

    # The context manager guarantees the file is flushed and closed.
    with open(opt.ymlname, 'w') as yml:
        yml.writelines(lines)

We provide a Python script named "gen_conf.py", which you can use to customize the configuration for your own dataset. The command-line options are as follows:
1. trainroot: a csv file with the (a, p, n) triplets
2. testroot: a csv file with the ligand-receptor pairs and their labels, used for validation.
3. predroot: a csv file of ligand-receptor pairs for which to predict whether each pair is positive or not. It contains only the data, without labels.
4. input_dim: the input dimension for mlp trunk
5. graph_dim: the input dimension for graph trunk
6. mlp_hid_dims: the hidden dimensions of mlp layers
7. graph_hid_dims: the hidden dimensions of graph layers
8. lr: the learning rate
9. epochs: the total rounds of training
10. save_path: path for checkpoints
11. batch_size: the number of pairs for each batch
12. test_save_path: the selected checkpoint path
13. pred: the path for saving the prediction in the form of csv files.
14. emb1: the path for saving embeddings of ligand in the form of csv files.
15. emb2: the path for saving embeddings of receptor in the form of csv files.
16. threshold: the threshold to determine whether a L-R pair is positive or not.
17. seed: the fixed seed.
18. ymlname: name of the configuration yaml file.


A demo usage is:
```
python3 gen_conf.py --ymlname configure_gen.yml
```