_Run the first 2 code cells without modifications_

In [4]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
from IPython import get_ipython
from IPython.display import Markdown #, IFrame
# for presentations:
#display(HTML("<style>.container { width:100% !important; }</style>"))

import sys
from pathlib import Path
# Report the interpreter version, the environment name (last path component of
# sys.prefix) and the current working directory.
print('Python ver: {}\nPython env: {}'.format(sys.version, Path(sys.prefix).name))
print('Currrent dir: {}\n'.format(Path.cwd()))


def add_to_sys_path(this_path, up=False):
    """
    Register a folder in sys.path (inserted at index 1) unless already listed.

    Args:
        this_path: Folder to register (str or Path).
        up: When True, the parent of `this_path` (1 level up) is registered instead.

    A confirmation line is printed only when an entry is actually inserted.
    """
    folder = Path(this_path)
    entry = str(folder.parent if up else folder)

    # Nothing to do (and nothing printed) when the entry is already present.
    if entry in sys.path:
        return

    sys.path.insert(1, entry)
    print('Path added to sys.path: {}'.format(entry))


import numpy as np
import pandas as pd
#pd.set_option("display.max_colwidth", 200)
import matplotlib as mpl
from matplotlib import pyplot as plt
#plt.ion()
plt.style.use('seaborn-v0_8-muted')
from pprint import pprint as pp

def fdir(obj, start_with_str='_', exclude=True):
    """
    Filtered dir() for method discovery.

    With `exclude=True` (default) names starting with `start_with_str` are
    dropped; with `exclude=False` only those names are kept.
    """
    selected = []
    for name in dir(obj):
        has_prefix = name.startswith(start_with_str)
        if has_prefix != exclude:
            selected.append(name)
    return selected


def new_section(title='New section'):
    """
    Build a banner-style HTML heading for separating notebook sections.

    Args:
        title: Text shown inside the banner; inserted into the markup as-is
            (it is not HTML-escaped).

    Returns:
        IPython.display.HTML wrapping a single styled <div>.
    """
    # Only `Markdown` is imported from IPython.display at notebook level, so
    # import HTML here; without it the call fails with a NameError.
    from IPython.display import HTML

    style = "text-align:center;background:#c2d3ef;padding:16px;color:#ffffff;font-size:2em;width:98%"
    return HTML('<div style="{}">{}</div>'.format(style, title))


def add_div(div_class, div_start, div_text, output_string=True):
    """
    Create an alert <div> for use in the notebook.

    Args:
        div_class: One of 'alert-info', 'alert-warning', 'alert-danger'.
        div_start: Lead-in text rendered in bold, e.g. 'Tip: '.
        div_text: Text following the lead-in.
        output_string: Selects how the div is delivered (see below).

    Behaviour with default `output_string=True`:
    The cell is overwritten with the output string, but the cell mode is still in 'code' not 'markdown':
    ```
    [x]
    add_div('alert-warning', 'Tip: ', 'some tip here', output_string=True)
    [x]
    <div class="alert alert-warning"><b>Tip: </b>some tip here</div>
    ```
    The only thing to do is change the cell mode to Markdown.
    If `output_string=False`, the HTML output is displayed in an output cell.

    Returns:
        An HTML error notice when `div_class` is not accepted; otherwise the
        result of `set_next_input` (output_string=True) or a Markdown object.
    """
    # Only `Markdown` is imported from IPython.display at notebook level, so
    # import HTML here; without it the error branch fails with a NameError.
    from IPython.display import HTML

    accepted = ['alert-info', 'alert-warning', 'alert-danger']

    if div_class not in accepted:
        # The rejected argument is `div_class`, so that is the name reported.
        return HTML(f"""<div class="alert"><b>Wrong class:</b> `div_class` is one of {accepted}.
                    </div>""")
    div = f"""<div class="alert {div_class}"><b>{div_start}</b>{div_text}</div>"""
    if output_string:
        # The second parameter of set_next_input is `replace`; the former
        # positional 'markdown' only worked because it is truthy. It does not
        # select the cell type, so state the intent explicitly.
        return get_ipython().set_next_input(div, replace=True)
    return Markdown(div) #HTML(div)



# autoreload extension
ipython = get_ipython()
if 'autoreload' not in ipython.extension_manager.loaded:
    %load_ext autoreload
%autoreload 2

Python ver: 3.11.5 | packaged by conda-forge | (main, Aug 27 2023, 03:34:09) [GCC 12.3.0]
Python env: mce
Currrent dir: /home/cat/projects/MCCE_Benchmarking/notebooks

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
# Put the parent of the current working directory on sys.path; presumably this
# is the repository root holding the `benchmark` package imported further down
# -- TODO confirm.
add_to_sys_path(Path.cwd(), up=True)

---

In [6]:
from benchmark import audit
from benchmark import BENCH, MCCE_OUTPUTS

import getpass
import os
import shutil
import subprocess
from typing import Union

In [13]:
# Options for each MCCE step, keyed by step id ("S1".."S4").
# Every step maps "msg" to a one-line description and each command line flag to
# a dict whose keys mirror argparse's add_argument keywords (presumably these
# dicts are unpacked into add_argument -- confirm with the consumer).
mcce_step_options = {
    "S1": {
        "msg": "Run mcce step 1, premcce to format PDB file to MCCE PDB format.",
        "--noter": {"default": False, "help": "Do not label terminal residues (for making ftpl).", "action": "store_true"},
        "--dry": {"default": False, "help": "Delete all water molecules.", "action": "store_true"},
    },
    "S2": {
        "msg": "Run mcce step 2, make side chain conformers from step1_out.pdb.",
        "-l": {"metavar": "level",
               "type": int, "default": 1,
               "help": "Conformer level 1=quick (default), 2=medium, 3=full"},
    },
    "S3": {
        "msg": "Run mcce step 3, energy calculations, with multiple threads.",
        # should have been --r:
        "-r": {"default": False, "help": "refresh opp files and head3.lst without running delphi", "action": "store_true"},
        "-c": {"metavar": "('conf start', 'conf end')",
               "type": int,
               "default": [1, 99999], "nargs": 2,
               # help now quotes the actual default (99999, not 9999)
               "help": "starting and ending conformer, default to 1 and 99999"},
        # key was misspelled "hel", which is not an add_argument keyword
        "-f": {"metavar": "tmp folder", "default": "/tmp", "help": "delphi temporary folder, default to /tmp"},
        "-p": {"metavar": "processes", "type": int, "default": 1,
               "help": "run mcce with p number of processes; default: %(default)s."},
    },
    "S4": {
        "msg": "Run mcce step 4, Monte Carlo sampling to simulate a titration.",
        "--xts": {"default": False, "help": "Enable entropy correction, default is false", "action": "store_true"},
        "--ms": {"default": False, "help": "Enable microstate output", "action": "store_true"},
        "-t": {"metavar": "ph or eh", "default": "ph", "help": "titration type: pH or Eh."},
        "-i": {"metavar": "initial ph/eh", "default": "0.0", "help": "Initial pH/Eh of titration; default: %(default)s."},
        # unit typo fixed: "pJ" -> "pH"
        "-d": {"metavar": "interval", "default": "1.0", "help": "titration interval in pH or mV; default: %(default)s."},
        "-n": {"metavar": "steps", "default": "15", "help": "number of steps of titration; default: %(default)s."},
    },
}


# Name of the command line tool, as per pyproject.toml.
CLI_NAME = "mcce_bench"

# The two sub-commands; each one names the step the MCCE simulation starts from.
SUB_CMD1 = "from_step1"
SUB_CMD2 = "from_step3"

# One-line help strings, one per sub-command.
HELP_1 = f"Sub-command {SUB_CMD1!r} for starting the MCCE simulation from step1."
HELP_2 = f"Sub-command {SUB_CMD2!r} for starting the MCCE simulation from step3."

# Usage line and long description for the top-level parser.
USAGE = f"{CLI_NAME} <sub-command for simulation start> <related args>\n"

DESC = f"""
    Launch a MCCE benchmarking job using curated structures from the pKa Database v1.

    The main command is {CLI_NAME!r} along with one of two sub-commands,
    which distinguishes the starting point for the MCCE simulation.
    - Sub-command {SUB_CMD1!r}: starts from step1 -> step4;
    - Sub-command {SUB_CMD2!r}: starts from step3 -> step4 :: NOT YET IMPLEMENTED!

"""

def bench_from_step1(args):
    """Handler for the 'from_step1' sub-command: benchmark setup and launch.

    Placeholder: `args` is accepted but not used yet, and None is returned.
    """
    # TODO
    # setup folders
    # write <job_name>.sh
    # launch
    return None


def bench_from_step3(args):
    """Handler for the 'from_step3' sub-command: benchmark setup and launch.

    Placeholder: `args` is accepted but not used yet, and None is returned.
    """
    # TODO later
    return None


def bench_parser():
    """Command line arguments parser with sub-commands for use in benchmarking.

    Only the SUB_CMD1 sub-command is registered for now. Its handler,
    `bench_from_step1`, is stored as the `func` default of that sub-parser, so
    it is available on the parsed namespace as `args.func`.

    Returns:
        argparse.ArgumentParser: the configured top-level parser.
    """
    # argparse is not imported at notebook level; import it here so the
    # function works on a fresh kernel.
    from argparse import ArgumentParser, RawDescriptionHelpFormatter

    def arg_valid_dirpath(path_str: str):
        """Return resolved path from the command line (None for an empty string)."""
        if not len(path_str):
            return None
        return Path(path_str).resolve()

    parser = ArgumentParser(
        # no trailing space: prog is interpolated into the epilog
        prog = CLI_NAME,
        description = DESC,
        usage = USAGE,
        formatter_class = RawDescriptionHelpFormatter,
        epilog = ">>> END of %(prog)s.",
    )
    # Texts describe this tool; the previous ones referred to a different
    # pipeline ("MCCE-CDC", "3 steps").
    subparsers = parser.add_subparsers(
        required = True,
        title = "simulation start commands",
        description = "Sub-commands selecting the starting step of the MCCE simulation",
        help = "The starting point of the MCCE simulation",
        dest = "subparser_name",
    )

    # sub-command 1: from_step1
    sub1 = subparsers.add_parser(
        SUB_CMD1,
        formatter_class = RawDescriptionHelpFormatter,
        help = HELP_1,
    )
    sub1.add_argument(
        "benchmark_dir",
        type = arg_valid_dirpath,
        help = """The user's choice of directory for setting up the benchmarking job(s); required.
        If the directory does not exists in the location where this cli is called, then it is
        created. Recommended name: "mcce_benchmarks"; this is where all subsequent jobs will
        reside as subfolders.
        """
    )
    sub1.add_argument(
        "job_name",
        type = str,
        help = """The descriptive name, devoid of spaces, for the current job (don't make it too long!); required.
        This job_name is be used to name the curent job in 'benchmark_dir' and name the script that launches the
        MCCE simulation in ./clean_pdbs folder.
        """
    )
    # always 'prot.pdb' as per soft-link setup: ln -s DIR/dir.pdb prot.pdb
    #sub1.add_argument(
    #    "-prot",
    #    metavar = "pdb",
    #    default = "prot.pdb",
    #    help = "The name of the pdb; default: %(default)s.",
    #)
    sub1.add_argument(
        "--dry",
        default = False,
        help = "No water molecules.",
        action = "store_true"
    )
    sub1.add_argument(
        "--norun",
        default = False,
        action = "store_true",
        help = "Create run.prm without running the step"
    )
    sub1.add_argument(
        "-e",
        metavar = "/path/to/mcce",
        default = "mcce",
        help = "Location of the mcce executable, i.e. which mcce; default: %(default)s.",
    )
    sub1.add_argument(
        "-eps",
        metavar = "epsilon",
        default = "4.0",
        help = "Protein dielectric constant; default: %(default)s.",
    )
    sub1.add_argument(
        "-u",
        metavar = "Comma-separated list of Key=Value pairs.",
        default = "",
        help = """Any comma-separated KEY=var from run.prm; e.g.:
        -u HOME_MCCE=/path/to/mcce_home,H2O_SASCUTOFF=0.05,EXTRA=./extra.tpl; default: %(default)s.
        Note: No space after a comma!""",
    )

    #sub1.add_argument(
    #    "-msout_file",
    #    type = str,
    #    default = "pH7eH0ms.txt",
    #    help = "Name of the mcce_dir/ms_out/ microstates file, `pHXeHYms.txt'; default: %(default)s.""",
    #)

    # bind sub1 parser with its related function:
    sub1.set_defaults(func=bench_from_step1)

    # later:
    #sub2 = subparsers.add_parser(SUB_CMD2,
    #                              formatter_class = RawDescriptionHelpFormatter,
    #                              help=HELP_2)

    return parser


In [14]:
# Pretty-print the step options table for inspection.
# NOTE(review): the saved output lists S1 keys ('--norun', '-d', '-e', '-u')
# that are absent from the current definition -- it looks stale; re-run to refresh.
pp(mcce_step_options)

{'S1': {'--dry': {'action': 'store_true',
                  'default': False,
                  'help': 'Delete all water molecules.'},
        '--norun': {'action': 'store_true',
                    'default': False,
                    'help': 'Create run.prm but do not run step 1.'},
        '--noter': {'action': 'store_true',
                    'default': False,
                    'help': 'Do not label terminal residues (for making '
                            'ftpl).'},
        '-d': {'default': '4.0',
               'help': 'protein dielectric constant; default: %(default)s.',
               'metavar': 'epsilon'},
        '-e': {'default': 'mcce',
               'help': 'mcce executable location; default: %(default)s.',
               'metavar': '/path/to/mcce'},
        '-u': {'default': '',
               'help': 'Any comma-separated KEY=var from run.prm; e.g.:\n'
                       '                     -u '
                       'HOME_MCCE=/path/to/mcce_home,H2O_SASCU

---
---

# Prep of the "master" pdbs folder, `BENCH_PDBS`:
 * Remove any MCCE output files or folders, along with `prot.pdb`.
---

## tests

In [49]:
import random

In [95]:
# Synthetic (label, value, value) triples used as throw-away test data;
# presumably the two numbers stand for a pair of matched pKa values -- confirm.
# A dedicated, seeded generator makes the sample identical on every run
# (Restart & Run All reproduces the displayed output).
pk_rng = random.Random(42)
pk_values = [3.2, 5.1, 6., 4.4, 7.2]
matched_pks = [
    (pk_rng.choice("ABCDRGWSX"), pk_rng.choice(pk_values), pk_rng.choice(pk_values))
    for _ in range(10)
]
matched_pks

[('R', 3.2, 3.2),
 ('W', 5.1, 7.2),
 ('B', 3.2, 4.4),
 ('C', 4.4, 7.2),
 ('A', 4.4, 7.2),
 ('D', 6.0, 7.2),
 ('W', 7.2, 7.2),
 ('R', 4.4, 6.0),
 ('W', 3.2, 5.1),
 ('W', 3.2, 3.2)]

In [33]:
# NOTE(review): `experimental_pkas_to_dict` and `WT` are not defined or imported
# in any visible cell -- this cell relies on existing kernel state; define or
# import them before this point so the notebook runs on a fresh kernel.
pka_dict = experimental_pkas_to_dict(WT)
# ast_node_interactivity is set to "all" in the setup cell, so both of the
# expressions below are displayed, not just the last one.
len(pka_dict)
list(pka_dict.keys())[:10]

1214

[('135L', 'ASP-A0018_'),
 ('135L', 'GLU-A0035_'),
 ('135L', 'GLU-A0007_'),
 ('135L', 'ASP-A0119_'),
 ('135L', 'ASP-A0087_'),
 ('135L', 'ASP-A0052_'),
 ('1A2P', 'CTR-C0110_'),
 ('1A2P', 'HIS+C0102_'),
 ('1A2P', 'ASP-C0101_'),
 ('1A2P', 'ASP-C0086_')]