_Cell 1: from jupyterlab template_: run it

In [2]:
import sys
from pathlib import Path
import time
import numpy as np
from pprint import pprint as pp
import matplotlib as mpl
from matplotlib import pyplot as plt
plt.ion()
#plt.style.use('seaborn-v0_8-muted')
#from IPython.display import HTML, Markdown #, IFrame

# To get multiple outputs into 1 cell w/o using print:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# autoreload extension
from IPython import get_ipython

ipython = get_ipython()
if 'autoreload' not in ipython.extension_manager.loaded:
    %load_ext autoreload
%autoreload 2

# -----------------------------------------
# TWO USEFUL FUNCTIONS:

def add_to_sys_path(this_path, up=False):
    """
    Insert a folder into sys.path (at index 1) unless it is already listed.

    The folder is inserted at index 1, not 0, so the first sys.path entry
    keeps its position.

    Parameters:
        this_path (str or Path): The reference folder.
        up (bool): If False (default), this_path itself is added.
            If True, the parent of this_path is considered instead: its
            "src" subfolder is added when that subfolder is a directory,
            otherwise the parent folder itself is added.

    Returns:
        None. Prints the path when it is added; does nothing if the path
        is already in sys.path.
    """
    target = Path(this_path)
    if up:
        target = target.parent
        # Prefer a "src" folder one level up, but only if it really is a
        # directory: a regular file named "src" must not end up on sys.path.
        src = target.joinpath("src")
        if src.is_dir():
            target = src
    newp = str(target)

    if newp not in sys.path:
        sys.path.insert(1, newp)
        print('Path added to sys.path: {}'.format(newp))

# Filtered dir() for method discovery:
def fdir(obj, start_with_str='_', exclude=True):
    """
    Return the names from dir(obj), filtered on a prefix.

    Parameters:
        obj: Any object accepted by dir().
        start_with_str (str): The prefix to filter on; default is '_'.
        exclude (bool): If True (default), names starting with the prefix
            are left out; if False, only names starting with it are kept.

    Returns:
        list: The retained names, in dir() order.
    """
    kept = []
    for name in dir(obj):
        has_prefix = name.startswith(start_with_str)
        # Drop the name when its prefix status matches `exclude`.
        if has_prefix == exclude:
            continue
        kept.append(name)
    return kept


_Cell 2: from jupyterlab template_: run it

In [3]:
# Insert the current dir (or its parent) into sys.path so that modules in
# ../src can be imported.
# If the working directory's name starts with `nb_folder` (e.g. ./notebooks),
# the parent folder -- or its `src` subfolder when there is one -- is added;
# otherwise the current directory itself is added.
# CHANGE THIS IF NEEDED:
nb_folder = 'notebooks'
add_to_sys_path(Path.cwd(), Path.cwd().name.startswith(nb_folder))

Path added to sys.path: /home/cat/projects/MCCE_Scikit/src


---

---

# MCCE - MS Sampling (using test data in ../tests/data/)
---

In [4]:
import base
import mcce_io as io
import ms_sampling as sampling

In [5]:
# Test data folder, located relative to the PARENT of the current working
# directory: the notebook must be run from a folder that sits next to tests/
# (e.g. ./notebooks).
DATA = Path.cwd().parent.joinpath("tests/data")
DATA

PosixPath('/home/cat/projects/MCCE_Scikit/tests/data')

In [6]:
!ls {DATA}

head3.lst  ms_out  run.prm.record  step2_out.pdb


In [7]:
# Filepaths of the inputs used by the MS class.
# The MCCE output folder is taken to be the folder holding head3.lst
# (here, DATA itself).
h3_path = DATA.joinpath("head3.lst")
mcce_output_path = h3_path.parent
mcce_output_path

# The other inputs sit in the same MCCE output folder.
step2_path = mcce_output_path.joinpath("step2_out.pdb")
msout_path = mcce_output_path.joinpath("ms_out")
msout_path, msout_path.is_dir()

PosixPath('/home/cat/projects/MCCE_Scikit/tests/data')

(PosixPath('/home/cat/projects/MCCE_Scikit/tests/data/ms_out'), True)

In [8]:
# Filepaths of the outputs:

# Titration point (pH, Eh) whose ms_out file is used in the cells below.
pH = 5.0
Eh= 0.0
msout_file = io.get_msout_filename(mcce_output_path, pH, Eh)
msout_file

# Folder next to the ms_out file, named after that file without its extension.
msout_file_dir = msout_file.parent.joinpath(msout_file.stem)
msout_file_dir

PosixPath('/home/cat/projects/MCCE_Scikit/tests/data/ms_out/pH5eH0ms.txt')

PosixPath('/home/cat/projects/MCCE_Scikit/tests/data/ms_out/pH5eH0ms')

In [9]:
# Split the ms_out file for the selected (pH, Eh) point and time the call.
start_time = time.time()

io.split_msout_file(mcce_output_path, pH, Eh)

end_time = time.time()
# Label the timing with the function that was actually called.
print("io.split_msout_file() took {:.2f} mins".format((end_time - start_time)/60))

The ms_out file is already split into header and MCi files. Set `overwrite` to True to replace them.
io.divide_msout_file() took 0.00 mins


In [10]:
!ls {msout_file_dir}

MC0  MC1  MC2  MC3  MC4  MC5  header  pdbs_from_ms


In [12]:
# Subfolder of the split ms_out folder that holds the pdb files created from
# sampled microstates.
pdbs_dir = msout_file_dir.joinpath("pdbs_from_ms")

In [13]:
!ls {pdbs_dir}

mc0_ms1.pdb	  mc0_ms278053.pdb  mc0_ms91192.pdb
mc0_ms183964.pdb  mc0_ms373222.pdb


In [15]:
# NOTE(review): presumably empties pdbs_dir of previously generated files so
# that the sampling further down starts from a clean folder -- confirm
# against io.clear_folder. The listing that follows shows what remains.
io.clear_folder(pdbs_dir)
!ls {pdbs_dir}

# base.MS class

In [16]:
# Show the docstrings of the base.MS class and of its constructor.
print(base.MS.__doc__)
print(base.MS.__init__.__doc__)

Uses split ms_out files.
MS.init

        Parameters:
            mcce_output_path (str): A MCCE simulation output folder.
            pH (int or float): A pH point.
            Eh (int or float): A Eh point.
            selected_MC (int): The index of an MC run; one of `range(constants.MONTERUNS)`.
            overwrite_split_files (bool): whether to redo the splitting of msout_file.
        


In [27]:
# Create a base.MS instance for the selected (pH, Eh) point and time how long
# the loading takes.
start_time = time.time()

ms = base.MS(mcce_output_path, pH, Eh)

# Elapsed wall-clock time in seconds; reported in both minutes and seconds.
d = time.time() - start_time
print(f"Loading of base.MS instance took {d/60:.2f} mins or {d:.2f} seconds")
print(ms)

Loading of base.MS instance took 0.15 mins or 9.02 seconds
MS("/home/cat/projects/MCCE_Scikit/tests/data", 5.0, 0.0, selected_MC=0, overwrite_split_files=False)


In [28]:
# Public attributes and methods of the MS instance (names not starting
# with '_'):
fdir(ms)

['Eh',
 'N_ms',
 'N_uniq',
 'T',
 'confnames_by_iconfs',
 'conformers',
 'counts',
 'fixed_confs',
 'fixed_crg',
 'fixed_iconfs',
 'fixed_ne',
 'fixed_nh',
 'fixed_residue_names',
 'fname',
 'free_residue_names',
 'free_residues',
 'get_occ',
 'iconf_by_confname',
 'ires_by_iconf',
 'mcce_out',
 'method',
 'microstates',
 'msout_file_dir',
 'overwrite_split_files',
 'pH',
 'select_by_conformer',
 'select_by_energy',
 'selected_MC']

In [35]:
ms.method

'MONTERUNS'

# ms sampling

In [29]:
fdir(sampling)

['Path',
 'base',
 'get_pdb_remark',
 'get_selected_confs',
 'io',
 'np',
 'pdbs_from_ms_samples',
 'sample_microstates',
 'sort_microstate_list']

In [30]:
# Arguments passed to sampling.pdbs_from_ms_samples below.
# Sample size:
n_sample_size = 5
# NOTE(review): presumably the key the microstates are sorted by -- confirm
# the accepted values against ms_sampling.
ms_sort_by = "energy"
# The split ms_out folder for the selected (pH, Eh) point.
output_dir = msout_file_dir

In [36]:
# Create pdb files from sampled microstates and time the call.
start_time = time.time()

sampling.pdbs_from_ms_samples(ms,
                              mcce_output_path,
                              n_sample_size,
                              ms_sort_by,
                              output_dir,
                              list_files=True)

# Elapsed wall-clock time in seconds; reported in both minutes and seconds.
d = time.time() - start_time
print(f"`sampling.pdbs_from_ms_samples` with sample size={n_sample_size:,} took {d/60:.2f} mins or {d:.2f} seconds")

PDB files creation over.
Files in /home/cat/projects/MCCE_Scikit/tests/data/ms_out/pH5eH0ms/pdbs_from_ms:

	 /home/cat/projects/MCCE_Scikit/tests/data/ms_out/pH5eH0ms/pdbs_from_ms/mc0_ms278053.pdb
	 /home/cat/projects/MCCE_Scikit/tests/data/ms_out/pH5eH0ms/pdbs_from_ms/mc0_ms1.pdb
	 /home/cat/projects/MCCE_Scikit/tests/data/ms_out/pH5eH0ms/pdbs_from_ms/mc0_ms91192.pdb
	 /home/cat/projects/MCCE_Scikit/tests/data/ms_out/pH5eH0ms/pdbs_from_ms/mc0_ms183964.pdb
	 /home/cat/projects/MCCE_Scikit/tests/data/ms_out/pH5eH0ms/pdbs_from_ms/mc0_ms373222.pdb
`sampling.pdbs_from_ms_samples` with sample size=5 took 0.03 mins or 1.85 seconds


In [37]:
!head -n 20 ../tests/data/ms_out/pH5eH0ms/pdbs_from_ms/mc0_ms1.pdb


REMARK 250
REMARK 250 EXPERIMENTAL DETAILS
REMARK 250   EXPERIMENT TYPE               : MCCE simulation
REMARK 250   DATE OF DATA COLLECTION       : 23-Oct-23
REMARK 250   REMARK: DATE OF DATA COLLECTION is the date this pdb was created.
REMARK 250 EXPERIMENTAL CONDITIONS
REMARK 250   TEMPERATURE                   : 298.15 (K)
REMARK 250   PH                            : 5.00
REMARK 250   EH                            : 0.00
REMARK 250   METHOD                        : MONTERUNS
REMARK 250   SELECTED MONTERUN             : 0
REMARK 250   SELECTED MICROSTATE INDEX     : 1
REMARK 250   SELECTED MICROSTATE ENERGY    : 202.89 (kcal/mol)
REMARK 250
ATOM     12  C   LYS A0001_000   3.389   7.073  13.137   1.700       0.550      BK____M000 
ATOM     13  O   LYS A0001_000   3.213   7.476  14.285   1.400      -0.550      BK____M000 
ATOM     45  N   VAL A0002_000   4.060   7.689  12.193   1.500      -0.350      BK____M000 
ATOM     46  H   VAL A0002_000   4.220   7.233  11.317   1.000       0.