_First cell: from jupyterlab template_

In [1]:
import sys
from pathlib import Path

def add_to_sys_path(this_path, up=False):
    """
    Register a folder on sys.path, unless it is already listed.

    The folder is inserted at index 1 of sys.path and a message is printed
    when the insertion happens.

    Parameters:
        this_path (str or Path): Folder to register when up=False.
        up (bool): If True, use the parent of this_path instead (1 level up),
            or the parent's "src" subfolder when that subfolder exists.
    """
    target = Path(this_path)
    if up:
        target = target.parent
        # prefer a src folder when the project has one:
        src_dir = target / "src"
        if src_dir.exists():
            target = src_dir
    entry = str(target)

    if entry in sys.path:
        return

    sys.path.insert(1, entry)
    print('Path added to sys.path: {}'.format(entry))

# If the notebook runs from a dedicated folder (e.g. ./notebooks), register
# the folder one level up (or its src subfolder) instead of the current one:
nb_folder = 'notebooks'
add_to_sys_path(this_path=Path.cwd(), up=Path.cwd().name.startswith(nb_folder))

def get_project_dirs(which=('data', 'images'), nb_folder='notebooks'):
    """
    Return project sub-folders as Path objects, creating the missing ones.

    The project root is the current working directory, or its parent when
    the current folder's name starts with `nb_folder` (i.e. the notebook
    lives in a dedicated folder such as ./notebooks).

    Parameters:
        which (iterable of str): Folder names relative to the project root;
            nested names such as 'data/raw' are allowed.
        nb_folder (str): Name prefix identifying a notebooks folder.

    Returns:
        list of Path: One path per entry of `which`, in the same order.
    """
    cwd = Path.cwd()
    root = cwd.parent if cwd.name.startswith(nb_folder) else cwd

    dir_lst = []
    for d in which:
        DIR = root.joinpath(d)
        if not DIR.exists():
            # parents=True: nested names no longer raise FileNotFoundError;
            # exist_ok=True: no failure if the folder appears between the
            # existence check and the creation.
            DIR.mkdir(parents=True, exist_ok=True)
        dir_lst.append(DIR)
    return dir_lst

#DIR_DATA, DIR_IMG = get_project_dirs()

import numpy as np
import matplotlib as mpl
from matplotlib import pyplot as plt
plt.ion()
plt.style.use('seaborn-v0_8-muted')
from pprint import pprint as pp

# Filtered dir() for method discovery:
def fdir(obj, start_with_str='_', exclude=True):
    """
    Return the names from dir(obj), filtered on a prefix.

    Parameters:
        obj: Any object to inspect.
        start_with_str (str): Prefix to test each name against.
        exclude (bool): If True, drop the names starting with the prefix;
            if False, keep only those names.

    Returns:
        list of str: The filtered names, in dir() order.
    """
    selected = []
    for name in dir(obj):
        if name.startswith(start_with_str) != exclude:
            selected.append(name)
    return selected

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

from IPython.display import HTML, Markdown #, IFrame

# autoreload extension: load it only when the extension manager does not
# already list it as loaded, then switch it to mode 2
# (NOTE(review): mode 2 is understood to reload edited modules automatically
# before code runs — confirm against the IPython autoreload docs).
from IPython import get_ipython
ipython = get_ipython()
if 'autoreload' not in ipython.extension_manager.loaded:
    %load_ext autoreload
%autoreload 2

Path added to sys.path: /home/cat/projects/MCCE_Scikit/src


---

---

# MCCE - MS Sampling
---

In [2]:
import time
import base
import mcce_io as io
import ms_sampling as sampling

In [3]:
DATA = Path.cwd().parent.joinpath("tests/data")
DATA

PosixPath('/home/cat/projects/MCCE_Scikit/tests/data')

In [4]:
!ls {DATA}

head3.lst  ms_out  run.prm.record  step2_out.pdb


In [5]:
DATA = Path.cwd().parent.joinpath("tests/data")
DATA

# Paths of the inputs used by the MS class, all located in the DATA folder:
h3_path = DATA / "head3.lst"
mcce_output_path = h3_path.parent
mcce_output_path

step2_path = mcce_output_path / "step2_out.pdb"
msout_path = mcce_output_path / "ms_out"
msout_path, msout_path.is_dir()

PosixPath('/home/cat/projects/MCCE_Scikit/tests/data')

PosixPath('/home/cat/projects/MCCE_Scikit/tests/data')

(PosixPath('/home/cat/projects/MCCE_Scikit/tests/data/ms_out'), True)

In [6]:
# Paths of outputs: the ms_out file for the chosen (pH, Eh) point, and the
# folder next to it that is named after the file's stem:

pH = 5.0
Eh = 0.0
msout_file = io.get_msout_filename(mcce_output_path, pH, Eh)
msout_file

msout_file_dir = msout_file.parent / msout_file.stem
msout_file_dir

PosixPath('/home/cat/projects/MCCE_Scikit/tests/data/ms_out/pH5eH0ms.txt')

PosixPath('/home/cat/projects/MCCE_Scikit/tests/data/ms_out/pH5eH0ms')

In [7]:
# Split the ms_out file for the (pH, Eh) point and time the call:
start_time = time.time()

io.split_msout_file(mcce_output_path, pH, Eh)

end_time = time.time()
# The label must name the function actually called above (split_msout_file).
print("io.split_msout_file() took {:.2f} mins".format((end_time - start_time)/60))

The ms_out file is already split into header and MCi files. Set `overwrite` to True to replace them.
io.divide_msout_file() took 0.00 mins


In [8]:
!ls {msout_file_dir}

MC0  MC1  MC2  MC3  MC4  MC5  header


In [9]:
#io.clear_folder(msout_file_dir)

# base.MS class

In [10]:
print(base.MS.__doc__)
print(base.MS.__init__.__doc__)

Uses split ms_out files.
MS.init

        Parameters:
            mcce_output_path (str): A MCCE simulation output folder.
            pH (int or float): A pH point.
            Eh (int or float): A Eh point.
            selected_MC (int): The index of an MC run; one of `range(constants.MONTERUNS)`.
            overwrite_split_files (bool): whether to redo the splitting of msout_file.
        


In [11]:
# Create a base.MS instance for the (pH, Eh) point set in an earlier cell and
# time its construction; the remaining constructor arguments keep their defaults.
start_time = time.time()

ms = base.MS(mcce_output_path, pH, Eh)

end_time = time.time()
print("Loading of base.MS instance took {:.2f} mins".format((end_time - start_time)/60))
print(ms)

Loading of base.MS instance took 0.14 mins
MS("/home/cat/projects/MCCE_Scikit/tests/data", 5.0, 0.0, selected_MC=0, overwrite_split_files=False)


In [12]:
fdir(ms)

['Eh',
 'N_ms',
 'N_uniq',
 'T',
 'confnames_by_iconfs',
 'conformers',
 'counts',
 'fixed_confs',
 'fixed_crg',
 'fixed_iconfs',
 'fixed_ne',
 'fixed_nh',
 'fixed_residue_names',
 'fname',
 'free_residue_names',
 'free_residues',
 'get_occ',
 'iconf_by_confname',
 'ires_by_iconf',
 'mcce_out',
 'method',
 'microstates',
 'msout_file_dir',
 'overwrite_split_files',
 'pH',
 'select_by_conformer',
 'select_by_energy',
 'selected_MC']

# ms sampling

In [13]:
fdir(sampling)

['Path',
 'base',
 'get_selected_confs',
 'io',
 'np',
 'pdbs_from_ms_samples',
 'sample_microstates',
 'sort_microstate_list']

In [14]:
n_sample_size = 5
ms_sort_by = "energy"
output_dir = msout_file_dir

In [15]:
# Create pdb files from sampled microstates and time the call.
# n_sample_size, ms_sort_by and output_dir are set in the previous cell.
# NOTE(review): list_files=True presumably makes the function print the
# created files — confirm in ms_sampling.pdbs_from_ms_samples.
start_time = time.time()

sampling.pdbs_from_ms_samples(ms,
                              mcce_output_path,
                              n_sample_size,
                              ms_sort_by,
                              output_dir,
                              list_files=True)

end_time = time.time()
print("`sampling.pdbs_from_ms_samples` with sample size={:,} took {:.2f} mins".format(n_sample_size, (end_time - start_time)/60))

PDB files creation over.
Files in /home/cat/projects/MCCE_Scikit/tests/data/ms_out/pH5eH0ms/pdbs_from_ms:

	 /home/cat/projects/MCCE_Scikit/tests/data/ms_out/pH5eH0ms/pdbs_from_ms/mc0_ms278053.pdb
	 /home/cat/projects/MCCE_Scikit/tests/data/ms_out/pH5eH0ms/pdbs_from_ms/mc0_ms1.pdb
	 /home/cat/projects/MCCE_Scikit/tests/data/ms_out/pH5eH0ms/pdbs_from_ms/mc0_ms91192.pdb
	 /home/cat/projects/MCCE_Scikit/tests/data/ms_out/pH5eH0ms/pdbs_from_ms/mc0_ms183964.pdb
	 /home/cat/projects/MCCE_Scikit/tests/data/ms_out/pH5eH0ms/pdbs_from_ms/mc0_ms373222.pdb
`sampling.pdbs_from_ms_samples` with sample size=5 took 0.02 mins
