_Run the first cell as is._  (It contains helper functions & common imports.)

In [None]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
from IPython.display import Markdown, Image

from functools import partial
import numpy as np
import pandas as pd

from pathlib import Path
from pprint import pprint as ptp
import sys
import time
import matplotlib as mpl
from matplotlib import pyplot as plt
plt.ion()
plt.style.use('seaborn-v0_8-muted')

# Report the interpreter, its environment, the pandas version and the working directory.
print(f"Python ver: {sys.version}\nPython env: {Path(sys.prefix).name}")
print(f"Pandas ver: {pd.__version__}")
print(f"Current dir: {Path.cwd()}\n")

def add_to_sys_path(this_path, up=False):
    """
    Register a folder in sys.path so that its modules can be imported.

    The folder is inserted at index 1 of sys.path, unless it is already listed.
    If up=True, the parent folder of this_path (1 level up) is used instead.
    A message is printed whenever a path is actually added.
    """

    target = Path(this_path).parent if up else Path(this_path)
    entry = str(target)
    if entry in sys.path:
        # Already registered: nothing to do.
        return
    sys.path.insert(1, entry)
    print('Path added to sys.path: {}'.format(entry))


def fdir(obj, start_with_str='_', exclude=True):
    """Return the names from dir(obj) filtered on a prefix (for method discovery).

    With exclude=True, names starting with start_with_str are left out;
    with exclude=False, only the names starting with start_with_str are kept.
    """
    selected = []
    for name in dir(obj):
        # Keep the name when its prefix match differs from the exclude flag.
        if name.startswith(start_with_str) != exclude:
            selected.append(name)
    return selected

def despine(which=('top', 'right')):
    """Hide the given spines of the current matplotlib axes.

    which (iterable of str): any of 'left', 'top', 'right', 'bottom'.
    """

    # An immutable default avoids sharing one mutable list across calls.
    ax = plt.gca()
    for side in which:
        ax.spines[side].set_visible(False)

def md_width_comment(w:int=120) -> str:
    """Return an HTML comment holding a dotted ruler of w characters (md width guide)."""
    ruler = '.' * w
    return f"<!-- dotted line width = {w}\n{ruler}-->"

def get_elapsed_time(start_t: float, message: str = None) -> str:
    """Return a sentence reporting the time elapsed since start_t.

    start_t: a timestamp previously obtained from time.time().
    message: optional text used instead of the default "Elapsed time:" prefix.

    The elapsed time is reported in seconds and in minutes, with 2 decimals.
    """
    elapsed = time.time() - start_t
    prefix = "Elapsed time:" if message is None else message
    return f"{prefix} {elapsed:,.2f} s ({elapsed/60:,.2f} min)."

# autoreload extension
%load_ext autoreload
%autoreload 2

# Reference MCCE4 paths in order to enable module imports:

In [None]:
# Folder this notebook is run from:
here = Path.cwd()

###### MODIFY:
# Where is MCCE4 w.r.t. the location of THIS notebook?

# Example: MCCE4 is three levels above the notebook's folder.
MC4 = here.parent.parent.parent
# Display the resulting path for a visual check:
MC4

In [None]:
# With MC4 properly set, the two MCCE4 code folders below get registered
# in sys.path without error:

bin_dir = MC4 / "bin"
mcbin_dir = MC4 / "MCCE_bin"
# MCCE_bin is registered first, then bin.
for _code_dir in (mcbin_dir, bin_dir):
    add_to_sys_path(_code_dir)

In [None]:
import argparse
import shutil

import mcce4.io_utils as mciou
from mcce4.mcce_benchmark import BENCH, N_PDBS, N_BATCH, RUNS_DIR, ANALYZE_DIR, FILES
from mcce4.mcce_benchmark import (
io_utils as iou,
pkanalysis,
comparison,
plots,
mcce_env as mcenv,
)

---
---
# Define MCCE benchmark folders for analysis and comparison

In [None]:
###### MODIFY as needed:

# Note:
# Example pairing: dir1 is dry5idsl2 and dir2 is dry5idsl1, so that the default level (l=1)
# set is the reference when comparing with: bench_compare dir1 dir2

# First benchmark folder and its analysis subfolder (existence check, then paths displayed):
bdir1 = (MC4.parent / "tests_bench" / "dry5idsl2").resolve()
analyze1 = bdir1 / "analysis"
bdir1.exists(), analyze1.exists()
bdir1
analyze1

# Second benchmark folder and its analysis subfolder (existence check, then paths displayed):
bdir2 = (MC4.parent / "tests_bench" / "dry5idsl1").resolve()
analyze2 = bdir2 / "analysis"
bdir2.exists(), analyze2.exists()
bdir2
analyze2

# Folder of the comparison of the two sets:
comp_dir = MC4.parent / "tests_bench" / "comp_dry_l2l1"

# Shortcut for rounding to 2 decimals:
round2 = partial(round, ndigits=2)

In [None]:
!ls -l {bdir2}

---
# Note: the code below is using `bdir2`, `analyze2` and `comp_dir` as paths

---
---

# USE CASE 1: Redo the figures because they use the "wrong" conformer-making level
This may be needed if the actual conformer-making level is not what you need it to be, likely because you have created a benchmarking set by copying the structure from another one instead of copying the data files (see Gehan).

## Plotting functions that use the level argument:
  * plot_conf_thrup
  * plot_res_analysis
  * plot_pkas_fit: the level number is in its pkas stats dict argument, which needs to be amended

## There are two ways to correct this situation.
### 1. Amend the saved command line arguments (pickled file), then:
 * A. Rerun the analysis/comparison at the command line.
 * B. Rerun the plot functions for updating the figures.

Consider this as a corrective measure: if done properly, the updated pickle file will reflect the intended setup.
### 2. Change the value of the level argument. Applies to plot_conf_thrup & plot_res_analysis.

Obtain all the arguments necessary to the plot function, then change the value of the level argument.

### 1. Amend the saved command line arguments (pickled file)
In order to know what to correct, you need to load the file and inspect its contents:

In [None]:
# Create a backup copy. Even if not needed, it will indicate the original was amended.

setup_args_fp = bdir2.joinpath(FILES.CLI_ARGS_PKL.value)
bkp_fp = bdir2.joinpath(setup_args_fp.name + ".bkp")
# Keep the first backup: re-running this cell after the pickle file was amended
# must not replace the saved original with the amended version.
if not bkp_fp.exists():
    shutil.copy(setup_args_fp, bkp_fp)

In [None]:
# When 'unpickled' directly (deserialized), the output type is 'argparse.Namespace'; to get a dict, we use the `vars` function:
setup_d = vars(iou.from_pickle(setup_args_fp))

In [None]:
# What's in it?
ptp(setup_d)

In [None]:
###### MODIFY as needed:

# Update conf_making_level number:
setup_d["conf_making_level"] = 2  # an integer, 1, 2, or 3

# Re-serialize as the same data type as the original (argparse.Namespace), into the same file path:
iou.to_pickle(argparse.Namespace(**setup_d), setup_args_fp)

In [None]:
# Check updated data:
ptp(vars(iou.from_pickle(setup_args_fp)))

## Option 1.A. Rerun the analysis/comparison at the command line.
This is the easiest way.  
**If your analysis files are old, this is also the recommended option**:  
e.g. a newer version has the "data_stats" key in the pka stats dictionary to hold rmsd and correlation coeff of the two series irrespective of the outcome of the fit function (that's why it's in a separate key).

In a terminal window, cd to your benchmark folder and run: 
```
> bench_analyze pkdb_pdbs -bench_dir .
# or
> bench_analyze user_pdbs -bench_dir .
```

## Option 1.B. Rerun only the plot functions
This entails accessing existing analysis output files & setting each function's arguments.

In [None]:
!ls -l {analyze2}

---
### 1.B. Rerun `plots.plot_conf_thrup`

In [None]:
# What are its arguments?
plots.plot_conf_thrup?

In [None]:
# Perhaps backup the old file?
# alternate way of copying files in a notebook (shell command with {} expansion):
!cp {analyze2/"confs_throughput.png"} {analyze2/"confs_throughput.png.bkp"}

# get number of completed runs:
n_complete = len(iou.get_book_dirs_for_status(bdir2/"runs"/"book.txt"))
n_complete

level = 2   # the new level
# new pic (overwrite existing file):
conf_thruput_pic = analyze2.joinpath(FILES.FIG_CONFS_TP.value)

# Get the dataframe:
conf_thruput_fp = analyze2.joinpath(FILES.CONFS_THRUPUT.value)
thruput_df = iou.txt2df(conf_thruput_fp, header=0)
# Redo the figure; the third argument is the name of the folder holding the analysis folder:
plots.plot_conf_thrup(
        thruput_df,
        n_complete,
        analyze2.parent.name,
        level=level,
        out_fp=conf_thruput_pic,
    )

# Load the new file:
Image(filename=conf_thruput_pic)

---
### 1.B. Rerun `plot_res_analysis`

In [None]:
# What are its arguments?
plots.plot_res_analysis?

In [None]:
# Perhaps backup the old files?
!cp {analyze2/"res_analysis.png"} {analyze2/"res_analysis.png.bkp"}
!cp {analyze2/"residues_stats.pickle"} {analyze2/"residues_stats.pickle.bkp"}

# load the saved residues stats dict:
res_stats_d = iou.from_pickle(analyze2.joinpath(FILES.RESIDUES_STATS_PKL.value))

# Redo the plot with the new level (passed as the third positional argument):
lev = 2
matched_fp = analyze2.joinpath(FILES.MATCHED_PKAS_TXT.value)
# The output path is built from FILES.FIG_FIT_PER_RES:
new_pic = analyze2.joinpath(FILES.FIG_FIT_PER_RES.value)
plots.plot_res_analysis(
            matched_fp,
            res_stats_d,
            lev,
            out_fp=new_pic,
        )

# Load the new file:
Image(filename=new_pic)

---
### 1.B. Rerun `plot_pkas_fit`

In [None]:
# What are its arguments?
plots.plot_pkas_fit?

In [None]:
# Perhaps backup the old files?
!cp {analyze2/"res_analysis.png"} {analyze2/"res_analysis.png.bkp"}
!cp {analyze2/"matched_pkas_stats.pickle"} {analyze2/"matched_pkas_stats.pickle.bkp"}

# load the saved pkas stats dict:
d_stats = iou.from_pickle(analyze2.joinpath(FILES.MATCHED_PKAS_STATS_PKL.value))

# Change the level in the dict:
d_stats["level"] = (2, iou.levels2names[2])

matched_fp = analyze2.joinpath(FILES.MATCHED_PKAS_TXT.value)
pkfit_pic = analyze2.joinpath(FILES.FIG_FIT_ALLPKS.value)
plots.plot_pkas_fit(
    matched_fp,
    d_stats,
    out_fp=pkfit_pic)

# Load the new file:
Image(filename=pkfit_pic)

---
## Redoing comparison figures
The files will have a different parent folder.

In [None]:
!ls -l {comp_dir}

In [None]:
# Matched pKas file of the comparison folder; it is passed to both plot functions in the next two cells:
matched_fp = comp_dir.joinpath(FILES.MATCHED_PKAS_TXT.value)

# Perhaps backup the old files?
# (each figure, along with the stats pickle file loaded to redo it)
!cp {comp_dir/"res_analysis.png"} {comp_dir/"res_analysis.png.bkp"}
!cp {comp_dir/"residues_stats.pickle"} {comp_dir/"residues_stats.pickle.bkp"}
!cp {comp_dir/"pkas_fit.png"} {comp_dir/"pkas_fit.png.bkp"}
!cp {comp_dir/"matched_pkas_stats.pickle"} {comp_dir/"matched_pkas_stats.pickle.bkp"}

In [None]:
# Re-plot the residues fit:

# Get the input dict:
res_stats_d = iou.from_pickle(comp_dir.joinpath(FILES.RESIDUES_STATS_PKL.value))

# The output path is built from FILES.FIG_FIT_PER_RES, in the comparison folder:
comp_resfit = comp_dir.joinpath(FILES.FIG_FIT_PER_RES.value)
level = 2  # the new level
plots.plot_res_analysis(
        matched_fp,
        res_stats_d,
        level=level,
        out_fp=comp_resfit,
    )

# Load the new file:
Image(filename=comp_resfit)

In [None]:
# NOTE: This may fail if the saved data is too old (i.e. from a previous version):

# Get the input dict:
d_stats = iou.from_pickle(comp_dir.joinpath(FILES.MATCHED_PKAS_STATS_PKL.value))

# Change the level in the dict (only in memory: the pickle file is not rewritten here):
d_stats["level"] = (2, iou.levels2names[2])

# Re-plot the pkas fit:
comp_pkfit = comp_dir.joinpath(FILES.FIG_FIT_ALLPKS.value)

# NOTE(review): comparison=True presumably selects the comparison variant of the plot - confirm in plots.plot_pkas_fit.
plots.plot_pkas_fit(
    matched_fp,
    d_stats,
    out_fp=comp_pkfit,
    comparison=True)

Reset the next cell to code instead of raw if the previous cell ran without errors