# This Jupyter notebook contains a basic example of
- how to set up contact-guided REX simulations in `Gromacs` for a production run on an HPC cluster

Note: you should already be familiar with:
- the setup of MD simulations in `Gromacs`
- the concept of (normal and contact-guided) REX simulations

In [None]:
%matplotlib notebook

from tqdm.notebook import tqdm
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import MDAnalysis as mda
import pyrexMD.core as core
import pyrexMD.misc as misc
import pyrexMD.topology as top
import pyrexMD.analysis.analysis as ana
import pyrexMD.analysis.contacts as con
import pyrexMD.analysis.gdt as gdt
import pyrexMD.rex as rex
import pyrexMD.gmx as gmx
import glob
# Apply pyrexMD's matplotlib rc settings for this session
# (presumably shared plot defaults for the figures below — confirm in pyrexMD.misc).
misc.apply_matplotlib_rc_settings()

## Setup work directory and paths

In [None]:
### change if necessary

# Work directory of this example. misc.cd() changes into it; its return value
# is kept as `root` and reused below to build the decoy path.
root = "files/rex"
root = misc.cd(root)

# Known reference structure and the PDB id obtained from it via ana.get_PDBid().
ref_pdb0 = "1lmb_Chain4.pdb"
pdbid = ana.get_PDBid(ref_pdb0)

# NOTE(review): ./important_files is used as default_dir further below; enable
# the next line if that folder is not created automatically — confirm.
#misc.mkdir(f"./important_files/")

# DCA score file, named after the upper-cased PDB id.
score_fin = f"{pdbid.upper()}.rr"

## Inspect known reference structure

In [None]:
# Load the known reference structure.
u = mda.Universe(ref_pdb0)
# Jupyter only renders the last expression of a cell, so a bare `u` in the
# middle of the cell shows nothing; print the summary explicitly instead.
print(u)
# Interactive player for the loaded structure; the call on the last line is
# what gets rendered as the cell output.
tv = core.iPlayer(u)
tv()

## Setup necessary files and folders

In [None]:
# create ref (apply forcefield)
# the processed reference is written next to the original as <base>_ref.pdb
ref_pdb = f"{misc.get_filedir(ref_pdb0)}/{misc.get_base(ref_pdb0)}_ref.pdb"
ref_pdb = gmx.get_ref_structure(ref_pdb0, ref_pdb)
# clean up in the reference's folder and in the current working directory
# (presumably removes Gromacs backup/temporary files — confirm in pyrexMD.gmx)
gmx.clean_up(misc.get_filedir(ref_pdb), verbose=False)
gmx.clean_up(verbose=False)
print()

# apply forcefield on decoys
decoy_dir = f"{root}/decoys"
decoy_dir = rex.apply_ff_best_decoys(decoy_dir) # overwrite variable
# glob returns matches in arbitrary (filesystem-dependent) order; sort them so
# that decoy_paths[0] refers to the same file on every run and platform
decoy_paths = sorted(glob.glob(f"{decoy_dir}/*pdb"))

# assign decoys to rex_i folders
rex.assign_best_decoys(decoy_dir, verbose=False)

## RESIDUE Analysis (based on rex_1 decoy)

In [None]:
# quick look at reference model
# (loads the original, unprocessed reference PDB into its own Universe `ref0`)
ref0 = mda.Universe(ref_pdb0)
tv = core.iPlayer(ref0)  # rebinds `tv` to a player for ref0
tv()

In [None]:
# align reference and mobile topologies (shift resids, atomids)
# ref    -> reference structure with forcefield applied (ref_pdb)
# mobile -> first entry of decoy_paths
ref = mda.Universe(ref_pdb)
mobile = mda.Universe(decoy_paths[0])

# NOTE(review): the result is not assigned, so this presumably modifies
# `mobile` in place — confirm against pyrexMD.topology.align_resids
top.align_resids(mobile, ref)

In [None]:
# compare residues of mobile and ref
# (run after the alignment step so that both universes are compared on the
# same numbering)
top.check_residues(mobile, ref)

In [None]:
# obtain matching selection strings for reference and mobile base
# (important if e.g. mobile is part of reference, i.e. not full structure)
# the base selection is restricted to protein C-alpha atoms
# NOTE(review): which of sel1/sel2 belongs to mobile vs. ref is not visible here — confirm
sel1, sel2 = top.check_matching_selection(mobile, ref, sel="protein and name CA")

## TPR Analysis

In [None]:
# DCA score file, named after the upper-cased PDB id
score_fin = f"{pdbid.upper()}.rr"

# TPR plot against the reference; n_DCA is set to the number of residues of
# the reference, and columns 0 and 1 of the score file are read
_ = con.plot_DCA_TPR(
    ref,
    score_fin,
    n_DCA=len(ref.residues),
    DCA_cols=(0, 1),
    pdbid=f"{pdbid} reference",
)

The figure shows:
- blue line: TPR
- red line: 75% cutoff threshold (the TPR for the number of contacts used should be above 75% for contact-guided REX; see https://doi.org/10.1371/journal.pone.0242072)
- orange lines: suggested/guessed optimum number of contacts and the corresponding TPR
- orange region: suggested region of interest between L/2 and L contacts


## test if all REX pdbs have equal topologies

In [None]:
# collect the replica folders and replica PDB files below the current
# directory (realpath=True: presumably returned as absolute paths — confirm)
REX_DIRS = rex.get_REX_DIRS("./", realpath=True)
REX_PDBS = rex.get_REX_PDBS("./", realpath=True)

# first topology check of all replica PDBs against the reference
rex.check_REX_PDBS(REX_PDBS, ref_pdb=ref_pdb)

In [None]:
# second topology check of the replica PDBs against the reference, with output suppressed
# NOTE(review): how check2 differs from check_REX_PDBS is not visible here — see pyrexMD.rex
rex.check2_REX_PDBS(REX_PDBS, ref_pdb=ref_pdb, verbose=False)

## Workflow: get system parameters
Obtain the parameters for a REX setup with different start configurations:
- fixed box dimensions
- fixed number of solvent molecules

In [None]:
# dump mdp files (min.mdp, nvt.mdp, npt.mdp, rex.mdp) into cwd
# misc.cp("path/to/mdp/files", ".")

In [None]:
# run the parameter workflow in its own folder; the returned `boxsize` and
# `maxsol` are passed to the REX setup workflow below
boxsize, maxsol = rex.WF_get_system_parameters(wdir="./rex_0_get_system_parameters/")

## populate replicas with decoys

In [None]:
# apply workflow for REX setup
# `rex_dirs` is needed by this and the following cells but is only assigned
# near the end of the notebook, so running top to bottom raised a NameError.
# Bind it here to the replica folders already collected as REX_DIRS.
rex_dirs = REX_DIRS
rex.WF_REX_setup(rex_dirs=rex_dirs, boxsize=boxsize, maxsol=maxsol, verbose=False, verbose_gmx=False)

In [None]:
# apply energy minimization (limit number of steps to 10 for this example)
# NOTE: nsteps=10 only keeps the example fast; choose a suitable value for real runs
rex.WF_REX_setup_energy_minimization(rex_dirs=rex_dirs, nsteps=10, verbose=False)

## modify topology:
- use rex_1 as the template for all replicas
(different start configurations, but fixed box size and fixed number of solvent molecules)
- add bias contacts to the topology

In [None]:
# switch (back) to the work directory before the mapping files are written
misc.cd(root)

# number of DCA contacts to use (check TPR Analysis plot for ideal number)
n_DCA = 70

# map the selected contacts from residue pairs to atom pairs;
# columns 0 and 1 of the score file are read
RES_PAIR, ATOM_PAIR = top.DCA_res2atom_mapping(
    ref_pdb=ref_pdb,
    DCA_fin=score_fin,
    n_DCA=n_DCA,
    usecols=(0, 1),
    default_dir="./important_files",
)

In [None]:
# print table with bias contacts (mapping data)
# header line first, then residue pairs and atom pairs side by side
misc.cprint("\nRES PAIR    ATOM PAIR", "blue")
_ = misc.print_table([RES_PAIR, ATOM_PAIR], spacing=12)  # result assigned to `_` so it is not echoed

In [None]:
# add the bias contacts to the topology of every replica
# (the list of used contacts is the same file for all replicas)
dca_used_fin = f"important_files/{pdbid.upper()}_DCA_used.txt"

for ndx, rex_dir in enumerate(rex_dirs, start=1):
    replica = misc.relpath(rex_dir)
    topol_mod = f"{replica}/topol_mod.top"

    # read the replica's topol.top and write the modified copy next to it
    top.DCA_modify_topology(
        top_fin=f"{replica}/topol.top",
        DCA_used_fin=dca_used_fin,
        k=10,
        save_as=topol_mod,
    )

    # keep the first replica's modified topology as a reference copy
    if ndx == 1:
        misc.cp(topol_mod, "./important_files/")

## prepare REX run files (temps, mdp, tpr)

In [None]:
# generate temperature distribution and save log file
# one temperature per replica folder (n_REX = number of folders found)
# NOTE(review): presumably T_0 is the lowest temperature and k controls the
# spacing between replicas — confirm against pyrexMD.rex.prep_REX_temps
rex_dirs = rex.get_REX_DIRS()
rex.prep_REX_temps(T_0=280, n_REX=len(rex_dirs), k=0.006)

In [None]:
# prepare REX mdp files with temperature distribution
# (main_dir is the current work directory; one mdp per replica)
rex.prep_REX_mdp(main_dir="./", n_REX=len(rex_dirs))

In [None]:
# prepare REX tpr files with modified topology
# (main_dir is the current work directory; one tpr per replica)
rex.prep_REX_tpr(main_dir="./", n_REX=len(rex_dirs))

In [None]:
# next: upload REX MD files on HPC and execute production run