# This Jupyter notebook contains examples of
- some basic functions related to Global Distance Test (GDT) analyses
- local accuracy plot

In [None]:
%matplotlib notebook

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import MDAnalysis as mda
import pyrexMD.misc as misc
import pyrexMD.core as core
import pyrexMD.topology as top
import pyrexMD.analysis.analysis as ana
import pyrexMD.analysis.gdt as gdt

# Apply the matplotlib rc settings provided by pyrexMD.misc before any figure is created.
misc.apply_matplotlib_rc_settings()

We define MDAnalysis universes to handle data. In this case we define:
- ref: universe with reference structure
- mobile: universe with trajectory

In [None]:
# Reference structure file.
pdb = "files/traj/2hba_ChainB_ref.pdb"
# File pair (tpr + xtc) that together define the trajectory universe.
tpr, traj = "files/traj/traj_protein.tpr", "files/traj/traj_protein.xtc"

# ref: universe with the reference structure
ref = mda.Universe(pdb)
# mobile: universe with the trajectory
mobile = mda.Universe(tpr, traj)

Check the trajectory via the trajectory viewer.

In [None]:
# Build the trajectory viewer for the mobile universe.
tv = core.iPlayer(mobile)
# Call the viewer as the last expression of the cell.
# NOTE(review): presumably this renders the viewer widget in the cell output — confirm.
tv()

# Global Distance Test (GDT) Analysis
First we normalize and align the universes (shift residue ids and atom ids) and then run the Global Distance Test.

In [None]:
# Norm and align the two universes (shift res ids, atom ids) before comparing them.
# NOTE(review): the return value is discarded, so this presumably modifies
# mobile/ref in place; re-running the cell may therefore not be idempotent — confirm.
top.norm_and_align_universe(mobile, ref)

# Run the Global Distance Test of mobile against ref and keep the full result tuple.
GDT = gdt.GDT(mobile, ref)
# Unpack the five components of the result; GDT_percent feeds the score functions.
GDT_percent, GDT_resids, GDT_cutoff, RMSD, FRAME = GDT

Now we can calculate individual GDT scores
- TS: Total Score
- HA: High Accuracy

In [None]:
# Both scores are computed from the same GDT_percent data.
GDT_TS = gdt.get_GDT_TS(GDT_percent)  # TS: Total Score
GDT_HA = gdt.get_GDT_HA(GDT_percent)  # HA: High Accuracy

We can print the scores in a table to take a quick look at the content.

In [None]:
# One index per GDT_TS entry: 0, 1, 2, ...
frames = list(range(len(GDT_TS)))

# Header line first, then the table with one column per list.
misc.cprint("GDT TS    GDT HA    frame", "blue")
_ = misc.print_table(
    [GDT_TS, GDT_HA, frames],
    spacing=10,
    verbose_stop=10,
)

Alternatively, we can first rank the scores and print the table sorted by rank.

In [None]:
# Rank the scores by GDT_TS; the result unpacks into ranked TS, ranked HA and ranked indices.
SCORES = gdt.GDT_rank_scores(
    GDT_percent,
    ranking_order="GDT_TS",
    verbose=False,
)
GDT_TS_ranked, GDT_HA_ranked, GDT_ndx_ranked = SCORES

# Same table layout as the unranked table, but using the ranked columns.
misc.cprint("GDT TS    GDT HA    frame", "blue")
_ = misc.print_table(
    [GDT_TS_ranked, GDT_HA_ranked, GDT_ndx_ranked],
    spacing=10,
    verbose_stop=10,
)

To plot the GDT_TS curve we can use a generalized PLOT function:

In [None]:
# GDT_TS as a function of the frame index.
fig, ax = ana.PLOT(
    xdata=frames,
    ydata=GDT_TS,
    xlabel="Frame",
    ylabel="GDT TS",
)

Histograms are often important as well, since they can be used to extract probabilities of protein conformations.

In [None]:
# Histogram of the GDT_TS values using 20 bins.
fig, ax, hist = ana.plot_hist(
    GDT_TS,
    n_bins=20,
    xlabel="GDT TS",
    ylabel="Counts",
)

# Local Accuracy Plot
Figure showing the local accuracy of models at specified frames, used to identify which parts of a structure are well or poorly refined.

In [None]:
# Keyword arguments forwarded to gdt.plot_LA:
# text box positions of the labels "Frame", "TS", "HA", plus the show_frames flag.
text_pos_kws = dict(
    text_pos_Frame=[-32, -0.3],
    text_pos_TS=[-15.6, -0.3],
    text_pos_HA=[-7, -0.3],
    show_frames=True,
)

# Local accuracy plot built from the ranked scores and their frame indices.
_ = gdt.plot_LA(
    mobile,
    ref,
    GDT_TS_ranked,
    GDT_HA_ranked,
    GDT_ndx_ranked,
    **text_pos_kws,
)