# Import libraries and set paths

In [1]:
import MDAnalysis as mda
from MDAnalysis.analysis import distances
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import numpy as np
import chilife as xl
from IPython.display import clear_output
from scipy.stats import norm
import pandas as pd
from copy import copy
import gc
import glob
import os
from IPython.display import set_matplotlib_formats
# Select matplotlib's 'default' style sheet for every figure drawn below.
plt.style.use('default')
# Ask IPython's matplotlib integration for the 'retina' figure format.
set_matplotlib_formats('retina')

In [None]:
# User settings: fill these in before running the notebook.
foldname = ''        # sub-folder of md_dir that holds the simulation files
filename = ''        # base name shared by the .gro and .xtc files (no extension)
dataset_name = ""    # prefix used when naming the saved distribution
md_dir = "/home/biomd/Documents/Porph/MD/"
path_to_folder = md_dir + foldname + "/"

# Load topology (.gro) and trajectory (.xtc) with the in-memory reader;
# in_memory_step=200 is forwarded to MDAnalysis as the frame stride.
orig_traj = mda.Universe(
    f"{path_to_folder}{filename}.gro",
    f"{path_to_folder}{filename}.xtc",
    in_memory=True,
    in_memory_step=200,
)
print("Length of MD is ", len(orig_traj.trajectory))

# Helper Functions

In [3]:
# Export Label conformations for debugging
def saveLabel(path, SL):
    """Write the conformations of a label to PDB files for debugging.

    Two files are produced inside ``path`` (which is used as a plain string
    prefix, so it must end with a path separator):

    * ``templabel.pdb`` - the output of ``xl.write_labels`` with every
      occurrence of ``SYSTEM`` replaced by ``A``.
    * ``label_conformations.pdb`` - the leading part of ``templabel.pdb``,
      cut just before the first line that contains ``A_density`` or
      ``HEADER C34R1M_density``.

    Parameters
    ----------
    path : str
        Directory prefix the two files are written to.
    SL
        Label object handed unchanged to ``xl.write_labels``.
    """
    labelout = path + "templabel.pdb"
    readlabel = path + "label_conformations.pdb"

    # 'w+' already truncates the file on open, no explicit truncate() needed.
    with open(labelout, 'w+') as tt:
        xl.write_labels(tt, SL)

    # change all SYSTEM to A
    with open(labelout, 'r') as file:
        filedata = file.read().replace('SYSTEM', 'A')
    with open(labelout, 'w') as file:
        file.write(filedata)

    # Copy everything that precedes the first density marker. Both files are
    # managed by the same ``with`` so no handle is left open (the previous
    # version opened ``readlabel`` twice and never closed the first handle).
    with open(labelout, 'r') as spininfo, open(readlabel, 'w') as output:
        for line in spininfo:
            if "A_density" in line or "HEADER C34R1M_density" in line:
                # Nothing after the first marker is ever written.
                break
            output.write(line)

def flatten(l):
    """Return a new list with the items of every sub-iterable of ``l``, one level deep."""
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat
def plot_exp(ax, distr, coef = 1, color_main = 'grey'):
    """Draw an experimental distribution and its shaded band on ``ax``.

    Parameters
    ----------
    ax
        Object providing ``plot`` and ``fill_between`` (e.g. a matplotlib Axes).
    distr
        2-D array indexed as ``distr[:, k]``: column 0 is the x axis,
        column 1 the curve, columns 2 and 3 the two edges of the filled band.
    coef
        Factor applied to columns 1-3 before plotting.
    color_main
        Colour used for both the curve and the band.
    """
    x_values = distr[:, 0]
    curve = distr[:, 1] * coef
    ax.plot(x_values, curve, label='Experimental', color=color_main, linewidth=3)

    band_edge_a = distr[:, 2] * coef
    band_edge_b = distr[:, 3] * coef
    ax.fill_between(distr[:, 0], band_edge_a, band_edge_b, alpha=0.3, color=color_main)

class CustomEnergyFunc(xl.ljEnergyFunc):
    """``xl.ljEnergyFunc`` subclass that calls the parent with a stored ``forgive`` value.

    ``func_forgive`` is NOT set by ``__init__``: it has to be assigned on the
    instance before the object is called, otherwise ``__call__`` raises
    ``AttributeError``.
    """

    def __init__(self, **kwargs):
        """Forward all keyword arguments to ``xl.ljEnergyFunc``."""
        super().__init__(**kwargs)
        # Bound reference to the parent's __call__, used by our own __call__.
        self.ljFunc = super().__call__

    def __call__(self, rotens):
        """Return the parent energy for ``rotens`` with ``forgive=self.func_forgive``."""
        return self.ljFunc(rotens, forgive=self.func_forgive)

def energ_func(forgive, sasa_weight = 0):
    """Create a ``CustomEnergyFunc`` configured with the given ``forgive`` value.

    Parameters
    ----------
    forgive
        Stored on the returned object as ``func_forgive``.
    sasa_weight
        Accepted but not used by this function.

    Returns
    -------
    CustomEnergyFunc
        Built with ``functional=xl.get_lj_energy``.
    """
    scorer = CustomEnergyFunc(functional=xl.get_lj_energy)
    setattr(scorer, "func_forgive", forgive)
    return scorer

# Get experimental distance distributions (in this case from ComparativeDeerAnalyzer)

In [None]:
exppath = ''  # Path where you keep the experimental distributions
plist = ['']  # List of all relevant distance distributions
distributions_dict = {}
for ligand in plist:
    expdir = exppath + ligand
    print(expdir)
    # Only the first file matching the pattern is used.
    matches = glob.glob(expdir + '*_distr*.dat')
    if not matches:
        raise FileNotFoundError(
            f"No '*_distr*.dat' file found for prefix {expdir!r}"
        )
    distr = np.loadtxt(matches[0])
    r = distr[:, 0]
    # Normalize the distance distribution: columns 1-3 are all divided by the
    # integral of column 1, computed once before any column is modified.
    area = np.trapz(distr[:, 1], r)
    for col in (1, 2, 3):
        distr[:, col] /= area
    # Rescale the distance axis by 10 (presumably nm -> Angstrom; confirm
    # against the units of the input files).
    distr[:, 0] = distr[:, 0] * 10
    distributions_dict[ligand] = distr

In [5]:
def calc_dist_md(main_traj, exp_name, typename, savename):
    hist=0
    r = np.linspace(10, 100, 1000)
    distr = distributions_dict[exp_name]

    rotlib = 'R1M'

    default_names = ['N', 'N01', 'N02', 'N03'] # Names of the atoms with spin density of your ligand
    # Set parameters for ChiLife
    forgive=0.8
    sample=5000
    step = 1
    sigma=0.2
    time_range = [500,750]

    times = np.arange(time_range[0], time_range[1], step=step)
    confNum = np.zeros(len(times))
    meanDist=np.zeros(len(times))
    meddist=[]

    cutoff = 10
    Psum=0
    length = len(main_traj.trajectory)
    time_i=0
    current_mean = -1
    for idx,timestep in enumerate(range(length)[time_range[0]:time_range[1]]):    
        main_traj.trajectory[timestep]
        compl = main_traj.select_atoms(f"protein or resname UNL")
        ligand_1 = compl.select_atoms(f"resname UNL and name N*")
        
        SL_label = xl.SpinLabel(rotlib, site=34,protein=compl, sample=sample, dihedral_sigmas=np.inf, energy_func=energ_func(forgive=1))
        SL_ligand_1 = xl.IntrinsicLabel(atom_selection=ligand_1,res='UNL', spin_atoms = default_names)
        match typename:
            case "nit-ligand":
                SL1, SL2 = SL_label, SL_ligand_1
        # print(np.linalg.norm(SL1.spin_centers - SL2.spin_centers))
        P = xl.distance_distribution(SL1, SL2, r=r, use_spin_centers=False,
                                    sigma=sigma)
        P = P/sum(P)
        Psum+=P
        Pdraw = Psum/np.trapz(Psum, r)
        # saveLabel(path=path_to_folder, SL=SL1) # You can save label conformations if you want
        mean=np.mean(np.dot(r,Pdraw))/sum(Pdraw)
        mean_now=np.mean(np.dot(r,P))/sum(P)
        meanDist[idx] = mean_now
        print(timestep)
        if isinstance(SL1, xl.IntrinsicLabel) and isinstance(SL2, xl.IntrinsicLabel):
            confNum[idx] = 1
        else:
            confNum[idx] = len(SL1)
        if(np.isnan(current_mean)):
            print(mean)
            raise Exception("current_mean is Nan! Check yourself!")

        fig = plt.figure(tight_layout=True, dpi=130)
        gs=gridspec.GridSpec(2,2)
        ax1, ax2, ax3 =fig.add_subplot(gs[0,:]), fig.add_subplot(gs[1,0]), fig.add_subplot(gs[1,1])

        peak = np.max(Pdraw)
        condition = Pdraw > 0.1 * max(Pdraw)
        indices = np.where(condition)[0]
        
        # Auto-scale modeled distribution according to the experimental one
        exp_ind1= min(range(len(distr[:,0])), key=lambda i: abs(distr[i,0]-r[indices[0]]))
        exp_ind2= min(range(len(distr[:,0])), key=lambda i: abs(distr[i,0]-r[indices[-1]]))
        filtered_dist = distr[exp_ind1:exp_ind2,1]
        max_exp = np.max(filtered_dist) if filtered_dist.size > 0 else 1
        coef = peak/max_exp

        plot_exp(ax1, distr, coef)
        ax1.plot(r, Pdraw, color='black', linewidth=2)
        ax1.fill_between(r, np.zeros(len(r)), Pdraw, color='orange', alpha=0.5)
        ax1.set_ylim([0, max(Pdraw)+0.1])
        ax1.set_xlim([10,70])
        ax1.text(0,1,f"{filename} {forgive=} {sample=} {exp_name=}", transform=ax1.transAxes)
        ax1.vlines(mean, 0, 1, linestyles='dashed', label = f"$<r>$ = {round(mean,2)} A", color='red')
        errors = (r-mean)**2
        variance = np.sum(np.dot(errors,Pdraw))/np.sum(Pdraw)
        variance = np.sqrt(variance)
        fwhm = 2*np.sqrt(2*np.log(2))*variance
        ax1.hlines(max(Pdraw)/2, mean-fwhm/2, mean+fwhm/2,
                    color='green', linestyles='dashed', label = f"$<Dr>$ = {round(fwhm,2)} A")
        ax1.legend()
        

        ax2.plot(times[:idx+1],meanDist[:idx+1], label='Median Dist')
        ax2.legend()
        ax3.plot(times[:idx+1],confNum[:idx+1], label = 'Conf Num', color='orange')
        ax3.legend()
        clear_output(wait=True)
        plt.show(fig)
        print(timestep)
        gc.collect()

# Save distribution
    foldname = f"{savename}_step{step}_forgive{forgive}_range{time_range[0]}_{time_range[1]}"
    dict1 = {"r": r, "distribution": Pdraw}
    df1 = pd.DataFrame(dict1)
    # os.mkdir("E:\Docs\MD_Simulations\\Final_Distributions\\{}".format(foldname))
    os.chdir(md_dir)
    savepath = md_dir + "Final_Distributions/{}/".format(foldname)
    os.mkdir(savepath)
    df1.to_csv(savepath+filename+"_dist.dat")

In [None]:
# Each entry of typelist is a typename understood by calc_dist_md; it selects
# the pair of labels between which the distance distribution is calculated.
typelist = ['nit-ligand']
for t in typelist:
    calc_dist_md(
        main_traj=orig_traj,
        exp_name='Ce6/Ce6-Nit',
        typename=t,
        savename=dataset_name,
    )