In [None]:
import plumed
from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection
import numpy as np
import MDAnalysis as mda
import pandas as pd

import random
from deeptime.decomposition import TICA
from deeptime.covariance import KoopmanWeightingEstimator
from deeptime.clustering import MiniBatchKMeans
from deeptime.markov import TransitionCountEstimator
from deeptime.markov.msm import MaximumLikelihoodMSM
from copy import deepcopy
from numpy.random import multinomial

import math
from scipy.interpolate import griddata
from scipy.stats import ks_2samp
from scipy.optimize import curve_fit
from statsmodels.distributions.empirical_distribution import ECDF

Initialization Section

FES Calculation Section

In [None]:
### Visualizarion and analysis functions    
def cal_OPES_1DFES(cv,dir,sigma=None,block=10,min_slope_index=None):
    if sigma == None:
        KERNEL = plumed.read_as_pandas(f'../../FES/{dir}/KERNELS')
        sigma = KERNEL.loc[:,f'sigma_{cv}'].iloc[-1]
    !python /root/FES_from_Reweighting.py -f ../../FES/{dir}/OPES-COLVAR -o ../../FES/{dir}/1D_FES -s {sigma}  --temp 310 --cv {cv} --bias opes.bias #--stride 100
    FES = plumed.read_as_pandas(f'../../FES/{dir}/1D_FES') 
    COLVAR = plumed.read_as_pandas(f'../../FES/{dir}/OPES-COLVAR')
    x_start = COLVAR[f'{cv}'].iloc[0]
    if x_start < 0:
        x_start = x_start + 2*math.pi
    x_start = x_start/math.pi*180

    !python /root/FES_from_Reweighting.py -f ../../FES/{dir}/OPES-COLVAR -o ../../FES/{dir}/1D_FES_{block} -s {sigma}  --temp 310 --cv {cv} --bias opes.bias --blocks {block}
    block_number = np.around(len(COLVAR[f'{cv}'])/block)
    error = pd.read_csv(f'../../FES/{dir}/1D_FES_{block}_log',sep='\t',header=None)
        
    block_error = float(error.iloc[:,2])
    print(f'block numbers are {block_number}')
    print(f'block errors are {block_error}')
    ERROR = plumed.read_as_pandas(f'../../FES/{dir}/1D_FES_{block}') 

    plot_FES = []
    plot_CV = []
    plot_uncertainty = []
    shift_FES = []
    shift_CV = []
    shift_uncertainty = []
    for i in range(len(ERROR[f'{cv}'])):
        if ERROR[f'{cv}'][i] < 0:
            ERROR[f'{cv}'][i] = ERROR[f'{cv}'][i]+2*math.pi
            shift_CV.append(ERROR[f'{cv}'][i]/math.pi*180)
            shift_FES.append(ERROR['file.free'][i])
            shift_uncertainty.append(ERROR['uncertainty'][i])
        else:
            plot_CV.append(ERROR[f'{cv}'][i]/math.pi*180)
            plot_FES.append(ERROR['file.free'][i])
            plot_uncertainty.append(ERROR['uncertainty'][i])
    plot_CV = plot_CV+shift_CV
    plot_FES = plot_FES + shift_FES
    plot_FES = np.array(plot_FES)/4.184
    plot_uncertainty = plot_uncertainty + shift_uncertainty
    plot_uncertainty = np.array(plot_uncertainty)/4.184

    !rm ../../FES/{dir}/1D_FES_*_*
    return plot_CV,plot_FES,plot_uncertainty,x_start
def plot_allFES(plot_CV,plot_FES,plot_uncertainty,x_start,
                xlabel=r'$\chi_2^{W158}$ (degree)',
                savepath='../../FES/chi21D_FES_all.pdf',
                labels=('MhOR5-EOL','MhOR5-DEET',r'MhOR5$_{apo}$')):
    """Overlay several 1D free-energy profiles with their uncertainty bands.

    Parameters
    ----------
    plot_CV, plot_FES, plot_uncertainty : sequence of array-like
        One entry per curve: x values (degrees), free energy and its
        uncertainty, as returned by ``cal_OPES_1DFES``.
    x_start : sequence of float
        Starting angles; the span between their minimum and maximum is
        shaded in gray.
    xlabel : str, optional
        X-axis label. The default is the previously hard-coded chi2 label;
        pass an explicit label when plotting another CV.
    savepath : str, optional
        Output file. The default is the previously hard-coded path; pass a
        different path per CV so successive calls do not overwrite each other.
    labels : sequence of str, optional
        Legend labels, one per curve, in the same order as ``plot_CV``.
    """
    # plt.plot(figsize=...) creates no figure and ignores the size;
    # plt.figure is what actually applies it.
    plt.figure(figsize=(10,6))
    for i in range(len(plot_CV)):
        fes = np.asarray(plot_FES[i])
        unc = np.asarray(plot_uncertainty[i])
        plt.plot(plot_CV[i],fes,label=labels[i])
        plt.fill_between(plot_CV[i],fes-unc,fes+unc,alpha=0.5)
    plt.xlabel(xlabel,fontsize=20)
    plt.ylabel('Free Energy (kcal/mol)',fontsize=20)
    plt.ylim(0,20)
    plt.xlim(0,360)
    plt.xticks(np.arange(0,370,60))
    plt.yticks(np.arange(0,21,5))
    # Shade the range covered by the starting angles of all curves.
    plt.axvspan(np.min(x_start), np.max(x_start), color='gray', alpha=0.3)
    plt.legend()
    plt.tight_layout()
    plt.savefig(savepath,dpi=300)
    plt.show()

In [None]:
# 1D FES along torsion_chi1-162 for the APO, DEET and EOL runs.
# min_slope_index is forwarded as before; cal_OPES_1DFES does not use it.
fes_opts_chi1 = dict(sigma=0.0628, min_slope_index=5, block=10)

(Aplot_CV_1, Aplot_FES_1,
 Aplot_uncertainty_1, Ax_start_1) = cal_OPES_1DFES('torsion_chi1-162', dir='APO', **fes_opts_chi1)
(Dplot_CV_1, Dplot_FES_1,
 Dplot_uncertainty_1, Dx_start_1) = cal_OPES_1DFES('torsion_chi1-162', dir='DEET', **fes_opts_chi1)
(Eplot_CV_1, Eplot_FES_1,
 Eplot_uncertainty_1, E_xstart_1) = cal_OPES_1DFES('torsion_chi1-162', dir='EOL', **fes_opts_chi1)

# Curve order (EOL, DEET, APO) matches the legend labels in plot_allFES.
plot_CV_1 = [Eplot_CV_1, Dplot_CV_1, Aplot_CV_1]
plot_FES_1 = [Eplot_FES_1, Dplot_FES_1, Aplot_FES_1]
plot_uncertainty_1 = [Eplot_uncertainty_1, Dplot_uncertainty_1, Aplot_uncertainty_1]
x_start_1 = [E_xstart_1, Dx_start_1, Ax_start_1]

In [None]:
# 1D FES along torsion_chi2-162 for the APO, DEET and EOL runs.
# min_slope_index is forwarded as before; cal_OPES_1DFES does not use it.
fes_opts_chi2 = dict(sigma=0.0628, min_slope_index=5, block=10)

(Aplot_CV_2, Aplot_FES_2,
 Aplot_uncertainty_2, Ax_start_2) = cal_OPES_1DFES('torsion_chi2-162', dir='APO', **fes_opts_chi2)
(Dplot_CV_2, Dplot_FES_2,
 Dplot_uncertainty_2, Dx_start_2) = cal_OPES_1DFES('torsion_chi2-162', dir='DEET', **fes_opts_chi2)
(Eplot_CV_2, Eplot_FES_2,
 Eplot_uncertainty_2, E_xstart_2) = cal_OPES_1DFES('torsion_chi2-162', dir='EOL', **fes_opts_chi2)

# Curve order (EOL, DEET, APO) matches the legend labels in plot_allFES.
plot_CV_2 = [Eplot_CV_2, Dplot_CV_2, Aplot_CV_2]
plot_FES_2 = [Eplot_FES_2, Dplot_FES_2, Aplot_FES_2]
plot_uncertainty_2 = [Eplot_uncertainty_2, Dplot_uncertainty_2, Aplot_uncertainty_2]
x_start_2 = [E_xstart_2, Dx_start_2, Ax_start_2]

In [None]:
# chi1 profiles. NOTE: this call relies on plot_allFES's built-in axis label
# (chi_2) and output path (chi21D_FES_all.pdf), which the chi2 call also uses.
plot_allFES(
    plot_CV=plot_CV_1,
    plot_FES=plot_FES_1,
    plot_uncertainty=plot_uncertainty_1,
    x_start=x_start_1,
)

In [None]:
# chi2 profiles for the three systems (EOL, DEET, APO).
plot_allFES(
    plot_CV=plot_CV_2,
    plot_FES=plot_FES_2,
    plot_uncertainty=plot_uncertainty_2,
    x_start=x_start_2,
)

Kinetics Calculation Section


In [None]:
def cal_kinetics(dir,barrier,T=310,dt=200):
    """Rescale the simulation time of one biased run by its acceleration factor.

    For every COLVAR frame the factor ``exp((opes.bias + barrier)/kT)`` is
    computed; the rescaled time is the running sum of ``dt * factor``.
    A per-frame table is written to ``./{dir}/time`` and the running average
    of the factor is plotted against simulation time.

    Parameters
    ----------
    dir : str
        Run directory containing ``OPES-COLVAR`` (columns ``time``,
        ``dtotal`` and ``opes.bias`` are read).
    barrier : float
        Constant added to the bias before dividing by kT (kT is in kJ/mol).
    T : float, optional
        Temperature in K (default 310, the previously hard-coded value).
    dt : float, optional
        Time between COLVAR frames in ps (default 200, the previously
        hard-coded value); it must match the stride used in the run.

    Returns
    -------
    final_time_print : str
        Comma-separated last row: time, dtotal, rescaled time, and
        time * average factor, followed by a newline.
    final_rescaled_time : float
        Accumulated rescaled time divided by 1e12 (ps -> s).
    final_unrescaled_time : float
        Last value of the COLVAR ``time`` column.

    Raises
    ------
    ValueError
        If the COLVAR file contains no frames.
    """
    time_unit = 10**12 # ps per second
    kT = 0.008314462618*T # in kJ/mol

    COLVAR = plumed.read_as_pandas(f'./{dir}/OPES-COLVAR')
    bias = np.array(COLVAR.loc[:,'opes.bias'])
    dtotal = np.array(COLVAR.loc[:,'dtotal'])
    time = np.array(COLVAR.loc[:,'time'])

    n_frames = len(time)
    if n_frames == 0:
        # Previously this surfaced later as a NameError on the final values.
        raise ValueError(f'./{dir}/OPES-COLVAR contains no frames')

    # Per-frame acceleration factor and its running statistics. np.cumsum adds
    # sequentially, so the values match the former frame-by-frame accumulation.
    accelerate = np.exp((bias+barrier)/kT)
    accumulate_accelerate = np.cumsum(accelerate)
    average_accelerate = accumulate_accelerate/np.arange(1,n_frames+1)
    rescaled_time = np.cumsum(dt*accelerate)/time_unit
    averaged_time = (time/time_unit)*average_accelerate

    # Header is comma-separated and says "(ns)"; the rows are space-separated
    # and the two time columns are ps/1e12. Kept unchanged for compatibility.
    with open(f'./{dir}/time','w') as f1:
        print('simulation_time, CV, accelerate_time(ns),accumulate_accelerate_time(ns)',file=f1)
        for row in zip(time,dtotal,rescaled_time,averaged_time):
            print(*row,file=f1)

    final_unrescaled_time = time[-1]
    final_rescaled_time = rescaled_time[-1]
    final_time_print = f'{time[-1]},{dtotal[-1]},{rescaled_time[-1]},{averaged_time[-1]}\n'
    average_accelerate_print = average_accelerate[-1]

    print(f'the off time of {dir} is: {final_rescaled_time} (s) = {final_rescaled_time*10**3} ms = {final_rescaled_time*10**6} us')
    print(f'now the average accelerate factor is {average_accelerate_print}')
    
    # time is in ps; divide by 1e3 for the ns axis.
    plt.plot(time/(10**3),average_accelerate)

    plt.xlabel('MD simulation time (ns)')
    plt.ylabel('Average Accelerate Factor')
    plt.grid(True)

    plt.show()
    return final_time_print,final_rescaled_time,final_unrescaled_time
def evaluate_kinetics(final_time_print,final_rescaled_time,final_unrescaled_time,title,bootstrap=True,boots=50):
    """Fit an exponential CDF to rescaled times and run a two-sample KS test.

    The empirical CDF of the times (normalised by their mean) is fitted with
    ``1 - exp(-x/tau)``. The normalised times are then compared, via
    ``scipy.stats.ks_2samp``, with 10^7 draws from a gamma distribution of
    shape 1 and scale ``tau``. The ECDF and the fit are drawn on one
    log-scaled axis and saved as ``./{title}KS-test.png``.

    Parameters
    ----------
    final_time_print : iterable of str
        Pre-formatted lines written verbatim to ``./{title}-all-time.dat``.
    final_rescaled_time : array-like of float
        Rescaled times to analyse; must be non-empty.
    final_unrescaled_time : array-like
        Accepted for backward compatibility; not used.
    title : str
        Plot title and prefix of the output files.
    bootstrap : bool
        When True, repeat the fit on ``boots`` resamples (with replacement)
        to estimate the spread of tau and of the p-value. Each resample's
        ECDF and fit are drawn on the same axis.
    boots : int
        Number of bootstrap resamples.

    Returns
    -------
    tau, pvalue, tau_std, p_std
        ``tau`` is 1000 x the fitted time constant in the units of
        ``final_rescaled_time``; ``pvalue`` is rounded to 10 decimals.
        ``tau_std`` and ``p_std`` are the bootstrap standard deviations, or
        NaN when ``bootstrap`` is False.

    Raises
    ------
    ValueError
        If ``final_rescaled_time`` is empty.
    """
    final_rescaled_time = np.asarray(final_rescaled_time,dtype=float)
    if final_rescaled_time.size == 0:
        raise ValueError('final_rescaled_time is empty; at least one time is required')

    with open(f'./{title}-all-time.dat','w') as f:
        for line in final_time_print:
            # Each entry is one pre-formatted line (a list of lines also works).
            f.writelines(line)

    fig, ax = plt.subplots(nrows=1, ncols=1, sharey=True)

    def func(x,tau):
        # CDF of an exponential distribution with time constant tau.
        return 1-np.exp(-x/tau)

    # Only the header is written to this file.
    with open(f'./{title}-results-ks.dat','w') as out:
        out.write('Results-KS:\n')

    def KStest(times):
        """Fit, test and plot one sample; returns (tau*1e3, p-value)."""
        times = np.asarray(times,dtype=float)
        mint = np.min(times)/100
        maxt = np.max(times)*100

        # For numerical stability the times are divided by their mean.
        mu = np.mean(times)
        x = times/mu

        # Empirical CDF evaluated on a log-spaced grid.
        ecdf = ECDF(x)
        x1 = np.logspace(np.log10(mint/mu), np.log10(maxt/mu),10000)
        y1 = ecdf(x1)

        # Fit the theoretical CDF to obtain tau (in units of mu).
        popt,pcov = curve_fit(func,x1,y1)
        tau = popt[0]
        yfit = func(x1,tau)

        # Reference sample from the fitted distribution for the KS test.
        x2 = np.random.gamma(1,tau,10000000)
        st,pvalue = ks_2samp(x2,x)

        ax.step(x1*mu, y1,'k-',lw=1. )
        ax.plot(x1*mu,yfit,'b-',lw=3.)
        ax.set_xscale('log')
        return float(tau*mu*1E+3),float('{:.10f}'.format(pvalue))

    tau,pvalue = KStest(final_rescaled_time)

    # Defined up front so the return below is valid when bootstrap is False.
    tau_std = float('nan')
    p_std = float('nan')
    if bootstrap:
        taus = []
        ps = []
        for _ in range(boots):
            resample = np.array(random.choices(final_rescaled_time,k=len(final_rescaled_time)))
            # One call per resample: tau and p come from the same fit, and the
            # expensive 10^7-sample reference draw is not repeated.
            boot_tau,boot_p = KStest(resample)
            taus.append(boot_tau)
            ps.append(boot_p)

        tau_std = np.std(taus)
        p_std = np.std(ps)

    ax.set_xlabel('Time (s)',fontsize=16)
    ax.set_ylabel('Cumulative Probability',fontsize=16)
    ax.set_title(title,fontsize=16)
    ax.set_xscale('log')
    ax.set_ylim(0,1.005)
    fig.tight_layout()
    fig.savefig(f'./{title}KS-test.png', bbox_inches='tight', dpi=300,transparent=True)
    plt.show()
    return tau,pvalue,tau_std,p_std

def plot_trajs(dir_list,cv1,cv2,plot_CV,plot_FES,plot_uncertainty,xlabel=r'$\chi_1^{W158}$ (degree)'):
    """Plot per-run trajectories in (cv1, cv2) space under a shared FES panel.

    The figure has two columns (titled 'Top' and 'Side'), one per entry of
    ``dir_list``. Row 0 of each column shows the 1D free-energy profile;
    each following row shows one run as a line coloured by the ``cv2``
    values of consecutive frames. The figure is saved as
    ``EOLtrajs{cv1}.pdf``.

    Parameters
    ----------
    dir_list : sequence of two sequences of str
        Run directories for the left and right column; each must contain
        an ``OPES-COLVAR`` file.
    cv1 : str
        COLVAR column used for the x axis; treated as radians and converted
        to degrees, with negative values shifted by +360.
    cv2 : str
        COLVAR column used for the y axis (axis label is d_{l-p} in nm).
    plot_CV, plot_FES, plot_uncertainty : array-like
        Profile drawn in the top row of both columns.
    xlabel : str, optional
        X-axis label of the bottom panel of each column. The default is
        the previously hard-coded chi1 label.
    """
    length = max(len(dir_list[0]),len(dir_list[1]))+1
    # squeeze=False keeps axs two-dimensional even when there is one row.
    fig,axs = plt.subplots(length,2,figsize=(10,1*length),squeeze=False)
    for j, dirs in enumerate(dir_list):
        for k, dir in enumerate(dirs):
            ax = axs[k+1,j]

            COLVAR = plumed.read_as_pandas(f'./{dir}/OPES-COLVAR')
            angle = COLVAR.loc[:,f'{cv1}'].to_numpy(dtype=float)/math.pi*180
            angle = np.where(angle < 0, angle+360, angle)
            dist = COLVAR.loc[:,f'{cv2}'].to_numpy(dtype=float)

            # One two-point segment per pair of consecutive frames.
            points = np.column_stack([angle,dist]).reshape(-1,1,2)
            segments = np.concatenate([points[:-1],points[1:]],axis=1)
            d_first = segments[:,0,1]
            d_second = segments[:,1,1]

            # Colour rules, in priority order:
            #   C2: both ends above 1.2
            #   C1: mean above 0.9, or a jump between <0.9 and >1.2
            #   C0: everything else
            far = (d_first > 1.2) & (d_second > 1.2)
            jump = ((d_first < 0.9) & (d_second > 1.2)) | ((d_first > 1.2) & (d_second < 0.9))
            mid = ((d_first+d_second)/2 > 0.9) | jump
            seg_colors = np.where(far,'C2',np.where(mid,'C1','C0')).tolist()

            # A single collection per panel instead of one Line2D per segment.
            if len(segments):
                ax.add_collection(LineCollection(segments,colors=seg_colors))

            ax.set_ylim(0,2.5)
            if j == 0:
                ax.set_ylabel(r'$d_{l-p}$ (nm)',fontsize=10)
            if j != 0:
                ax.set_yticks([])

            ax.set_xlim(0,360)
            # Only the last run of each column keeps its x ticks.
            if k + 1 == len(dirs):
                ax.set_xticks(np.arange(0,361,120))
            else:
                ax.set_xticks([])

            ax.axvline(110,color='black',linestyle='--',alpha=0.3)
            ax.axvline(240,color='black',linestyle='--',alpha=0.3)

            ax.axhline(0.9,color='black',linestyle='--',alpha=0.3)

    # Top row: the same free-energy profile in both columns.
    fes = np.array(plot_FES)
    unc = np.array(plot_uncertainty)
    for j, panel_title in enumerate(('Top','Side')):
        top_ax = axs[0,j]
        top_ax.plot(plot_CV,plot_FES,color='C1')
        top_ax.fill_between(plot_CV,fes-unc,fes+unc,color='C0',alpha=0.3)
        top_ax.set_title(panel_title,fontsize=16)
        top_ax.axvline(110,color='black',linestyle='--',alpha=0.3)
        top_ax.axvline(240,color='black',linestyle='--',alpha=0.3)
        top_ax.set_xticks([])
    axs[0,1].set_yticks([])

    for j, dirs in enumerate(dir_list):
        n_valid = len(dirs)
        # Label the last populated panel of the column: axs[-1, j] is switched
        # off for the shorter column, which would hide its label.
        axs[n_valid,j].set_xlabel(xlabel,fontsize=16)
        for i in range(n_valid + 1, length):
            axs[i, j].axis('off')
    plt.tight_layout()
    plt.savefig(f'EOLtrajs{cv1}.pdf',dpi=300,transparent=True)
    plt.show()
def cal_koff(toff,tstd):
    """Turn a time constant and its uncertainty into a rate and its uncertainty.

    Returns ``(1000/toff, 1000*tstd/toff**2)``; the second value is the
    first-order propagation of ``tstd`` through ``1000/toff``.
    Presumably ``toff`` is in ms so the rate is per second; confirm with
    the caller.
    """
    rate = 1000/toff
    rate_err = (1000*tstd)/(toff**2)
    return (rate, rate_err)

In [None]:
# Run directory names for the two groups; each name is used as a directory
# by cal_kinetics and plot_trajs.
top_list = '4 16 14 8 18 2 7 6'.split()
side_list = '3 12 19 11 1 9 13 10 17 5 15 20'.split()

# Two separate outer lists that share the same inner lists.
out_list = [top_list, side_list]
total_list = [top_list, side_list]

In [None]:
# Trajectories of all runs against chi1-162, under the EOL chi1 profile.
plot_trajs(
    dir_list=total_list,
    cv1='chi1-162',
    cv2='dtotal',
    plot_CV=Eplot_CV_1,
    plot_FES=Eplot_FES_1,
    plot_uncertainty=Eplot_uncertainty_1,
)

In [None]:
# Trajectories of all runs against chi2-162, under the EOL chi2 profile.
plot_trajs(
    dir_list=total_list,
    cv1='chi2-162',
    cv2='dtotal',
    plot_CV=Eplot_CV_2,
    plot_FES=Eplot_FES_2,
    plot_uncertainty=Eplot_uncertainty_2,
)

In [None]:
# Per-group accumulators for the three values returned by cal_kinetics.
top_final_time_print_list, top_final_rescaled_time_list, top_final_unrescaled_time_list = [], [], []
side_final_time_print_list, side_final_rescaled_time_list, side_final_unrescaled_time_list = [], [], []
# The no_* lists are created but not filled in this cell.
no_final_time_print_list, no_final_rescaled_time_list, no_final_unrescaled_time_list = [], [], []

# Process the top runs first, then the side runs; 20 is the barrier argument
# that cal_kinetics adds to the bias.
for run_dirs, print_list, rescaled_list, unrescaled_list in (
    (top_list, top_final_time_print_list, top_final_rescaled_time_list, top_final_unrescaled_time_list),
    (side_list, side_final_time_print_list, side_final_rescaled_time_list, side_final_unrescaled_time_list),
):
    for i in run_dirs:
        final_time_print, final_rescaled_time, final_unrescaled_time = cal_kinetics(i, 20)
        print_list.append(final_time_print)
        rescaled_list.append(final_rescaled_time)
        unrescaled_list.append(final_unrescaled_time)

# Combined lists: top runs followed by side runs.
total_final_time_print_list = top_final_time_print_list + side_final_time_print_list
total_final_rescaled_time_list = top_final_rescaled_time_list + side_final_rescaled_time_list
total_final_unrescaled_time_list = top_final_unrescaled_time_list + side_final_unrescaled_time_list

for combined in (
    total_final_time_print_list,
    total_final_rescaled_time_list,
    total_final_unrescaled_time_list,
):
    print(len(combined))

In [None]:
# Exponential fit and KS test for the runs in side_list, titled 'P2'.
toffp2, pp2, tstdp2, pstdp2 = evaluate_kinetics(
    final_time_print=side_final_time_print_list,
    final_rescaled_time=side_final_rescaled_time_list,
    final_unrescaled_time=side_final_unrescaled_time_list,
    title='P2',
)

In [None]:
# Convert the fitted time constant to a rate, then report all P2 results.
koffp2, koff_std_p2 = cal_koff(toffp2, tstdp2)
for message in (
    f'toff of P2 is {toffp2} +/- {tstdp2}',
    f'p value of P2 is {pp2} +/- {pstdp2}',
    f'koff of P2 is {koffp2} +/- {koff_std_p2}',
):
    print(message)

In [None]:
# Exponential fit and KS test for the runs in top_list, titled 'P1'.
toffp1, pp1, tstdp1, pstdp1 = evaluate_kinetics(
    final_time_print=top_final_time_print_list,
    final_rescaled_time=top_final_rescaled_time_list,
    final_unrescaled_time=top_final_unrescaled_time_list,
    title='P1',
)

In [None]:
# Convert the fitted time constant to a rate, then report all P1 results.
koffp1, koff_std_p1 = cal_koff(toffp1, tstdp1)
for message in (
    f'toff of P1 is {toffp1} +/- {tstdp1}',
    f'p value of P1 is {pp1} +/- {pstdp1}',
    f'koff of P1 is {koffp1} +/- {koff_std_p1}',
):
    print(message)

In [None]:
# Exponential fit and KS test for all runs (top followed by side), titled 'Pall'.
toffpall, ppall, tstdpall, pstdpall = evaluate_kinetics(
    final_time_print=total_final_time_print_list,
    final_rescaled_time=total_final_rescaled_time_list,
    final_unrescaled_time=total_final_unrescaled_time_list,
    title='Pall',
)

In [None]:
# Convert the fitted time constant to a rate, then report all Pall results.
koffpall, koff_std_pall = cal_koff(toffpall, tstdpall)
for message in (
    f'toff of Pall is {toffpall} +/- {tstdpall}',
    f'p value of Pall is {ppall} +/- {pstdpall}',
    f'koff of Pall is {koffpall} +/- {koff_std_pall}',
):
    print(message)

Maximum-likelihood estimate (MLE) for DEET

In [None]:
#MLE estimation
# The estimate of tau is the sample mean of the rescaled times; its
# uncertainty is taken as tau/sqrt(n).
out_final_rescaled_time_list = top_final_rescaled_time_list + side_final_rescaled_time_list

# Convert to an array before scaling: `list*1000` repeats the list 1000 times
# instead of multiplying the values. The times are ps/1e12 (seconds) in
# cal_kinetics, so *1000 gives ms, the unit cal_koff is called with elsewhere.
rescaled_time_ms = np.asarray(out_final_rescaled_time_list, dtype=float)*1000
n_events = len(out_final_rescaled_time_list)

taumle = np.sum(rescaled_time_ms)/n_events
taumle_std = np.sum(rescaled_time_ms)/(n_events**1.5)
koffmle,koff_std_mle = cal_koff(taumle,taumle_std)

print(f'koff of MLE is {koffmle} +/- {koff_std_mle}')
print(f'tau of MLE is {taumle} +/- {taumle_std}')