In [None]:
from AFEP_parse import *
from alchemlyb.preprocessing import subsampling
import os
from scipy.signal import correlate
import numpy as np
import matplotlib.pyplot as plt
import scipy as sp
from scipy.optimize import curve_fit, leastsq

# User parameters
## IMPORTANT: Make sure the temperature (in kelvin) matches the temperature used to run the simulations.

In [None]:
# Simulation temperature. It is multiplied by the gas constant in a later cell
# to obtain RT, so it must match the value used to run the simulations.
temperature = 303.15

# Where the FEP output lives: a directory (the trailing slash matters because
# later cells build file names by plain string concatenation) and a glob
# pattern selecting the .fepout files inside it.
path = '/home/ems363/Documents/ELIC_DCDs_Analyses/POPC120/POEG_23/'
filename = 'PO*.fepout'

# Processing switches forwarded to readAndProcess.
detectEQ = False    # Flag for automatic equilibrium detection
decorrelate = True  # Flag for automatic decorrelation

In [None]:
# RT in kcal/mol: gas constant in kcal/(mol*K) times the user-supplied temperature.
# Later cells multiply the estimates by RT before reporting them in kcal/mol.
RT = 0.00198720650096 * temperature

# Build the search pattern with os.path.join so it gives the same result as
# path+filename when path ends in a separator and still works when it does not.
# glob and natsorted are not imported explicitly in this notebook; presumably
# they are provided by the AFEP_parse star import.
fepoutPattern = os.path.join(path, filename)
fepoutFiles = natsorted(glob(fepoutPattern))

# Fail early with a clear message instead of handing an empty list to the parser.
if not fepoutFiles:
    raise FileNotFoundError(f"No fepout files match '{fepoutPattern}'. Check the path and filename parameters.")

maxSize = 10**9 #Don't use the alchemlyb parser if larger than this size. (bytes)
totalSize = sum(os.path.getsize(fepout) for fepout in fepoutFiles)  # bytes
print(f"Will process {len(fepoutFiles)} fepout files with total size:{np.round(totalSize/10**9, 2)}GB")

# Only a warning: processing continues regardless of the total size.
if totalSize>maxSize:
    print(f"Warning: The files you are trying to read are quite large. Total size={totalSize}.\nTry the read, decorrelate, save method in the Expanded version of this notebook or increase the maxSize variable above.\nIn the future, consider using less frequent sampling (e.g. every 100 steps).")

# Read and process files
See Shirts and Chodera (2008) for more details

"Statistically optimal analysis of samples from multiple equilibrium states" doi: 10.1063/1.2978177

In [None]:
# Read the fepout files; the temperature and the decorrelate/detectEQ flags set
# in the parameter cell are forwarded unchanged to readAndProcess.
# u_nk stores the fep data, affix is a string for meaningful file naming.
u_nk, affix = readAndProcess(
    fepoutFiles,
    temperature,
    decorrelate,
    detectEQ,
)

In [None]:
# Run the free-energy estimation on the processed samples.
# Later cells read per-window quantities from the first result
# (perWindow.BAR.df, perWindow.BAR.ddf, perWindow.EXP.dG_f, perWindow.EXP.dG_b)
# and cumulative quantities from the second
# (cumulative.BAR.f, cumulative.BAR.errors); both are indexed by lambda on the plots.
perWindow, cumulative = doEstimation(u_nk)

In [None]:
# Report the total free-energy change: the last entry of the cumulative BAR
# estimate and its accumulated error.
# Both numbers are scaled by RT so the uncertainty is in kcal/mol like the
# value it accompanies (previously only the estimate was converted).
# NOTE(review): assumes cumulative.BAR.errors is in the same reduced units as
# cumulative.BAR.f — confirm against doEstimation.
changeAndError = (
    f'\u0394G = {np.round(cumulative.BAR.f.iloc[-1]*RT, 1)}'
    f'\u00B1{np.round(cumulative.BAR.errors.iloc[-1]*RT, 3)} kcal/mol'
)
print(changeAndError)

# Plot the change in free energy based on BAR estimates (with forward and backward exponential estimates overlaid per window)

In [None]:
# Two stacked panels sharing the lambda axis: cumulative free energy on top,
# per-window free energy below. Everything drawn is converted to kcal/mol.
fig, (cumAx, eachAx) = plt.subplots(2,1, sharex=True)

# Cumulative change in kcal/mol.
# The error bars are scaled by RT together with the values so both are in the
# same units (previously only the values were converted).
# NOTE(review): assumes BAR.errors and BAR.ddf are in the same reduced units as
# BAR.f and BAR.df — confirm against doEstimation.
cumAx.errorbar(cumulative.index, cumulative.BAR.f*RT, yerr=cumulative.BAR.errors*RT, marker=None, linewidth=1)
cumAx.set(ylabel=r'Cumulative $\rm\Delta G_{\lambda}$'+'\n(kcal/mol)')

# Per-window change in kcal/mol
eachAx.errorbar(perWindow.index, perWindow.BAR.df*RT, yerr=perWindow.BAR.ddf*RT, marker=None, linewidth=1)
# Forward and sign-flipped backward exponential estimates, drawn faintly for
# comparison. No error data is supplied for them, so plain lines are used.
eachAx.plot(perWindow.index, perWindow.EXP.dG_f*RT, marker=None, linewidth=1, alpha=0.5)
eachAx.plot(perWindow.index, -perWindow.EXP.dG_b*RT, marker=None, linewidth=1, alpha=0.5)
eachAx.set(xlabel=r'$\lambda$', ylabel=r'$\rm\Delta G_{\lambda}$'+'\n(kcal/mol)')

fig.set_figwidth(5)
fig.set_figheight(8)
fig.tight_layout()
# Save next to the input data; affix distinguishes the processing options used.
plt.savefig(f'{path}dG_{affix}.pdf')
plt.savefig(f'{path}dG_{affix}.png', dpi=600)
plt.show()

# Plot the estimated total change in free energy as a function of simulation time, using contiguous subsets of the data starting at t=0 ("Forward") and at t=end ("Reverse")

In [None]:
# Draw the convergence figure in kcal/mol (RT supplies the unit conversion).
convergence_plot(
    u_nk,
    cumulative.index,
    units='kcal/mol',
    RT=RT,
)

# Save the current figure as PDF and as a high-resolution PNG.
# Note: path already ends in a separator, so these file names begin with "_".
convergencePdf = f"{path}_Convergence_{affix}.pdf"
convergencePng = f"{path}_Convergence_{affix}.png"
plt.savefig(convergencePdf)
plt.savefig(convergencePng, dpi=600)


# Check for hysteresis by comparing forward and backward samples using an exponential estimator

In [None]:
# Two panels: per-window forward/backward difference (top) and the estimated
# distribution of those differences (bottom).
fig, (histAx, pdfAx) = plt.subplots(2, 1)

# Forward and backward exponential estimates per window, converted to kcal/mol.
forwardDG = np.array(perWindow.EXP.dG_f)*RT
backwardDG = np.array(perWindow.EXP.dG_b)*RT
# Only X, pdfX and pdfY are used below; the remaining outputs are kept for
# interactive inspection.
X, Y, pdfX, pdfY, fitted, pdfXnorm, pdfYnorm, pdfYexpected = getPDF(forwardDG, backwardDG)

# Summary statistics shown in the text box: mean and standard deviation of X
# and the location of the maximum of the estimated PDF.
mean = np.average(X)
std = np.std(X)
temp = pd.Series(pdfY, index=pdfX)
mode = temp.idxmax()

# Top panel: one vertical line per window, from zero to the difference.
histAx.vlines(
    perWindow.index,
    np.zeros(len(perWindow)),
    perWindow.EXP['difference']*RT,
    label="fwd - bwd",
    linewidth=2,
)
histAx.set(xlabel=r'$\lambda$', ylabel=r'$\delta_\lambda$ (kcal/mol)')

# Bottom panel: estimated distribution with the summary statistics annotated.
pdfAx.plot(pdfX, pdfY,  label="Estimated Distribution")
pdfAx.set(xlabel=r'$\delta_\lambda$ (kcal/mol)', ylabel="PDF")

textstr = "\n".join([
    r"$\rm{mode_{pdf}=}$"+f"{np.round(mode,2)}",
    fr"$\mu$={np.round(mean,2)}",
    fr"$\sigma$={np.round(std,2)}",
])
props = dict(boxstyle='square', facecolor='white', alpha=0.5)
pdfAx.text(
    0.15, 0.95, textstr,
    transform=pdfAx.transAxes,
    fontsize=14,
    verticalalignment='top',
    bbox=props,
)

fig.set_figheight(8)
fig.tight_layout()
plt.savefig(f"{path}pdf_{affix}.pdf")
plt.savefig(f"{path}pdf_{affix}.png", dpi=600)

plt.show()