User Settings:

In [None]:
from glob import glob #file regexes

# Glob pattern (relative to the working directory) selecting the fepout files to analyse
files = glob("*fixed.fepout")
# Samples whose "step" value is below this threshold are discarded as pre-equilibration
equil = 10000
# Temperature used to build beta = 1/(R*T); presumably in kelvin -- confirm
temperature = 300

Imports

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm #for progress bars
import re #regex
from natsort import natsorted #for sorting "naturally" instead of alphabetically

In [None]:
#These alchemlyb imports do not work correctly yet
#from alchemlyb.estimators import BAR 
#from alchemlyb.visualisation.dF_state import plot_dF_state
#from alchemlyb.visualisation import plot_convergence

Function Declarations:

In [None]:
def readFEPOUT(fileName, step=1):
    """Read one .fepout file into a DataFrame in a single pass.

    Header lines (starting with '#') are scanned for the current lambda values;
    every retained data line is stored as a list of tokens with its lambda pair
    appended. The list is converted to a DataFrame once at the end, which avoids
    repeatedly appending to a DataFrame.

    Parameters
    ----------
    fileName : str or path-like
        Path of the fepout file to read.
    step : int, default 1
        Subsampling stride. A data line is kept when ``frame % step <= 1``,
        where ``frame`` counts data lines since the last header line.
        NOTE(review): with this test ``step=2`` keeps every line and larger
        values keep two consecutive lines per cycle -- confirm whether
        ``== 0`` was intended.

    Returns
    -------
    pandas.DataFrame
        Float columns 'step', 'Elec_l', 'Elec_ldl', 'vdW_l', 'vdW_ldl', 'dE',
        'dE_avg', 'Temp', 'dG', 'FromLambda', 'ToLambda', plus 'window'
        (mean of FromLambda and ToLambda) and the boolean 'up'
        (ToLambda > FromLambda). Rows with any missing value are dropped, e.g.
        data lines seen before the matching lambda was announced. A file with
        no usable data lines yields an empty frame with the same columns.
        If a retained data line starts with an unknown tag, a message is
        printed and the integer 0 is returned instead (legacy behaviour).
    """
    colNames = ["type", 'step', 'Elec_l', 'Elec_ldl', 'vdW_l', 'vdW_ldl', 'dE', 'dE_avg', 'Temp', 'dG', 'FromLambda', "ToLambda"]

    # Raw strings keep \d and \. as regex escapes instead of (invalid) string escapes;
    # compiling once hoists the work out of the per-line loop.
    lambdaRe = re.compile(r'LAMBDA SET TO (\d+(\.\d+)*)')
    lambda2Re = re.compile(r'LAMBDA2 (\d+(\.\d+)*)')
    lambdaIDWSRe = re.compile(r'LAMBDA_IDWS (\d+(\.\d+)*)')

    data = []

    # Most recently announced lambda values; NaN until a header provides them
    L = np.nan
    L2 = np.nan
    LIDWS = np.nan

    frame = 0
    # The with-statement closes the file; no explicit close() is needed
    with open(fileName) as fepFile:
        for line in fepFile:
            if line.startswith('#'):
                # Any header line restarts the subsampling counter
                frame = 0
                Lambda = lambdaRe.search(line)
                Lambda2 = lambda2Re.search(line)
                LambdaIDWS = lambdaIDWSRe.search(line)
                if Lambda:
                    L = Lambda.group(1)
                if Lambda2:
                    L2 = Lambda2.group(1)
                if LambdaIDWS:
                    LIDWS = LambdaIDWS.group(1)
                continue

            lineList = line.split()
            if not lineList:
                # Blank line: carries no sample, so skip it instead of aborting the file
                continue

            if frame % step <= 1:
                lineList.append(L)
                if lineList[0] == "FepEnergy:":
                    # Forward sample: perturbation target is LAMBDA2
                    lineList.append(L2)
                elif lineList[0] == "FepE_back:":
                    # Backward sample: perturbation target is LAMBDA_IDWS
                    lineList.append(LIDWS)
                else:
                    print(f'Unexpected line start: {lineList[0]}')
                    return 0
                data.append(lineList)
            frame = frame + 1

    # Passing the column names up front also yields a well-formed empty frame
    # when no data line was collected.
    df = pd.DataFrame(data, columns=colNames).dropna()
    # Drop the textual "type" tag and convert everything else to numbers
    df = df.iloc[:, 1:].astype(float)
    df["window"] = np.mean([df.FromLambda, df.ToLambda], axis=0)
    df["up"] = df.ToLambda > df.FromLambda

    df = df.sort_index()
    return df

In [None]:
def readFiles(files, step=1):
    """Parse several fepout files and stack them into one DataFrame.

    Each path in ``files`` is read with readFEPOUT (forwarding ``step``) while
    a tqdm progress bar is shown. The concatenated frame is re-indexed by its
    'window' column and gains a 'dVdW' column equal to vdW_ldl - vdW_l.
    """
    frames = [readFEPOUT(path, step) for path in tqdm(files)]
    data = pd.concat(frames)

    # Index rows by window so later cells can select a window with .loc
    data.index = data.window
    data["dVdW"] = data.vdW_ldl - data.vdW_l

    return data

In [None]:
def u_nk_fromDF(data, temperature):
    """Pivot per-sample dE values into a table scaled by beta.

    Rows are indexed by (step, FromLambda) and columns by ToLambda; each cell
    is the mean dE for that combination (pivot_table's default aggregation)
    multiplied by beta = 1 / (R * temperature), with R expressed in
    kcal/(mol K). Presumably dE is in kcal/mol, making the result
    dimensionless -- confirm.
    """
    from scipy.constants import R, calorie

    # R is in J/(mol K); dividing by 1000*calorie converts it to kcal/(mol K)
    kT = R / (1000 * calorie) * temperature
    beta = 1 / kT

    table = data.pivot_table(index=["step", "FromLambda"], columns="ToLambda", values="dE")
    # Sort rows by step and columns by ToLambda so an estimator can interpret the layout
    table = table.sort_index(level=0).sort_index(axis='columns')

    return table * beta

In [None]:
def get_dG(u_nk):
    """Exponential-average estimate for every (FromLambda, ToLambda) pair.

    Parameters
    ----------
    u_nk : pandas.DataFrame
        Table whose second index level identifies the sampled window
        (FromLambda) and whose columns are the target ToLambda values.

    Returns
    -------
    pandas.DataFrame
        One column per sampled window (the group key) and one row per
        column of ``u_nk``. Each cell is log(mean(exp(-u))) taken over that
        window's samples; NaN samples are ignored and combinations with no
        samples stay NaN.

    NOTE(review): the textbook exponential estimator carries a leading minus
    sign (dG = -ln<exp(-u)>); the sign used here is kept as in the original
    code because the plotting cells depend on it -- confirm the convention.
    """
    perWindow = {}
    for name, group in u_nk.groupby(level=1):
        # Average over this window's own samples (the rows of `group`), column
        # by column. The explicit axis avoids np.mean's version-dependent
        # behaviour on DataFrames.
        perWindow[name] = np.log(np.exp(-1 * group).mean(axis=0))

    # Building the frame once avoids the fragmentation caused by inserting
    # columns one at a time.
    return pd.DataFrame(perWindow)

Read files

In [None]:
# Natural sort so that numbered file names are ordered numerically rather than alphabetically
files = natsorted(files)
# Parse every file and combine the results into one DataFrame indexed by window
data = readFiles(files)

Trim the data to remove pre-equilibration samples

In [None]:
# Keep only samples recorded at or after the equilibration threshold
trimmedData = data.loc[data.step>=equil]
# Pivot the retained samples into the beta-scaled (step, FromLambda) x ToLambda table
u_nk = u_nk_fromDF(trimmedData, temperature)

In [None]:
# Display u_nk ordered by its second index level (FromLambda); the sorted copy is not stored
u_nk.sort_index(level=1)

In [None]:
# get_dG accepts only the u_nk table; the samples were already trimmed to
# step >= equil before u_nk was built, so no threshold is passed here.
dG = get_dG(u_nk)
# First super- and sub-diagonal pair each lambda with its neighbour.
# NOTE(review): this assumes dG is square with identically ordered rows and columns -- confirm.
dG_f=np.diag(dG, k=1)
dG_b=np.diag(dG, k=-1)

# Midpoints between consecutive lambda values, used as x positions in the plots
l=dG.columns.to_list()
l_mid = np.mean([l[1:],l[:-1]], axis=0)

In [None]:
# Forward estimate and negated backward estimate per lambda interval, drawn on the same axes
plt.plot(l_mid, dG_f, label='dG_fwd')
plt.plot(l_mid, -dG_b, label='- dG_bwd')

# Title, axis labels and legend are applied to pyplot's current axes
plt.title('Fwd and bwd free energy difference by lambda (exponential estimator)')
plt.xlabel('Lambda')
plt.ylabel('delta-G')
plt.legend()

In [None]:
# One vertical bar per interval from 0 to dG_f + dG_b.
# NOTE(review): presumably the two terms carry opposite signs, so the sum is zero when they agree -- confirm.
plt.vlines(l_mid, np.zeros(len(l_mid)), dG_f + np.array(dG_b), label="fwd - bwd", linewidth=3)

plt.legend()
plt.title('Fwd-bwd discrepencies by lambda')
plt.xlabel('Lambda')
plt.ylabel('Diff. in delta-G')
# Writes the figure to the working directory, overwriting any existing figure.png
plt.savefig("figure.png", dpi=1200)

In [None]:
# Sum the per-interval estimates, excluding NaN entries; the backward total is negated for comparison
print(f'The rough estimate for total dG (forward windows only) is: {np.sum(dG_f[~np.isnan(dG_f)])}. The backward estimate is {-np.sum(dG_b[~np.isnan(dG_b)])}')

Plot dE distributions for EACH complete window (may take several minutes)

In [None]:
#split into forward and backward values for each window
# The masks are combined with & and explicit parentheses: `*` binds tighter
# than `>=`, so the unparenthesised form compared (direction * step) with equil
# and selected every row of both directions whenever equil <= 0.
# Backward dE values are negated before use.
backward = data.dE[(~data.up) & (data.step >= equil)].sort_index()*(-1)
forward = data.dE[data.up & (data.step >= equil)].sort_index()
print(f'equilibration Time: {equil}\n backward: {backward.mean()}, forward: {forward.mean()}') 

In [1]:
import seaborn as sns

In [None]:
# Windows present in both the forward and the backward series, in ascending order
completeWindows = np.sort(list(set(forward.index).intersection(backward.index)))

In [None]:
# One diagnostic figure per complete window: a box plot above overlaid dE histograms
for i in completeWindows:
    
    # Two stacked axes sharing the x axis: a thin box-plot strip (15%) over the histogram (85%)
    f, (ax_box, ax_hist) = plt.subplots(2, sharex=True, gridspec_kw={"height_ratios": (.15, .85)})

    # Forward and backward dE samples for this window, in the order of the tick labels below
    tempDat = [forward.loc[i], backward.loc[i]]
    
    ax_box.boxplot(tempDat, vert=False)
    ax_box.set_yticklabels(["forward", "backward"])
    # NOTE(review): the title hard-codes a half-width of 0.004 around the window value -- confirm
    # it matches the actual lambda spacing. plt.title acts on pyplot's current axes.
    plt.title(f'[{np.round(i-0.004,3)} {np.round(i+0.004, 3)}]')
    # Remove x axis name for the boxplot
    ax_box.set(xlabel='')

    # Overlay both distributions on the lower axes
    sns.histplot(backward.loc[i], bins=50, label="backward", ax=ax_hist);
    sns.histplot(forward.loc[i], bins=50, label="forward", ax=ax_hist, color="orange");
    
    plt.legend()
    plt.show()
    #plt.savefig(f'./diagnosticPlots/dE_SmallerWindows{np.round(i,3)}.svg')
    # Release the figure so that figures do not accumulate across iterations
    plt.clf()
    plt.close()