# Plot free energy surfaces

In this notebook we plot the free energy surfaces onto different collective variables. First, we need to calculate different quantities from trajectories. This is a rather dull excercise, that we did in a separate [notebook](calculate_observables.ipynb). Here, we just read the data from pickle files

In [1]:
import pickle

# RMSD
fname = "data/observables/rmsd_full.p"
rmsd_full = pickle.load(open(fname,'rb'), encoding='bytes')

# RMSD
fname = "data/observables/rmsd_full_2koc.p"
rmsd_2koc = pickle.load(open(fname,'rb'), encoding='bytes')

fname = "data/observables/rmsd_full_6by5.p"
rmsd_6by5 = pickle.load(open(fname,'rb'), encoding='bytes')

# ERMSD
fname = "data/observables/ermsd_full.p"
ermsd_full = pickle.load(open(fname,'rb'), encoding='bytes')


# RMSD
fname = "data/observables/ermsd_full_2koc.p"
ermsd_2koc = pickle.load(open(fname,'rb'), encoding='bytes')

fname = "data/observables/ermsd_full_6by5.p"
ermsd_6by5 = pickle.load(open(fname,'rb'), encoding='bytes')

We now need to read the weights of the different ensembles. To complicate things, we have divided our dataset in 4 bins, so that we can estimate errors.

In [2]:
import numpy as np
weights = [[]for i in range(5)]

ss=["set_A1","set_B","set_C","set_D"]
thetas = [20,50,60,500]
for i,el in enumerate(ss):
    for j in range(4):
        dd = np.array([[float(x) for x in line.split()] \
                       for line in open("data/weights_%s_t_%d_rep_%d.dat" % (el,thetas[i],j))])
        if(i==0):
            weights[0].append(dd[:,0])
        weights[i+1].append(dd[:,1])

weights = np.array(weights)
print(weights.shape)

(5, 4, 25000)


We define a handy FES class, which essentially takes the weights, makes an histogram and returns the FES with lower and upper error bounds

In [3]:
from scipy import stats
import scipy
class FES:
    
    def __init__(self,data,ww,bw=0.1,blocks=4):   
        
        self.data = np.copy(data)
        self.ww = np.copy(ww)
        self.blocks=blocks
        bsize = int(data.shape[0]/blocks)
        self.bins = np.linspace(1.1*np.min(data),1.05*np.max(data),200)

        hists = []
        for j in range(blocks):
            st = j*bsize
            end = st + bsize
            f = bw/self.data[st:end].std()
            kernel = stats.gaussian_kde(data[st:end],weights=ww[j],bw_method=f)
            hh = kernel(self.bins)
            hists.append(hh)
        db = self.bins[1]-self.bins[0]
        self.hists = np.array(hists)*db
        
    def get_fes(self,temp=280,hist=False):
        
        kbt=0.008314462*temp
        
        avgs = np.average(self.hists,axis=0)
        stds = np.std(self.hists,axis=0,ddof=1)/np.sqrt(self.blocks)
        if(hist==True):
            return self.bins,avgs,avgs-stds,avgs+stds
        fes = -kbt*np.log(avgs)
        fes_up = -kbt*np.log(avgs  + stds)
        fes_low = -kbt*np.log(avgs  - stds)
        mm = np.min(fes)

        return self.bins,fes-mm,fes_low-mm,fes_up-mm
    
    def get_populations(self,r1):
        
        assert(r1[0]<r1[1])
        int1 = np.where((self.bins>r1[0]) & (self.bins<r1[1]))
        p1 = np.sum(self.hists[:,int1[0]],axis=1)/np.sum(self.hists,axis=1)
        return p1
    
    def get_df(self,r1,r2,temp=280):
    
        kbt=0.008314462*temp
        p1 = self.get_populations(r1)
        p2 = self.get_populations(r2)
        return -kbt*(np.log(p1) - np.log(p2))
    
    


We calculate the FES using projected onto the RMSD from native

In [4]:
fes_obj = []
for j in range(5):
    fes_obj.append(FES(rmsd_full,weights[j],bw=0.05))



In [5]:
import matplotlib.pyplot as plt
import seaborn as sns
cols = ["black","medium blue","pumpkin","brick red","leaf green","white"]
cp = sns.xkcd_palette(cols)
fig,ax = plt.subplots(2,2,figsize=(10,10))
plt.subplots_adjust(left=0.1, bottom=0.07, right=0.95, top=0.95, wspace=0.15, hspace=0.15)
k = 0
for i in range(2):
    for j in range(2):
        ax[i,j].set_ylim(0.,0.14)
        ax[i,j].set_xlim(-0.01,1.7)

        bins0,fes0,fes0_up, fes0_low = fes_obj[0].get_fes(hist=True)
        bins1,fes1,fes1_up, fes1_low = fes_obj[k+1].get_fes(hist=True)
        ax[i,j].plot(bins0,fes0,label="MD",color=cp[0],lw=0.5)
        ax[i,j].fill_between(bins0,fes0_low,fes0_up,alpha=0.3,color=cp[0])
        ax[i,j].plot(bins1,fes1,label="MD+%s" % ss[k],color=cp[k+1],lw=2)
        ax[i,j].fill_between(bins1,fes1_low,fes1_up,alpha=0.3,color=cp[k+1])
        ax[i,j].text(1.0,0.12,"MD+%s" % ss[k],fontsize=12)
        ax[i,j].set_xlabel("RMSD from native [nm]")

        k+=1 
plt.savefig("RMSD_fes.pdf")
plt.savefig("RMSD_fes.png",dpi=600)
plt.show()
plt.close()

<Figure size 1000x1000 with 4 Axes>

In our experience, the eRMSD better distinguishes conformations. Therefore, we use the eRMSD to calculate the histograms


In [6]:
fes_obj_ermsd = []
for j in range(5):
    fes_obj_ermsd.append(FES(ermsd_full,weights[j],bw=0.05))

In [11]:
fig,ax = plt.subplots(2,2,figsize=(4.5,4.5),sharex=True,sharey=True)
plt.subplots_adjust(left=0.1, bottom=0.12, right=0.95, top=0.95, wspace=0.16, hspace=0.1)
k = 0
cols = ["pumpkin","medium blue","dark grey","dark grey","dark grey","wine"]
cp = sns.xkcd_palette(cols)

stitles = ["set A","set B", "set C", "set D"]
kj = ["a","b","c",'d']
for i in range(2):
    for j in range(2):
        ax[i,j].set_ylim(-0.005,0.12)
        ax[i,j].set_xlim(-0.05,2.2)
        ax[i,j].set_yticks([0,0.04,0.08])
        bins0,fes0,fes0_up, fes0_low = fes_obj_ermsd[0].get_fes(hist=True)
        bins1,fes1,fes1_up, fes1_low = fes_obj_ermsd[k+1].get_fes(hist=True)
        ax[i,j].plot(bins0,fes0,label="MD",color=cp[0],lw=1.0)
        ax[i,j].fill_between(bins0,fes0_low,fes0_up,alpha=0.5,color=cp[0])
        ax[i,j].plot(bins1,fes1,label="MD+%s" % stitles[k],color=cp[k+1],lw=1)
        ax[i,j].fill_between(bins1,fes1_low,fes1_up,alpha=0.5,color=cp[k+1])
        #ax[i,j].text(1.0,0.09,"MD+%s" % stitles[k],fontsize=14,color=cp[k+1],weight="bold")
        ax[i,j].text(-0.2,0.11,kj[k],weight="bold",fontsize=12,ha="center",va="center")
        ax[i,j].legend(ncol=1)
        if(i==1): ax[i,j].set_xlabel("eRMSD from native [nm]")
        if(i==0 and j==0):
            #ax[i,j].scatter(ermsd_2koc,[-0.002]*len(ermsd_2koc),color=cp[5],label="2KOC",marker="x",s=40)
            ax[i,j].axvline(0.7,ls="--",lw=0.5,c="k")
            
        #ax[i,j].scatter(ermsd_6by5,[-0.002]*len(ermsd_6by5),color=cp[3],label="6BY5",marker="o",s=40)
        
        k+=1 
#plt.show()
plt.savefig("figures/figure3.pdf")
plt.savefig("figures/figure3.png",dpi=600)
#plt.show()
plt.close()



In [8]:
print("POPULATION of state A in different ensembles")
stitles2 = ["MD"] + stitles
for j in range(len(stitles2)):
    mean = 100*fes_obj_ermsd[j].get_populations([0,0.7]).mean()
    std = 100*fes_obj_ermsd[j].get_populations([0,0.7]).std(ddof=1)
    print("ensemble %s: %5.1f+/-%-5.1f" % (stitles2[j],mean,std))


POPULATION of state A in different ensembles
ensemble MD:  59.5+/-3.6  
ensemble set A:  83.4+/-15.5 
ensemble set B:  44.2+/-6.2  
ensemble set C:  64.9+/-5.1  
ensemble set D:  43.6+/-9.5  


Finally, we draw random samples from the refined MD+set A distribution and write PDB files to disk corresponding to state A and state B

In [9]:
# extract samples
import mdtraj as md
theta=20
top = "data/PDB/2koc_gmx.pdb"
trj = "data/traj_temp_f_0.xtc"

trajmd = md.load(trj,top=top)
native = "data/PDB/2koc_gmx.pdb"

# read weights 
data_tmp = np.array([[float(x) for x in line.split()] for line in open("data/weights_%s_t_%d.dat" % (ss[0],theta),"r")])
weights1f = data_tmp[:,1]
# find indeces where ermsd is less than 0.7
stateAi = np.where(ermsd_full<0.7)[0]

# bootstrap samples
ff = np.random.choice(len(weights1f),size=100,p=weights1f/np.sum(weights1f),replace=False)

# write to disk 10 pdbs from each basin 
ac,bc = 0,0
for j in range(100):
    if(ff[j] in stateAi and ac <10):
        trajmd[ff[j]].superpose(md.load(native)).save("stateA_sample%d.pdb" %ac)
        print("A",ermsd_full[ff[j]])
        ac += 1
    if(ff[j] not in stateAi and bc <10):
        trajmd[ff[j]].superpose(md.load(native)).save("stateB_sample%d.pdb" %bc)
        print("B",ermsd_full[ff[j]])
        bc += 1


A 0.49189376101851845
A 0.5053475869780073
A 0.4750203993345654
A 0.44078589706005517
A 0.45792117045612135
A 0.46289772079042235
A 0.47080236578794477
B 1.034829067013729
B 0.9229961687300786
A 0.6809916914848865
A 0.48077377828063267
A 0.4587504988229158
B 0.8858780023330175
B 0.8912207331773445
B 1.007689653886882
B 0.9550669339707459
B 0.925195804574741
B 1.0216470820785573
B 0.8942611264547061
B 0.8821378239896296
