## Calculation of $c_{sat}$ and $c_{con}$ from multi-chain molecular simulations in slab geometry

Authors: Giulio Tesei

Contact: giulio.tesei@bio.ku.dk

The calculations in the cells below require the data in `multi-chain/data/M1`, `multi-chain/data/M2`, `multi-chain/data/M3`, `multi-chain/data/HPS` and `multi-chain/data/Urry`, which are available on Zenodo (DOI to be added). The notebook also reads `multi-chain/proteins.pkl` and `residues.csv`.

In [None]:
import mdtraj as md
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import string
import os
from scipy.optimize import least_squares
from scipy.optimize import curve_fit
from mpl_toolkits.axes_grid1 import make_axes_locatable
from cycler import cycler

# Colour palette shared by the default matplotlib colour cycle and by any
# code that indexes colours explicitly through `cm`.
cm = ['#377eb8', '#ff7f00', '#4daf4a', '#e41a1c', '#984ea3',
      '#f781bf', '#a65628', '#999999', '#dede00']

# Install the palette as the default colour cycle of all axes.
plt.rc('axes', prop_cycle=cycler('color', cm))

In [None]:
def plotProfiles(T,L,proteins,model,value,error,nskip=1200):
    """Estimate dilute- and dense-phase concentrations from slab density profiles.

    Despite its name, this function does not plot anything. For every protein
    in `proteins` it loads the per-frame histogram stored in
    ``multi-chain/data/<model>/<protein>_<T>.npy``, discards the first `nskip`
    frames, fits a tanh profile to each half (z>0 and z<0) of the time-averaged
    profile to locate the interfaces, and averages the concentration inside
    the slab (dense phase) and far away from it (dilute phase).

    Results are written in place into `value` and `error`; nothing is returned.
    A summary is printed for every protein, 'WRONG' is printed when the two
    dilute-phase cutoffs differ by more than a factor of two, and
    'DATA NOT FOUND FOR <protein>' is printed when the data file is missing.

    The function reads the notebook-level tables `df_proteins` (indexed by
    protein name, with a `fasta` column) and `df_residues` (indexed by
    one-letter residue code, with an `MW` column).

    Parameters
    ----------
    T : int
        Temperature label used to build the file name (formatted with ``{:d}``).
    L : float
        Enters the unit conversion as 1/L**2; presumably the side length of
        the box cross-section in nm -- confirm against the simulation setup.
    proteins : iterable of str
        Protein names; each must be a row label of `df_proteins`, `value`
        and `error`.
    model : str
        Model name; selects the data sub-directory and the column prefix.
    value : pandas.DataFrame
        Receives the mean concentrations in columns ``<model>_dil`` and
        ``<model>_den`` and the per-frame time series in ``<model>_dilarray``
        and ``<model>_denarray``. The two array columns must have object
        dtype so that a whole array can be stored in a single cell.
    error : pandas.DataFrame
        Receives, in columns ``<model>_dil`` and ``<model>_den``, the standard
        deviation of the block means divided by the square root of the number
        of blocks (blocks of 1200 frames).
    nskip : int, optional
        Number of initial frames discarded before the analysis.
    """
    def profile(x, a, b, c, d):
        # tanh profile, symmetric in z: tends to `a` for |x| << c and to `b`
        # for |x| >> c; `c` is the interface midpoint and `d` its width.
        return .5*(a+b)+.5*(b-a)*np.tanh((np.abs(x)-c)/d)

    def residuals(params, *args):
        # args = (z values, observed profile)
        return args[1] - profile(args[0], *params)

    chunk = 1200  # number of frames per block in the error estimate

    for m in proteins:
        path = 'multi-chain/data/'+model+'/{:s}_{:d}.npy'.format(m,T)
        if not os.path.isfile(path):
            print('DATA NOT FOUND FOR',m)
            continue

        h = np.load(path)
        fasta = df_proteins.loc[m].fasta
        N = len(fasta)
        # Sum of the residue MW entries plus 18.
        # NOTE(review): the +18 is presumably one water for the chain termini -- confirm.
        mw = np.sum([df_residues.loc[r].MW for r in fasta])+18
        # NOTE(review): this factor equals 1e3/(N_A * L^2 * 0.1 nm) per chain,
        # i.e. it presumably turns bead counts per 0.1-nm bin into mM of
        # chains, assuming L is in nm -- confirm.
        conv = 100/6.022/N/L/L*1e3
        h = h[nskip:]*conv

        # Bin centres along z, centred on zero. The division by 10 presumably
        # converts from Angstrom to nm -- confirm.
        lz = h.shape[1]+1
        edges = np.arange(-lz/2.,lz/2.,1)/10
        dz = (edges[1]-edges[0])/2.
        z = edges[:-1]+dz

        # Fit each half of the time-averaged profile independently.
        hm = np.mean(h,axis=0)
        pos = z>0
        neg = z<0
        p0 = [hm.min(),hm.max(),3,1]
        res1 = least_squares(residuals, x0=p0, args=(z[pos], hm[pos]), bounds=([0]*4,[1e3]*4))
        res2 = least_squares(residuals, x0=p0, args=(z[neg], hm[neg]), bounds=([0]*4,[1e3]*4))

        if (res1.x[3]>res1.x[2]) or (res2.x[3]>res2.x[2]):
            # A fitted width larger than the midpoint: fall back to symmetric
            # cutoffs at the larger of the two midpoints (10 if it is below 1),
            # with the dilute region starting 25 further out.
            zDS = res1.x[2] if res1.x[2]>res2.x[2] else res2.x[2]
            zDS = 10 if zDS<1 else zDS
            cutoffs1 = [zDS,-zDS]
            cutoffs2 = [zDS+25,-zDS-25]
        else:
            # Dense region: inside midpoint - 0.5*width on each side.
            # Dilute region: beyond midpoint + 6*width on each side.
            cutoffs1 = [res1.x[2]-.5*res1.x[3],-res2.x[2]+.5*res2.x[3]]
            cutoffs2 = [res1.x[2]+6*res1.x[3],-res2.x[2]-6*res2.x[3]]

        bool1 = np.logical_and(z<cutoffs1[0],z>cutoffs1[1])  # dense-phase bins
        bool2 = np.logical_or(z>cutoffs2[0],z<cutoffs2[1])   # dilute-phase bins

        # Per-frame average concentration in each region.
        dilarray = h[:,bool2].mean(axis=1)
        denarray = h[:,bool1].mean(axis=1)

        value.loc[m,model+'_dil'] = dilarray.mean()
        value.loc[m,model+'_den'] = denarray.mean()
        # Use .at with (row, column) so the arrays are stored in the frame
        # itself. The chained form value.loc[m][col] = ... assigns to a
        # temporary row object and is silently lost under pandas
        # Copy-on-Write.
        value.at[m,model+'_dilarray'] = dilarray
        value.at[m,model+'_denarray'] = denarray

        # Block averaging: drop the leading remainder so that the trajectory
        # splits into whole blocks of `chunk` frames.
        nblocks = h.shape[0]//chunk
        hblocks = h[h.shape[0]%chunk:]
        rhoden = []
        rhodil = []
        for k in range(nblocks):
            hmb = np.mean(hblocks[k*chunk:(k+1)*chunk],axis=0)
            rhodil.append( hmb[bool2].mean() )
            rhoden.append( hmb[bool1].mean() )
        if nblocks > 0:
            error.loc[m,model+'_dil'] = np.std(rhodil)/np.sqrt(nblocks)
            error.loc[m,model+'_den'] = np.std(rhoden)/np.sqrt(nblocks)
        else:
            # Fewer than `chunk` frames left after skipping: no error estimate.
            error.loc[m,model+'_dil'] = np.nan
            error.loc[m,model+'_den'] = np.nan

        print(m,mw,len(rhodil),value.loc[m,model+'_dil'],'±',error.loc[m,model+'_dil'],
              1e-3*mw*value.loc[m,model+'_dil'],cutoffs2)
        print(m,mw,len(rhodil),value.loc[m,model+'_den'],'±',error.loc[m,model+'_den'],
              1e-3*mw*value.loc[m,model+'_den'])

        # Flag strongly asymmetric dilute-phase cutoffs.
        asymmetry = np.abs(cutoffs2[1]/cutoffs2[0])
        if asymmetry > 2 or asymmetry < 0.5:
            print('WRONG')

In [None]:
# Residue table, indexed by the one-letter code in column 'one';
# plotProfiles reads its MW column.
df_residues = pd.read_csv('residues.csv').set_index(keys='one')

# Protein table, indexed by protein name; plotProfiles reads its fasta column.
# CAUTION: unpickling can execute arbitrary code -- only load this file from
# a trusted source.
df_proteins = pd.read_pickle('multi-chain/proteins.pkl')

Calculate $c_{sat}$ and $c_{con}$ from simulations at 323 K (HPS, M1–3) and 297 K (Urry).

In [None]:
# Columns: <model>_den / <model>_dil pairs for every model, followed by the
# per-frame dilute arrays and then the per-frame dense arrays.
models = ['Urry','HPS','M1','M2','M3']
ms = ([name+'_'+kind for name in models for kind in ('den','dil')]
      + [name+'_dilarray' for name in models]
      + [name+'_denarray' for name in models])

# Row order of the result tables (kept explicit so the pickles are unchanged).
index = ['LAF1300_80','LAF1300','LAF1300shuf','LAF1300D2130','LAF1HOOMD','LAF1','LAF1_80','LAF1D2130','LAF1shuf',
         'A2NS','M12FP12Y','P4D','M6R','A1','P8D','P7FM7Y',
         'M8FP4Y','M4D','M9FP3Y','M10R','P7R','FUS','A2','Ddx4WT','P12D','P2R','M3RP3K','P7KP12D']
vv = pd.DataFrame(columns=ms, index=index)
ee = vv.copy()

# (L, proteins) pairs: the second plotProfiles argument and the proteins
# analysed with it, in the order they are processed.
systems = [
    (17, ['Ddx4WT']),
    (24, ['LAF1300_80','LAF1300','LAF1300shuf','LAF1300D2130']),
    (15, ['LAF1HOOMD','LAF1','LAF1_80','LAF1D2130','LAF1shuf','A2NS','M12FP12Y','P4D','M6R','A1','P8D',
          'P7FM7Y','M8FP4Y','M4D','M9FP3Y','M10R','P7R','FUS','A2','P12D','P2R','M3RP3K','P7KP12D']),
]

for m in models:
    print(m)
    # Urry simulations were run at 297 K, all other models at 323 K.
    T = 297 if m=='Urry' else 323
    for side, names in systems:
        plotProfiles(T,side,names,m,vv,ee)

vv.to_pickle('multi-chain/data/rho_dense_value.pkl')

ee.to_pickle('multi-chain/data/rho_dense_error.pkl')

Calculate $c_{sat}$ and $c_{con}$ from simulations of A1 LCD variants performed with M1–3 at 310 K.

In [None]:
# Columns: <model>_den / <model>_dil pairs, then the per-frame dilute arrays,
# then the per-frame dense arrays.
models = ['M1','M2','M3']
ms = ([name+'_'+kind for name in models for kind in ('den','dil')]
      + [name+'_dilarray' for name in models]
      + [name+'_denarray' for name in models])

# The same variants serve as row labels and as the proteins to analyse.
variants = ['M6R','A1','P8D','P7FM7Y','M8FP4Y','M4D','M9FP3Y','M10R','P7R']
vv = pd.DataFrame(columns=ms, index=variants)
ee = vv.copy()

for m in models:
    print(m)
    plotProfiles(310,15,variants,m,vv,ee)

vv.to_pickle('multi-chain/data/rho_dense_value_310.pkl')

ee.to_pickle('multi-chain/data/rho_dense_error_310.pkl')

Calculate $c_{sat}$ and $c_{con}$ from simulations of Ddx4 LCD variants performed with Urry, HPS and M1–3 at 297 K.

In [None]:
# Columns: <model>_den / <model>_dil pairs for every model, followed by the
# per-frame dilute arrays and then the per-frame dense arrays.
models = ['Urry','HPS','M1','M2','M3']
ms = ([name+'_'+kind for name in models for kind in ('den','dil')]
      + [name+'_dilarray' for name in models]
      + [name+'_denarray' for name in models])

# The same variants serve as row labels and as the proteins to analyse.
variants = ['Ddx4WT','Ddx4CS','Ddx4FA','Ddx4RK']
vv = pd.DataFrame(columns=ms, index=variants)
ee = vv.copy()

for m in models:
    plotProfiles(297,17,variants,m,vv,ee)

vv.to_pickle('multi-chain/data/rho_dense_value_297.pkl')

ee.to_pickle('multi-chain/data/rho_dense_error_297.pkl')