## Calculation of $c_{sat}$ from direct-coexistence simulations 

Author: Giulio Tesei

Contact: giulio.tesei@bio.ku.dk

In [None]:
import mdtraj as md
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
from numba import jit
import string
from scipy.ndimage import gaussian_filter1d
from mdtraj import element
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import os
from scipy.optimize import least_squares
from scipy.stats import pearsonr, spearmanr
from scipy.optimize import curve_fit
from mpl_toolkits.axes_grid1 import make_axes_locatable
from cycler import cycler
from matplotlib.colors import LogNorm
import warnings
import itertools
warnings.filterwarnings('ignore')
import MDAnalysis as mda
import MDAnalysis.analysis.msd as msd
from statsmodels.tsa.stattools import acf
import sys

#!git clone https://github.com/fpesceKU/BLOCKING.git

sys.path.append('BLOCKING')
from main import BlockAnalysis
    
def _tanh_profile(x, a, b, c, d):
    """Hyperbolic-tangent interface profile, symmetric in ``x``.

    Evaluated on ``|x|``: the value tends to ``a`` for ``|x|`` well below
    ``c`` and to ``b`` well above it; ``d`` sets the width of the crossover.
    """
    return .5*(a+b)+.5*(b-a)*np.tanh((np.abs(x)-c)/d)


def _tanh_residuals(params, *args):
    """Residuals ``y - profile(x)`` for ``least_squares``; ``args`` is ``(x, y)``."""
    return args[1] - _tanh_profile(args[0], *params)


def _analyse_slab(T, L, m, model, df_proteins, nskip):
    """Block-average the dilute and dense regions of one slab simulation.

    Loads ``MC/<model>/<m>_<T>.npy``, discards the first ``nskip`` rows,
    fits a tanh profile to each half of the time-averaged profile and uses
    the fitted midpoints/widths to select the dense and dilute bins.

    Returns ``(block_dil, block_den)``, two ``BlockAnalysis`` objects on
    which ``SEM()`` has been called, or ``None`` if the data file is absent.
    """
    path = 'MC/{:s}/{:s}_{:d}.npy'.format(model,m,T)
    if not os.path.isfile(path):
        return None

    h = np.load(path)
    N = len(df_proteins.loc[m].fasta)
    # Presumably converts per-bin bead counts into chain concentrations in mM
    # (N beads per chain, L x L cross-section) -- TODO confirm units.
    conv = 100/6.022/N/L/L*1e3
    h = h[nskip:]*conv

    # Bin centres along z, centred on zero; one centre per column of h.
    lz = h.shape[1]+1
    edges = np.arange(-lz/2.,lz/2.,1)/10
    dz = (edges[1]-edges[0])/2.
    z = edges[:-1]+dz

    # Fit each side of the time-averaged profile independently.
    hm = np.mean(h,axis=0)
    z1, h1 = z[z>0], hm[z>0]
    z2, h2 = z[z<0], hm[z<0]
    p0 = [1,1,1,1]
    bounds = ([0]*4,[100]*4)
    res1 = least_squares(_tanh_residuals, x0=p0, args=(z1, h1), bounds=bounds)
    res2 = least_squares(_tanh_residuals, x0=p0, args=(z2, h2), bounds=bounds)

    # cutoffs1 bounds the dense region (midpoint minus half a width);
    # cutoffs2 bounds the dilute region (midpoint plus six widths).
    # Index 0 is the positive-z side, index 1 the negative-z side.
    cutoffs1 = [res1.x[2]-.5*res1.x[3],-res2.x[2]+.5*res2.x[3]]
    cutoffs2 = [res1.x[2]+6*res1.x[3],-res2.x[2]-6*res2.x[3]]

    # Sanity check: the two dilute-phase cutoffs should be roughly symmetric.
    asymmetry = np.abs(cutoffs2[1]/cutoffs2[0])
    if asymmetry > 2:
        # NOTE(review): this case is only reported, not corrected, unlike
        # the opposite case below -- confirm the asymmetry is intended.
        print('WRONG',m,model,cutoffs1,cutoffs2)
        print(res1.x,res2.x)
    if asymmetry < 0.5:
        print('WRONG',m,model,cutoffs1,cutoffs2)
        print(res1.x,res2.x)
        plt.plot(z1, h1)
        plt.plot(z2, h2)
        plt.plot(z1,_tanh_profile(z1,*res1.x),color='tab:blue')
        plt.plot(z2,_tanh_profile(z2,*res2.x),color='tab:orange')
        # Mirror the negative-side cutoff onto the positive side.
        cutoffs2[0] = -cutoffs2[1]
        print(cutoffs2)

    dense = np.logical_and(z<cutoffs1[0],z>cutoffs1[1])
    dilute = np.logical_or(z>cutoffs2[0],z<cutoffs2[1])

    # One value per retained frame: mean over the selected bins.
    dilarray = h[:,dilute].mean(axis=1)
    denarray = h[:,dense].mean(axis=1)

    block_dil = BlockAnalysis(dilarray)
    block_den = BlockAnalysis(denarray)
    block_dil.SEM()
    block_den.SEM()
    return block_dil, block_den


def _store_phase_concentrations(T, L, proteins, model, value, error, nskip,
                                tag_temperature):
    """Run ``_analyse_slab`` for every protein and write results in place.

    Columns are named ``<prefix>_dil`` and ``<prefix>_den`` where the prefix
    is ``<model>_<T>`` if ``tag_temperature`` is true and ``<model>`` otherwise.
    """
    df_proteins = pd.read_pickle('MC/proteins.pkl')
    for m in proteins:
        blocks = _analyse_slab(T, L, m, model, df_proteins, nskip)
        if blocks is None:
            print('DATA NOT FOUND FOR',m,model)
            continue
        block_dil, block_den = blocks
        prefix = model+'_{:d}'.format(T) if tag_temperature else model

        value.loc[m,prefix+'_dil'] = block_dil.av
        value.loc[m,prefix+'_den'] = block_den.av

        error.loc[m,prefix+'_dil'] = block_dil.sem
        error.loc[m,prefix+'_den'] = block_den.sem


def calcProfiles(T,L,proteins,model,value,error,nskip=1200):
    """Store dilute/dense phase concentrations for each protein in place.

    Parameters
    ----------
    T : int
        Temperature label used in the data file name and in the column name.
    L : float
        Side of the box cross-section used in the unit conversion.
    proteins : iterable of str
        Row labels of ``MC/proteins.pkl``; also used in the data file name.
    model : str
        Sub-directory of ``MC`` holding the profiles; column-name prefix.
    value, error : pandas.DataFrame
        Modified in place: ``<model>_<T>_dil`` / ``<model>_<T>_den`` receive
        the ``av`` (in ``value``) and ``sem`` (in ``error``) attributes of
        the ``BlockAnalysis`` objects.
    nskip : int, optional
        Number of leading rows (frames) of the profile array to discard.

    Proteins whose data file is missing are reported with a printed message
    and otherwise skipped.
    """
    _store_phase_concentrations(T,L,proteins,model,value,error,nskip,
                                tag_temperature=True)


def calcProfiles_rc(T,L,proteins,model,value,error,nskip=1200):
    """Same analysis as ``calcProfiles`` but without ``T`` in the column names.

    Results are written to ``<model>_dil`` and ``<model>_den``; all
    parameters have the same meaning as in ``calcProfiles``.
    """
    _store_phase_concentrations(T,L,proteins,model,value,error,nskip,
                                tag_temperature=False)

In [None]:
# Residue table, indexed by the 'one' column (which is also kept as a regular
# column), and the per-protein table with the sequences.
df_residues = pd.read_csv('MC/residues.csv')
df_residues = df_residues.set_index('one',drop=False)
df_proteins = pd.read_pickle('MC/proteins.pkl')

Calculate protein concentrations in the dilute and protein-rich phases from slab simulations performed with different cutoff distances for the nonionic interactions. All concentrations are expressed in mM.

In [None]:
# Concentrations (vv) and their errors (ee) for the cutoff scan; one row per
# protein, columns are filled in by calcProfiles_rc.
vv = pd.DataFrame(index=['A1','LAF1shuf','LAF1','ht4007'],dtype=object)
ee = vv.copy()

# Each protein is analysed at its own temperature label and box side,
# once for every cutoff model.
cutoff_models = ['M1_20','M1_25','M1_30','M1_40']
cutoff_runs = (
    ('A1', 310, 15),
    ('ht4007', 277, 25),
    ('LAF1shuf', 323, 15),
    ('LAF1', 293, 15),
)
for protein, T, box in cutoff_runs:
    for m in cutoff_models:
        calcProfiles_rc(T,box,[protein],m,vv,ee)

vv.to_pickle('MC/rho_dense_value_cutoff.pkl')
ee.to_pickle('MC/rho_dense_error_cutoff.pkl')

Calculate protein concentrations in the dilute and protein-rich phases from slab simulations performed with cutoffs of $r_c=2$ and $4$ nm for the nonionic interactions and with different sets of stickiness parameters. All concentrations are expressed in mM.

In [None]:
# Protein sets passed to calcProfiles below; the order matters because
# seq_277 is later matched row-by-row against the experimental values.
seq_293 = [
    'LAF1', 'LAF1D2130', 'LAF1shuf',
    'A1S150', 'A1S200', 'A1S300', 'A1S500',
    'M23SP23T', 'P23GM23S', 'A1', 'M14NP14Q',
    'P23GM23SP7FM7Y', 'P23GM23SM12FP12Y',
    'M10GP10S', 'M20GP20S',
    'M12FP12Y', 'P7FM7Y', 'M30GP30S',
]
seq_277 = [
    'M12FP12Y', 'P4D', 'M6R', 'A1', 'P2R', 'P8D',
    'M14NP14Q', 'M10GP10S', 'P7FM7Y', 'M20GP20S',
    'M23SP23T', 'M8FP4Y', 'M3RP3K', 'M4D', 'M9FP3Y',
]

In [None]:
# Fresh tables for the concentrations (vv) and their errors (ee);
# calcProfiles adds rows and columns in place.
vv = pd.DataFrame(dtype=object)
ee = vv.copy()

# (temperature label, box side, proteins, model) for every analysed set of
# slab simulations, in the order in which the results are written.
slab_runs = [
    (323, 15, seq_293, 'M1_40'),
    (310, 15, seq_277, 'M1_40'),

    (293, 15, seq_293, 'M1_20'),
    (297, 15, ['FUS','A2'], 'M1_20'),
    (297, 17, ['Ddx4WT'], 'M1_20'),
    (277, 15, seq_277, 'M1_20'),

    (293, 15, seq_293, '05_24_20'),
    (297, 15, ['FUS','A2'], '05_24_20'),
    (297, 17, ['Ddx4WT'], '05_24_20'),
    (277, 15, seq_277, '05_24_20'),

    (293, 15, seq_293, 'M1_25_20'),

    (293, 15, seq_293, 'M1_24_20'),
    (277, 15, seq_277, 'M1_24_20'),

    (293, 15, seq_293, 'M1_22_20'),

    (293, 15, seq_293, 'M1_20_20'),
    (277, 15, seq_277, 'M1_20_20'),
]
for temperature, box, names, model_name in slab_runs:
    calcProfiles(temperature,box,names,model_name,vv,ee)

In [None]:
# Experimental c_sat values, added as extra columns of vv (values) and
# ee (errors) next to the simulation results.

# DOI: 10.1038/s41557-021-00840-w
# One row per entry of seq_277, in the same order; column 0 goes to vv and
# column 1 to ee (mean and standard deviation, going by the variable name).
exp_csat_mean_std = np.array([
    [2.320E-06, 5.374E-07],
    [4.460E-06, 9.77508E-07],
    [7.131E-06, 2.220E-07],
    [1.293E-05, 2.627E-07],
    [1.800E-05, 4.500E-06],
    [1.86801E-05, 9.35443E-07],
    [2.237E-05, 1.465E-06],
    [2.85714E-05, 1.039E-06],
    [4.940E-05, 3.383E-06],
    [5.328E-05, 2.79153E-06],
    [6.873E-05, 1.173E-06],
    [6.315E-05, 1.63288E-06],
    [8.310E-05, 1.030E-05],
    [8.879E-05, 5.47449E-06],
    [1.150E-04, 1.87203E-06],
])

# DOI: 10.1038/s41557-021-00840-w
# Scaled by 1e3 (presumably M -> mM; confirm against the source data).
vv.loc[seq_277, 'exp_277_dil'] = 1e3*exp_csat_mean_std[:, 0]
ee.loc[seq_277, 'exp_277_dil'] = 1e3*exp_csat_mean_std[:, 1]
vv.loc[['M23SP23T', 'M14NP14Q'], 'exp_293_dil'] = [.3422, .1716]
ee.loc[['M23SP23T', 'M14NP14Q'], 'exp_293_dil'] = [.01991, .005073]

# DOI: 10.1073/pnas.2000223117
vv.loc[['LAF1', 'LAF1D2130', 'LAF1shuf'], 'exp_293_dil'] = [.044, .275, .006]

# DOI: 10.1038/s41467-021-24727-z
vv.loc[['A1S150', 'A1S200', 'A1S300', 'A1S500'], 'exp_293_dil'] = [
    .21806, .15976, .09338, .06646]

# A2, DOI: 10.15252/embj.2020105001
# Ddx4, DOI: 10.1073/pnas.1706197114
# FUS, DOI: 10.1038/s41594-019-0250-x
vv.loc[['A2', 'Ddx4WT', 'FUS'], 'exp_297_dil'] = [.015, .230, .105]

In [None]:
# Experimental c_sat values for the A1 variants, read from a tab-separated
# file; '#' lines are comments. The first column holds the variant names and
# the remaining two columns are averaged row by row below.
seq_293_A1 = ['P23GM23S', 'A1', 'M10GP10S', 'M20GP20S', 'M30GP30S', 'P23GM23SM12FP12Y',
              'M12FP12Y', 'P23GM23SP7FM7Y', 'P7FM7Y']
exp = pd.read_csv('MC/exp_csat_20deg.dat',
            index_col=0,sep='\t',names=[0,1,2],comment='#')
# Translate the file's '-'/'+' notation into the 'M'/'P' notation used for
# the row labels of vv and ee.
exp.index = exp.index.map(lambda x: x.replace('-','M').replace('+','P'))
exp = exp.loc[seq_293_A1]
# Row-wise mean and sample standard deviation. The methods are called
# directly with an explicit ddof=1: passing np.mean/np.std to DataFrame.agg
# relies on pandas silently substituting its own mean/std (ddof=1), a mapping
# that is deprecated and would otherwise change the result to ddof=0.
# Divided by 1e3 to match the units of the other columns (presumably
# uM -> mM; confirm against the data file).
vv.loc[exp.index,'exp_293_dil'] = exp.mean(axis=1).values/1e3
ee.loc[exp.index,'exp_293_dil'] = exp.std(axis=1,ddof=1).values/1e3

In [None]:
# Persist the combined simulation/experiment tables: values and errors.
pd.to_pickle(vv, 'MC/conc_value.pkl')
pd.to_pickle(ee, 'MC/conc_error.pkl')