# Lysozyme PDB
## Determine ellipsoid geometry that will reproduce the theoretical scattering of the Lysozyme PDB (lysozyme_centered.pdb)

In [1]:
from bokeh.plotting import figure, output_notebook, show
from bokeh.models import Range1d, Span
from bokeh.palettes import Colorblind8 as palette
from bokeh.layouts import gridplot
from bokeh.charts import Scatter

In [2]:
output_notebook()

In [3]:
from sas_modeling.make_figures import solarized as palette

In [4]:
import logging
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG)

In [5]:
import os
import sys
import numpy as np

## Comparison to the 22.5 A x 15 A x 15 A ellipsoid
The plots below compare the theoretical scattering from the Lysozyme PDB (`lysozyme_centered.pdb`) and an ellipsoid with dimensions matching what Monica cited from Yun Liu.

theoretical scattering from the PDB

In [37]:
# iq_goal_fname = 'data/lysozyme_00001.iq'
# assert os.path.exists(iq_goal_fname)
# iq_goal = np.loadtxt(iq_goal_fname)

In [63]:
# Reference curve the ellipsoid models are compared against.
# The cells below read column 0 as q and column 1 as I(q).
iq_goal_fname = 'data/exp_lys/rebinned_12_exp_data_lysozyme.dat'
assert os.path.exists(iq_goal_fname)  # stop here if the data file is missing
iq_goal = np.loadtxt(iq_goal_fname)

theoretical scattering from Yun Liu's ellipsoid definition (22.5 A x 15 A x 15 A)

In [39]:
# Scattering curve for the 22.5 x 15 x 15 ellipsoid; plotted below with
# column 0 on the q axis and column 1 on the I(q) axis.
iq_model_fname = 'lys_ellipsoid/lys_ellipsoid_00001.iq'
assert os.path.exists(iq_model_fname)  # stop here if the model file is missing
iq_model = np.loadtxt(iq_model_fname)

In [40]:
# Two views of the same pair of curves: p1 is linear, p2 is log-log.
axis_labels = dict(x_axis_label='q (1/A)', y_axis_label='I(q)')
p2 = figure(width=400, height=400, x_axis_type='log', y_axis_type='log', **axis_labels)
p1 = figure(width=400, height=400, **axis_labels)

# (data, line styling) for each curve, drawn identically on both panels.
curve_specs = [
    (iq_goal, dict(color=palette[0], line_dash=[4, 0], legend='experiment')),
    (iq_model, dict(color=palette[1], line_dash=[4, 4], legend='22.5x15x15 model')),
]
for panel in (p1, p2):
    for xy, line_style in curve_specs:
        panel.line(xy[:, 0], xy[:, 1], line_width=2, **line_style)

# Frame the plots on the goal curve: q from its second to its last point,
# intensity from half its minimum to 1.1 times its maximum.
goal_q = iq_goal[:, 0]
goal_i = iq_goal[:, 1]
x_range = (goal_q[1], goal_q[-1])
y_range = (min(goal_i) * 0.5, max(goal_i) * 1.1)
logging.debug('x range: {}'.format(x_range))
logging.debug('y range: {}'.format(y_range))

# Both panels hold the very same range objects, so they pan and zoom together.
shared_x = Range1d(*x_range)
shared_y = Range1d(*y_range)
p1.x_range = shared_x
p1.y_range = shared_y
p2.x_range = shared_x
p2.y_range = shared_y
p2.legend.location = 'bottom_left'

plots = gridplot([[p1, p2]])
show(plots)

DEBUG: x range: (0.036364, 0.40000000000000002)
DEBUG: y range: (5.8035833333333331e-05, 0.012320000000000001)


## Comparison to a grid scan of the ellipsoid dimensions: <br/> 20.5-24.5 A x 13-17 A x 13-17 A

In [41]:
# Radius of gyration of the atomistic structure and of the coordinate-filled
# ellipsoid model, each computed by sasmol from its PDB file.
import sasmol.sasmol as sasmol
# NOTE(review): the meaning of the 0 passed to SasMol() and calcrg() is not
# visible in this notebook (presumably an id / frame index) - confirm.
atom = sasmol.SasMol(0)
atom.read_pdb('data/lysozyme_centered.pdb')
atom_rg = atom.calcrg(0)

model = sasmol.SasMol(0)
model.read_pdb('lys_ellipsoid/lys_ellipsoid_z.pdb')
model_rg = model.calcrg(0)

# atom_rg is reused further down as the position of the vertical marker line.
logging.debug('atomistic Rg: {}'.format(atom_rg))
logging.debug('model Rg: {}'.format(model_rg))

reading filename:  data/lysozyme_centered.pdb
num_atoms =  1960
>>> found  1  model(s) or frame(s)
finished reading frame =  1
reading filename:  lys_ellipsoid/lys_ellipsoid_z.pdb
num_atoms =  2000
>>> found 

DEBUG: atomistic Rg: 13.9946706991
DEBUG: model Rg: 13.8646412507


 1  model(s) or frame(s)
finished reading frame =  1


In [42]:
import pandas as pd
run_dir = 'lys_ellipsoid_scan1'

dimensions and Radius of Gyration for each of the generated ellipsoids

In [43]:
# Tab-separated scan log, indexed by its first column; the header names are
# whitespace-stripped so the columns can be addressed as rg / sx / sy / sz.
# NOTE(review): escapechar='#' presumably swallows a leading '#' on the header
# line - confirm against the log file.
log_fname = os.path.join(run_dir, 'sx13to17_sy13to17_sz20.5to24.5.log')
log = (
    pd.read_csv(log_fname, delimiter='\t', index_col=[0], escapechar='#')
    .rename(columns=lambda name: name.strip())
)

In [44]:
log.head()

Unnamed: 0_level_0,rg,sx,sy,sz
frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,12.320748,13.0,13.0,20.5
2,12.507873,13.0,14.0,20.5
3,12.706,13.0,15.0,20.5
4,13.069334,13.0,16.0,20.5
5,13.311778,13.0,17.0,20.5


R-factor and filename for the theoretical I(Q) of each of the generated ellipsoids

In [45]:
# Tab-separated R-factor table (r, scale, fnames), indexed by its first column;
# header names are whitespace-stripped.
r_fname = os.path.join(run_dir, 'sascalc_12/neutron_D2Op_100/nq_12_r_factors.txt')
r = (
    pd.read_csv(r_fname, delimiter='\t', index_col=[0])
    .rename(columns=lambda name: name.strip())
)

In [46]:
r.head()

Unnamed: 0,r,scale,fnames
1,0.049451,0.011101,./lys_ellipsoid_scan1/sascalc_12/neutron_D2Op_...
2,0.042831,0.011125,./lys_ellipsoid_scan1/sascalc_12/neutron_D2Op_...
3,0.039164,0.011149,./lys_ellipsoid_scan1/sascalc_12/neutron_D2Op_...
4,0.036621,0.011196,./lys_ellipsoid_scan1/sascalc_12/neutron_D2Op_...
5,0.047032,0.011228,./lys_ellipsoid_scan1/sascalc_12/neutron_D2Op_...


combined results

In [47]:
# Column-wise join of the geometry log and the R-factor table; pandas aligns
# the rows on the two frames' index values.
result = pd.concat([log, r], axis=1)

In [48]:
result.head()

Unnamed: 0_level_0,rg,sx,sy,sz,r,scale,fnames
frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,12.320748,13.0,13.0,20.5,0.049451,0.011101,./lys_ellipsoid_scan1/sascalc_12/neutron_D2Op_...
2,12.507873,13.0,14.0,20.5,0.042831,0.011125,./lys_ellipsoid_scan1/sascalc_12/neutron_D2Op_...
3,12.706,13.0,15.0,20.5,0.039164,0.011149,./lys_ellipsoid_scan1/sascalc_12/neutron_D2Op_...
4,13.069334,13.0,16.0,20.5,0.036621,0.011196,./lys_ellipsoid_scan1/sascalc_12/neutron_D2Op_...
5,13.311778,13.0,17.0,20.5,0.047032,0.011228,./lys_ellipsoid_scan1/sascalc_12/neutron_D2Op_...


In [49]:
# idxmin / idxmax return index *labels* of `result` (the frame numbers), not
# 0-based row positions; use them with .loc.
i_best = result['r'].idxmin()
i_worst = result['r'].idxmax()
logging.info('\nbest: {}\nworst: {}'.format(i_best, i_worst))

INFO: 
best: 76
worst: 125


### The best parameters

In [50]:
result.loc[i_best]

rg                                                  13.3488
sx                                                       13
sy                                                       13
sz                                                     23.5
r                                                 0.0248975
scale                                             0.0112296
fnames    ./lys_ellipsoid_scan1/sascalc_12/neutron_D2Op_...
Name: 76, dtype: object

### The worst parameters

In [51]:
result.loc[i_worst]

rg                                                  15.3821
sx                                                       17
sy                                                       17
sz                                                     24.5
r                                                  0.166328
scale                                             0.0115277
fnames    ./lys_ellipsoid_scan1/sascalc_12/neutron_D2Op_...
Name: 125, dtype: object

### Load and Plot the theoretical scattering

In [52]:
len(iq_goal)

12

In [53]:
# Collect every scanned model curve into one array.
# Column 0 holds q; column k (k >= 1) holds the curve for the k-th row of
# `result`. The plotting cells address these columns with frame labels taken
# from `result.index`, which only lines up when the frames are labelled 1..N
# in row order, so that assumption is checked here.
assert np.array_equal(result.index, np.arange(1, len(result) + 1)), \
    'iq_data columns are addressed by frame label; expected labels 1..N in order'

iq_data = np.empty((len(iq_goal), len(result) + 1), order='F')
iq_data[:, 0] = iq_goal[:, 0]
# enumerate() instead of xrange() so the cell runs on Python 2 and Python 3.
for i, fname in enumerate(result.fnames):
    this_iq_data = np.loadtxt(fname)
    # Rescale so each curve equals the goal intensity at the first q point.
    iq_data[:, i + 1] = this_iq_data[:, 1] * iq_goal[0, 1] / this_iq_data[0, 1]

In [54]:
def errorbar(fig, x, y, xerr=None, yerr=None, color='red',
             point_kwargs=None, error_kwargs=None):
    """Draw circle markers on ``fig`` with optional error bars.

    Parameters
    ----------
    fig : object with ``circle`` and ``multi_line`` methods (e.g. a bokeh figure)
    x, y : sequences of point coordinates
    xerr, yerr : sequences of half-widths, or None to skip that direction;
        each bar spans ``value - err`` to ``value + err``
    color : colour passed to the markers and to the bars
    point_kwargs : dict of extra keyword arguments for ``fig.circle``, or None
    error_kwargs : dict of extra keyword arguments for ``fig.multi_line``, or None

    Returns
    -------
    None; the glyphs are added to ``fig``.
    """
    # None defaults: a literal {} default would be one dict shared by all calls.
    point_kwargs = {} if point_kwargs is None else point_kwargs
    error_kwargs = {} if error_kwargs is None else error_kwargs

    fig.circle(x, y, color=color, **point_kwargs)

    if xerr is not None:
        # Horizontal bars: x varies by +/- err, y stays fixed.
        triples = list(zip(x, y, xerr))
        fig.multi_line([(px - err, px + err) for px, _, err in triples],
                       [(py, py) for _, py, _ in triples],
                       color=color, **error_kwargs)

    if yerr is not None:
        # Vertical bars: y varies by +/- err, x stays fixed.
        triples = list(zip(x, y, yerr))
        fig.multi_line([(px, px) for px, _, _ in triples],
                       [(py - err, py + err) for _, py, err in triples],
                       color=color, **error_kwargs)

In [55]:
result.loc[i_best].sz

23.5

In [56]:
# Left panel: R-factor against Rg for every scanned ellipsoid, with a vertical
# marker at the Rg computed from the atomistic PDB.
p1 = Scatter(result, x='rg', y='r', color=palette[1], width=400, height=400,
             xlabel='Rg (A)', ylabel='R-factor')
vline = Span(location=atom_rg, dimension='height', line_color=palette[0], line_width=3)
p1.renderers.extend([vline])

# Right panel: goal curve (1% error bars) with the best scan curve, the
# 22.5x15x15 reference model and the worst scan curve.
p2 = figure(width=400, height=400, x_axis_label='q (1/A)', y_axis_label='I(q)') #, x_axis_type='log', y_axis_type='log')

#for i in result.index:
#    p2.line(iq_data[:, 0], iq_data[:, i], color='black', line_width=2, legend='scan')
scan_q = iq_data[:, 0]
best_row = result.loc[i_best]
best_label = 'best: {}x{}x{}'.format(best_row.sz, best_row.sx, best_row.sy)

errorbar(p2, iq_goal[:, 0], iq_goal[:, 1],
         yerr=iq_goal[:, 1]*0.01,
         color=palette[0],
         point_kwargs={'legend': 'atomistic PDB'})
p2.line(scan_q, iq_data[:, i_best],
        color=palette[1], line_width=2, line_dash=[5, 1], legend=best_label)
p2.line(iq_model[:, 0], iq_model[:, 1],
        color=palette[2], line_width=2, legend='Liu: 22.5x15x15')
p2.line(scan_q, iq_data[:, i_worst],
        color=palette[3], line_width=2, line_dash=[4, 2, 2], legend='worst')

plots = gridplot([[p1, p2]])
show(plots)

In [86]:
# Rebinds iq_goal (and iq_goal_fname) to a different data file: when the
# notebook is run top to bottom, every cell after this one sees this array
# rather than the rebinned file loaded near the top.
iq_goal_fname = 'data/exp_lys/1mgml_lys_sans.dat'
assert os.path.exists(iq_goal_fname)  # stop here if the data file is missing
iq_goal = np.loadtxt(iq_goal_fname)

In [87]:
# NOTE(review): in-place and not idempotent - re-running this cell multiplies
# the first intensity by 0.97 again. The reason for the 0.97 factor is not
# recorded in this notebook; confirm and document it.
iq_goal[0, 1] *= 0.97

In [88]:
# Normalise so the first intensity becomes 1. The divisor is read out as a
# scalar before the in-place division, so every row is divided by the same value.
# NOTE(review): the slice 1: also rescales columns 2 and 3. Column 2 is used
# below as the error bar; what column 3 holds is not visible here - confirm it
# should be scaled too.
iq_goal[:, 1:] /= iq_goal[0, 1]
iq_goal

array([[ 0.        ,  1.        ,  0.01775931,  0.        ],
       [ 0.02704   ,  0.96974258,  0.06277236,  0.45597024],
       [ 0.03053   ,  0.97180772,  0.03643743,  0.46014367],
       [ 0.03403   ,  0.96755135,  0.03721663,  0.46511628],
       [ 0.03754   ,  0.95585373,  0.03170142,  0.47079926],
       [ 0.04105   ,  0.91650586,  0.03309358,  0.47701502],
       [ 0.04457   ,  0.88374945,  0.02983365,  0.48385235],
       [ 0.04809   ,  0.8558468 ,  0.0289935 ,  0.49113367],
       [ 0.05161   ,  0.84126801,  0.02740832,  0.49894776],
       [ 0.05513   ,  0.85766969,  0.02456565,  0.50729464],
       [ 0.05865   ,  0.81462905,  0.02648159,  0.5159967 ],
       [ 0.06217   ,  0.77186142,  0.02608114,  0.52505394],
       [ 0.0657    ,  0.77271121,  0.02616191,  0.53455517],
       [ 0.06922   ,  0.76673028,  0.02500099,  0.54432279],
       [ 0.07274   ,  0.73219357,  0.0250707 ,  0.5545344 ],
       [ 0.07625   ,  0.70531538,  0.02462   ,  0.56501239],
       [ 0.07977   ,  0.

In [89]:
q1 = iq_data[1, 0]

In [90]:
np.argmin(np.abs(iq_goal[:10, 0] - q1))

4

In [91]:
# Overlay every scanned model curve on the experimental data under two
# normalisations. Columns of iq_data are addressed by frame label, as in the
# cells above (column 0 is q).
best_row = result.loc[i_best]
worst_row = result.loc[i_worst]
best_label = 'best: {}x{}x{}'.format(best_row.sz, best_row.sx, best_row.sy)
worst_label = 'worst: {}x{}x{}'.format(worst_row.sz, worst_row.sx, worst_row.sy)
q_model = iq_data[:, 0]

# --- Left panel: each curve divided by its own first-point intensity. ---
p1 = figure(width=400, height=400, x_axis_label='q (1/A)', y_axis_label='I(q)',
            title='Scale to I(0)')

for i in result.index:
    p1.line(q_model, iq_data[:, i]/iq_data[0, i], color='black', line_width=2, legend='scan', alpha=0.1)

errorbar(p1, iq_goal[:, 0], iq_goal[:, 1], yerr=iq_goal[:, 2], color=palette[0],
         point_kwargs={'legend': 'experimental data'})

p1.line(q_model, iq_data[:, i_best]/iq_data[0, i_best], color=palette[1], line_width=2, line_dash=[5, 1],
        legend=best_label)

p1.line(q_model, iq_data[:, i_worst]/iq_data[0, i_worst], color=palette[3], line_width=2, line_dash=[4, 2, 2],
        legend=worst_label)

# --- Right panel: each curve scaled to meet the experiment at Q1. ---
# Q1 is the second q value of the model grid; the experimental point closest
# to it is searched for among the first 10 experimental points only.
q1 = iq_data[1, 0]
i_match = np.argmin(np.abs(iq_goal[:10, 0] - q1))
# One factor per model column, so that every curve (not just the first scan
# curve) passes through the experimental intensity at Q1. `scale` is indexed
# like the columns of iq_data; entry 0 belongs to the q column and stays 1.
scale = np.ones(iq_data.shape[1])
scale[1:] = iq_goal[i_match, 1] / iq_data[1, 1:]

p2 = figure(width=400, height=400, x_axis_label='q (1/A)', y_axis_label='I(q)',
            title='Scale to I(Q1)')

for i in result.index:
    p2.line(q_model, iq_data[:, i]*scale[i], color='black', line_width=2, legend='scan', alpha=0.1)

# Same array as on the left panel, so it carries the same legend label.
errorbar(p2, iq_goal[:, 0], iq_goal[:, 1], yerr=iq_goal[:, 2],
         color=palette[0],
         point_kwargs={'legend': 'experimental data'})

p2.line(q_model, iq_data[:, i_best]*scale[i_best], color=palette[1], line_width=2, line_dash=[5, 1],
        legend=best_label)

p2.line(q_model, iq_data[:, i_worst]*scale[i_worst], color=palette[3], line_width=2, line_dash=[4, 2, 2],
        legend=worst_label)


plots = gridplot([[p1, p2]])
show(plots)

These Rg values are calculated from the PDBs representing ellipsoids filled with coordinates. The vertical blue line marks the Rg of the atomistic Lysozyme PDB. The error for the atomistic PDB is 1% of the scattering.