# Lysozyme PDB
## Determine ellipsoid geometry that will reproduce the theoretical scattering of the Lysozyme PDB (lysozyme_centered.pdb)

In [1]:
from bokeh.plotting import figure, output_notebook, show
from bokeh.models import Range1d, Span
from bokeh.palettes import Colorblind8 as palette
from bokeh.layouts import gridplot
from bokeh.charts import Scatter

In [2]:
# Render Bokeh figures inline in the notebook instead of in a separate file.
output_notebook()

In [3]:
import logging
# Show DEBUG and higher messages as "LEVEL: message"; the DEBUG/INFO lines in
# the cell outputs below come from this configuration.
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG)

In [4]:
import os
import sys
import numpy as np

## Comparison to the 22.5 A x 15 A x 15 A ellipsoid
The plots below compare the theoretical scattering from the Lysozyme PDB (`lysozyme_centered.pdb`) and an ellipsoid with dimensions matching what Monica cited from Yun Liu.

theoretical scattering from the PDB

In [5]:
# Reference curve: scattering calculated from the atomistic PDB.
# The plots below use column 0 as q and column 1 as I(q).
iq_goal_fname = 'data/lysozyme_00001.iq'
# Fail early with a clear location if the notebook is run from the wrong directory.
assert os.path.exists(iq_goal_fname)
iq_goal = np.loadtxt(iq_goal_fname)

theoretical scattering from Yun Liu's ellipsoid definition (22.5 A x 15 A x 15 A)

In [6]:
# Curve for the 22.5 x 15 x 15 A ellipsoid; plotted below with column 0 as q
# and column 1 as I(q), the same layout as iq_goal.
iq_model_fname = 'lys_ellipsoid/lys_ellipsoid_00001.iq'
# Fail early with a clear location if the notebook is run from the wrong directory.
assert os.path.exists(iq_model_fname)
iq_model = np.loadtxt(iq_model_fname)

In [7]:
# Compare the PDB-derived curve with the 22.5 x 15 x 15 A ellipsoid curve on
# linear axes (p1, left panel) and on log-log axes (p2, right panel).
axis_labels = dict(x_axis_label='q (1/A)', y_axis_label='I(q)')
p1 = figure(width=400, height=400, **axis_labels)
p2 = figure(width=400, height=400, x_axis_type='log', y_axis_type='log', **axis_labels)

# Draw the same two curves on both panels.  The reference curve is the
# scattering calculated from the PDB, not a measurement, so it is labelled
# 'atomistic PDB' (the same label the scan plot further down uses).
for fig in (p1, p2):
    fig.line(iq_goal[:, 0], iq_goal[:, 1], color=palette[0], line_width=2, line_dash=[4, 0], legend='atomistic PDB')
    fig.line(iq_model[:, 0], iq_model[:, 1], color=palette[1], line_width=2, line_dash=[4, 4], legend='22.5x15x15 model')

# The x range starts at the second q value rather than the first
# (presumably the first row is q = 0, which a log axis cannot show -- confirm).
x_range = (iq_goal[1, 0], iq_goal[-1, 0])
# Pad the y range: half the minimum below, 10% above the maximum.
y_range = (min(iq_goal[:, 1]) * 0.5, max(iq_goal[:, 1]) * 1.1)
logging.debug('x range: {}'.format(x_range))
logging.debug('y range: {}'.format(y_range))
p1.x_range = Range1d(*x_range)
p1.y_range = Range1d(*y_range)
# Share the range objects so both panels always show the same window.
p2.x_range = p1.x_range
p2.y_range = p1.y_range
p2.legend.location = 'bottom_left'

plots = gridplot([[p1, p2]])
show(plots)

DEBUG: x range: (0.0050000000000000001, 0.29999999999999999)
DEBUG: y range: (0.0038509999999999998, 1.1000000000000001)


## Comparison to a grid scan of the ellipsoid dimensions: <br/> 20.5-24.5 A x 13-17 A x 13-17 A

In [8]:
import sasmol.sasmol as sasmol


def _read_pdb_with_rg(pdb_fname):
    """Read `pdb_fname` into a new SasMol and return (molecule, calcrg(0))."""
    mol = sasmol.SasMol(0)
    mol.read_pdb(pdb_fname)
    # calcrg(0): presumably the radius of gyration of frame 0 -- confirm
    # against the sasmol documentation.
    return mol, mol.calcrg(0)


# Rg of the atomistic structure and of the ellipsoid model, for comparison.
atom, atom_rg = _read_pdb_with_rg('data/lysozyme_centered.pdb')
model, model_rg = _read_pdb_with_rg('lys_ellipsoid/lys_ellipsoid_z.pdb')

logging.debug('atomistic Rg: {}'.format(atom_rg))
logging.debug('model Rg: {}'.format(model_rg))

DEBUG: atomistic Rg: 13.9946706991
DEBUG: model Rg: 13.8646412507


reading filename:  data/lysozyme_centered.pdb
num_atoms =  1960
>>> found  1  model(s) or frame(s)
finished reading frame =  1
reading filename:  lys_ellipsoid/lys_ellipsoid_z.pdb
num_atoms =  2000
>>> found  1  model(s) or frame(s)
finished reading frame =  1


In [9]:
import pandas as pd
# Directory holding the grid-scan outputs; the scan log and the R-factor
# table read in the cells below are both located relative to it.
run_dir = 'lys_ellipsoid_scan1'

dimensions and Radius of Gyration for each of the generated ellipsoids

In [10]:
# Tab-separated scan log, indexed by its first column.  '#' is passed as the
# escape character, and column names are stripped of surrounding whitespace
# so they can be used as plain attribute/column names.
log_fname = os.path.join(run_dir, 'sx13to17_sy13to17_sz20.5to24.5.log')
log = (
    pd.read_csv(log_fname, delimiter='\t', index_col=[0], escapechar='#')
    .rename(columns=lambda name: name.strip())
)

In [11]:
# Preview the first rows of the scan log (rg, sx, sy, sz per frame).
log.head()

Unnamed: 0_level_0,rg,sx,sy,sz
frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,12.320748,13.0,13.0,20.5
2,12.507873,13.0,14.0,20.5
3,12.706,13.0,15.0,20.5
4,13.069334,13.0,16.0,20.5
5,13.311778,13.0,17.0,20.5


R-factor and filename for the theoretical I(Q) of each of the generated ellipsoids

In [12]:
# Tab-separated R-factor table, indexed by its first column, with column
# names stripped of surrounding whitespace.
r_fname = os.path.join(run_dir, 'sascalc_61/neutron_D2Op_100/pdb_r_factors.txt')
r = (
    pd.read_csv(r_fname, delimiter='\t', index_col=[0])
    .rename(columns=lambda name: name.strip())
)

In [13]:
# Preview the first rows of the R-factor table (r, fnames).
r.head()

Unnamed: 0,r,fnames
1,0.063434,lys_ellipsoid_scan1/sascalc_61/neutron_D2Op_10...
2,0.038537,lys_ellipsoid_scan1/sascalc_61/neutron_D2Op_10...
3,0.032567,lys_ellipsoid_scan1/sascalc_61/neutron_D2Op_10...
4,0.017184,lys_ellipsoid_scan1/sascalc_61/neutron_D2Op_10...
5,0.018528,lys_ellipsoid_scan1/sascalc_61/neutron_D2Op_10...


combined results

In [14]:
# Join the two tables column-wise; rows are aligned on their shared index.
result = pd.concat([log, r], axis=1)

In [15]:
# Preview the combined table (rg, sx, sy, sz, r, fnames).
result.head()

Unnamed: 0_level_0,rg,sx,sy,sz,r,fnames
frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,12.320748,13.0,13.0,20.5,0.063434,lys_ellipsoid_scan1/sascalc_61/neutron_D2Op_10...
2,12.507873,13.0,14.0,20.5,0.038537,lys_ellipsoid_scan1/sascalc_61/neutron_D2Op_10...
3,12.706,13.0,15.0,20.5,0.032567,lys_ellipsoid_scan1/sascalc_61/neutron_D2Op_10...
4,13.069334,13.0,16.0,20.5,0.017184,lys_ellipsoid_scan1/sascalc_61/neutron_D2Op_10...
5,13.311778,13.0,17.0,20.5,0.018528,lys_ellipsoid_scan1/sascalc_61/neutron_D2Op_10...


In [16]:
# Index labels of the rows with the smallest and largest R-factor.
r_factors = result['r']
i_best, i_worst = r_factors.idxmin(), r_factors.idxmax()
logging.info('\nbest: {}\nworst: {}'.format(i_best, i_worst))

INFO: 
best: 56
worst: 125


### The best parameters

In [17]:
# Full row for the lowest R-factor (i_best is an index label, hence .loc).
result.loc[i_best]

rg                                                  13.3364
sx                                                       14
sy                                                       13
sz                                                     22.5
r                                                0.00630226
fnames    lys_ellipsoid_scan1/sascalc_61/neutron_D2Op_10...
Name: 56, dtype: object

### The worst parameters

In [18]:
# Full row for the highest R-factor (i_worst is an index label, hence .loc).
result.loc[i_worst]

rg                                                  15.3821
sx                                                       17
sy                                                       17
sz                                                     24.5
r                                                  0.153462
fnames    lys_ellipsoid_scan1/sascalc_61/neutron_D2Op_10...
Name: 125, dtype: object

### Load and Plot the theoretical scattering

In [21]:
# Collect every scanned curve into one array:
#   column 0     -> q values, copied from iq_goal
#   column k + 1 -> I(q) loaded from the file of result.iloc[k]
iq_data = np.empty((len(iq_goal), len(result) + 1), order='F')
iq_data[:, 0] = iq_goal[:, 0]
# enumerate() replaces the Python-2-only xrange() index loop and runs
# unchanged on Python 2 and Python 3.
for i, fname in enumerate(result['fnames']):
    this_iq_data = np.loadtxt(fname)
    # Rescale so that the first point of each curve equals the first point of
    # the reference curve.
    iq_data[:, i + 1] = this_iq_data[:, 1] * iq_goal[0, 1] / this_iq_data[0, 1]

In [29]:
def errorbar(fig, x, y, xerr=None, yerr=None, color='red',
             point_kwargs=None, error_kwargs=None):
    """Draw points on `fig` with optional horizontal/vertical error bars.

    Parameters
    ----------
    fig : object providing ``circle`` and ``multi_line`` methods
        The figure to draw on.
    x, y : sequences of numbers
        Point coordinates.
    xerr, yerr : sequences of numbers, optional
        Half-widths of the error bars along x and y.  A bar spans
        ``value - err`` to ``value + err``.  ``None`` (default) draws no bars
        in that direction.
    color : str
        Color used for both the points and the bars.
    point_kwargs : dict, optional
        Extra keyword arguments forwarded to ``fig.circle``.
    error_kwargs : dict, optional
        Extra keyword arguments forwarded to ``fig.multi_line``.
    """
    # None sentinels instead of ``{}`` defaults: a dict default is created
    # once and shared between every call of the function.
    point_kwargs = {} if point_kwargs is None else point_kwargs
    error_kwargs = {} if error_kwargs is None else error_kwargs

    fig.circle(x, y, color=color, **point_kwargs)

    if xerr is not None:
        # One horizontal segment per point, at constant y.
        triples = list(zip(x, y, xerr))
        x_err_x = [(px - err, px + err) for px, _, err in triples]
        x_err_y = [(py, py) for _, py, _ in triples]
        fig.multi_line(x_err_x, x_err_y, color=color, **error_kwargs)

    if yerr is not None:
        # One vertical segment per point, at constant x.
        triples = list(zip(x, y, yerr))
        y_err_x = [(px, px) for px, _, _ in triples]
        y_err_y = [(py - err, py + err) for _, py, err in triples]
        fig.multi_line(y_err_x, y_err_y, color=color, **error_kwargs)

In [47]:
# Quick check of the sz value in the best-scoring row.
result.loc[i_best].sz

22.5

In [52]:
# Left panel: R-factor of every scanned ellipsoid against its Rg, with a
# vertical line at the Rg of the atomistic PDB.
p1 = Scatter(result, x='rg', y='r', color=palette[1], width=400, height=400,
             xlabel='Rg (A)', ylabel='R-factor')
vline = Span(location=atom_rg, dimension='height', line_color=palette[0], line_width=3)
p1.renderers.extend([vline])

# Right panel: the I(q) curves themselves, on linear axes.
p2 = figure(width=400, height=400, x_axis_label='q (1/A)', y_axis_label='I(q)')

# iq_data layout: column 0 is q, column k + 1 is the curve of result.iloc[k].
# i_best / i_worst are index *labels*, so convert them to positions instead of
# relying on the frame labels being exactly 1..N.
q = iq_data[:, 0]
best_col = result.index.get_loc(i_best) + 1
worst_col = result.index.get_loc(i_worst) + 1
best = result.loc[i_best]

# Every scanned curve in black as the background.
for col in range(1, iq_data.shape[1]):
    p2.line(q, iq_data[:, col], color='black', line_width=2, legend='scan')
# Reference curve from the atomistic PDB, with error bars of 1% of I(q).
errorbar(p2, iq_goal[:, 0], iq_goal[:, 1], yerr=iq_goal[:, 1]*0.01, color=palette[0],
         point_kwargs={'legend': 'atomistic PDB'})
# Highlighted curves; dimensions are written as sz x sx x sy to match the
# 22.5x15x15 label.
p2.line(q, iq_data[:, best_col], color=palette[1], line_width=2, line_dash=[5, 1],
        legend='best: {}x{}x{}'.format(best.sz, best.sx, best.sy))
p2.line(iq_model[:, 0], iq_model[:, 1], color=palette[2], line_width=2,
        legend='Liu: 22.5x15x15')
p2.line(q, iq_data[:, worst_col], color=palette[3], line_width=2, line_dash=[4, 2, 2],
        legend='worst')

plots = gridplot([[p1, p2]])
show(plots)

These Rg values are calculated from the PDBs representing ellipsoids filled with coordinates. The vertical blue line marks the Rg of the atomistic lysozyme PDB. The error bars on the atomistic PDB curve are 1% of its I(q).