# Analysis of 2nd torus model optimization to match FC region

In [1]:
from bokeh.plotting import figure, output_notebook, show
from bokeh.models import Range1d, Span
from bokeh.palettes import Dark2_5 as palette
from bokeh.layouts import gridplot
from bokeh.charts import Scatter

In [2]:
# Configure Bokeh so that subsequent show() calls render inline in the notebook.
output_notebook()

In [3]:
import logging
# Root logger emits DEBUG and above, each record formatted as "LEVEL: message".
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG)

In [4]:
import os
import sys

import numpy as np

In [5]:
# Scattering curve plotted as 'experiment' below; columns 0 and 1 are used as
# q and I(q) by the plotting cells.
iq_goal_fname = 'data/lysozyme_00001.iq'
assert os.path.exists(iq_goal_fname), 'ERROR: file not found: {}'.format(iq_goal_fname)
iq_goal = np.loadtxt(iq_goal_fname)

# Scattering curve plotted as the '22.5x15x15 model' below.
iq_model_fname = 'lys_ellipsoid/lys_ellipsoid_00001.iq'
assert os.path.exists(iq_model_fname), 'ERROR: file not found: {}'.format(iq_model_fname)
iq_model = np.loadtxt(iq_model_fname)

In [6]:
# Two panels showing the same pair of curves: log-log axes on the left (p1),
# linear axes on the right (p2).
fig_kwargs = dict(width=400, height=400, x_axis_label='q (1/A)', y_axis_label='I(q)')
p1 = figure(x_axis_type='log', y_axis_type='log', **fig_kwargs)
p2 = figure(**fig_kwargs)

# Draw the experimental curve (solid) and the model curve (dashed) on each panel.
for panel in (p1, p2):
    panel.line(iq_goal[:, 0], iq_goal[:, 1], color=palette[0], line_width=2,
               line_dash=[4, 0], legend='experiment')
    panel.line(iq_model[:, 0], iq_model[:, 1], color=palette[1], line_width=2,
               line_dash=[4, 4], legend='22.5x15x15 model')

# Axis limits are taken from the experimental curve, starting at its second
# point (presumably to skip a q=0 first point on the log axis -- confirm).
q_first, q_last = iq_goal[1, 0], iq_goal[-1, 0]
x_range = (q_first, q_last)
i_low = min(iq_goal[:, 1]) * 0.5
i_high = iq_goal[1, 1] * 1.5
y_range = (i_low, i_high)
logging.debug('x range: {}'.format(x_range))
logging.debug('y range: {}'.format(y_range))

# p2 reuses p1's range objects, so both panels share the same limits.
p1.x_range = Range1d(*x_range)
p1.y_range = Range1d(*y_range)
p2.x_range = p1.x_range
p2.y_range = p1.y_range
p1.legend.location = 'bottom_left'

plots = gridplot([[p1, p2]])
show(plots)

DEBUG: x range: (0.0050000000000000001, 0.29999999999999999)
DEBUG: y range: (0.0038509999999999998, 1.4977845000000001)


In [7]:
import sasmol.sasmol as sasmol


def _read_pdb_rg(pdb_fname):
    """Read `pdb_fname` into a new SasMol and return (molecule, calcrg(0))."""
    mol = sasmol.SasMol(0)
    mol.read_pdb(pdb_fname)
    return mol, mol.calcrg(0)


# Rg of the atomistic structure and of the ellipsoid model, for comparison.
atom, atom_rg = _read_pdb_rg('data/lysozyme_centered.pdb')
model, model_rg = _read_pdb_rg('lys_ellipsoid/lys_ellipsoid_z.pdb')

logging.debug('atomistic Rg: {}'.format(atom_rg))
logging.debug('model Rg: {}'.format(model_rg))

DEBUG: atomistic Rg: 13.9946706991
DEBUG: model Rg: 13.8646412507


reading filename:  data/lysozyme_centered.pdb
num_atoms =  1960
>>> found  1  model(s) or frame(s)
finished reading frame =  1
reading filename:  lys_ellipsoid/lys_ellipsoid_z.pdb
num_atoms =  2000
>>> found  1  model(s) or frame(s)
finished reading frame =  1


In [8]:
import pandas as pd


def _strip_label(label):
    """Return a column label with surrounding whitespace removed."""
    return label.strip()


run_dir = 'lys_ellipsoid_scan1'

# Scan log: tab-separated, first column used as the index.
log_fname = os.path.join(run_dir, 'sx13to17_sy13to17_sz20.5to24.5.log')
log = (pd.read_csv(log_fname, delimiter='\t', index_col=[0], escapechar='#')
       .rename(columns=_strip_label))

# R-factors of the calculated curves: tab-separated, first column used as the index.
r_fname = os.path.join(run_dir, 'sascalc/neutron_D2Op_100/pdb_r_factors.txt')
r = (pd.read_csv(r_fname, delimiter='\t', index_col=[0])
     .rename(columns=_strip_label))

In [9]:
# Preview the first rows of the R-factor table.
r.head()

Unnamed: 0,r,fnames
1,0.063434,lys_ellipsoid_scan1/sascalc/neutron_D2Op_100/l...
2,0.038537,lys_ellipsoid_scan1/sascalc/neutron_D2Op_100/l...
3,0.032567,lys_ellipsoid_scan1/sascalc/neutron_D2Op_100/l...
4,0.017184,lys_ellipsoid_scan1/sascalc/neutron_D2Op_100/l...
5,0.018528,lys_ellipsoid_scan1/sascalc/neutron_D2Op_100/l...


In [10]:
# Preview the first rows of the scan log table.
log.head()

Unnamed: 0_level_0,rg,sx,sy,sz
frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,12.320748,13.0,13.0,20.5
2,12.507873,13.0,14.0,20.5
3,12.706,13.0,15.0,20.5
4,13.069334,13.0,16.0,20.5
5,13.311778,13.0,17.0,20.5


In [11]:
# Place the scan log and R-factor columns side by side; pd.concat with axis=1
# aligns the two frames on their index labels.
result = pd.concat([log, r], axis=1)

In [12]:
# Preview the first rows of the combined table.
result.head()

Unnamed: 0_level_0,rg,sx,sy,sz,r,fnames
frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,12.320748,13.0,13.0,20.5,0.063434,lys_ellipsoid_scan1/sascalc/neutron_D2Op_100/l...
2,12.507873,13.0,14.0,20.5,0.038537,lys_ellipsoid_scan1/sascalc/neutron_D2Op_100/l...
3,12.706,13.0,15.0,20.5,0.032567,lys_ellipsoid_scan1/sascalc/neutron_D2Op_100/l...
4,13.069334,13.0,16.0,20.5,0.017184,lys_ellipsoid_scan1/sascalc/neutron_D2Op_100/l...
5,13.311778,13.0,17.0,20.5,0.018528,lys_ellipsoid_scan1/sascalc/neutron_D2Op_100/l...


In [13]:
# Index labels of the rows holding the smallest and the largest R-factor.
r_column = result['r']
i_best, i_worst = r_column.idxmin(), r_column.idxmax()
summary = '\nbest: {}\nworst: {}'.format(i_best, i_worst)
logging.info(summary)

INFO: 
best: 56
worst: 125


In [14]:
# Show only the rows whose sz value equals 22.5.
result.loc[result['sz'] == 22.5]

Unnamed: 0_level_0,rg,sx,sy,sz,r,fnames
frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
51,13.072,13.0,13.0,22.5,0.022476,lys_ellipsoid_scan1/sascalc/neutron_D2Op_100/l...
52,13.1693,13.0,14.0,22.5,0.007421,lys_ellipsoid_scan1/sascalc/neutron_D2Op_100/l...
53,13.434784,13.0,15.0,22.5,0.019585,lys_ellipsoid_scan1/sascalc/neutron_D2Op_100/l...
54,13.612343,13.0,16.0,22.5,0.033431,lys_ellipsoid_scan1/sascalc/neutron_D2Op_100/l...
55,13.655938,13.0,17.0,22.5,0.065439,lys_ellipsoid_scan1/sascalc/neutron_D2Op_100/l...
56,13.336388,14.0,13.0,22.5,0.006302,lys_ellipsoid_scan1/sascalc/neutron_D2Op_100/l...
57,13.282775,14.0,14.0,22.5,0.016416,lys_ellipsoid_scan1/sascalc/neutron_D2Op_100/l...
58,13.706066,14.0,15.0,22.5,0.034617,lys_ellipsoid_scan1/sascalc/neutron_D2Op_100/l...
59,13.754028,14.0,16.0,22.5,0.059181,lys_ellipsoid_scan1/sascalc/neutron_D2Op_100/l...
60,14.115566,14.0,17.0,22.5,0.075246,lys_ellipsoid_scan1/sascalc/neutron_D2Op_100/l...


In [15]:
# List the column labels of the combined table.
result.columns

Index([u'rg', u'sx', u'sy', u'sz', u'r', u'fnames'], dtype='object')

In [16]:
import glob

In [17]:
# Load the curve the scan is compared against, keeping only its first two
# columns (used as q and I(q) in the cells below).
goal_fname = 'data/lysozyme_00001.iq'
goal_iq = np.loadtxt(goal_fname)[:, :2]
# Number of rows (q points). Read from shape[0] rather than unpacking into
# `_`, because assigning `_` in IPython shadows its last-output variable.
nq = goal_iq.shape[0]

In [18]:
# Gather every calculated I(q) curve of the scan into one array:
#   column 0      -> q values of the goal curve
#   column i + 1  -> I(q) of the i-th file (sorted by name), rescaled so that
#                    its first point equals the first point of the goal curve
run_dir = 'lys_ellipsoid_scan1/sascalc/neutron_D2Op_100/'
ext = '*.iq'
file_search = os.path.join(run_dir, ext)
iq_fnames = sorted(glob.glob(file_search))
assert iq_fnames, 'ERROR: no files found using {}'.format(file_search)

iq_data = np.empty((nq, len(iq_fnames)+1), order='F')
iq_data[:, 0] = goal_iq[:, 0]

# Use goal_iq (loaded alongside nq) for the scaling reference as well, so this
# cell depends on a single copy of the goal curve.
i0_goal = goal_iq[0, 1]
for i, fname in enumerate(iq_fnames):
    this_iq_data = np.loadtxt(fname)
    iq_data[:, i+1] = this_iq_data[:, 1] * i0_goal / this_iq_data[0, 1]

In [25]:
# Left panel: R-factor of every scanned model against its Rg, with a vertical
# line at the Rg of the atomistic structure (atom_rg).
p1 = Scatter(result, x='rg', y='r', color=palette[1], width=400, height=400,
             xlabel='Rg (A)', ylabel='R-factor')
vline = Span(location=atom_rg, dimension='height', line_color=palette[0], line_width=3)
# Annotations such as Span are attached to a plot with add_layout().
p1.add_layout(vline)

# Right panel: experimental curve, reference model, and the best / worst
# curves of the scan on linear axes.
p2 = figure(width=400, height=400, x_axis_label='q (1/A)', y_axis_label='I(q)')

# NOTE(review): i_best / i_worst are index labels of `result` and are used
# here directly as column numbers of iq_data. This assumes the sorted file
# order matches the 1-based frame numbering -- confirm.
p2.line(iq_goal[:, 0], iq_goal[:, 1], color=palette[0], line_width=2, line_dash=[6, 6],
        legend='experiment')
p2.line(iq_data[:, 0], iq_data[:, i_best], color=palette[1], line_width=2, line_dash=[5, 1],
        legend='best')
p2.line(iq_model[:, 0], iq_model[:, 1], color=palette[2], line_width=2,
        legend='22.5x15x15 model')
p2.line(iq_data[:, 0], iq_data[:, i_worst], color=palette[3], line_width=2, line_dash=[4, 2, 2],
        legend='worst')

# Axis limits are taken from the experimental curve, starting at its second point.
x_range = (iq_goal[1, 0], iq_goal[-1, 0])
y_range = (min(iq_goal[:, 1]) * 0.5, iq_goal[1, 1] * 1.5)
logging.debug('x range: {}'.format(x_range))
logging.debug('y range: {}'.format(y_range))
p2.x_range = Range1d(*x_range)
p2.y_range = Range1d(*y_range)

plots = gridplot([[p1, p2]])
show(plots)

DEBUG: x range: (0.0050000000000000001, 0.29999999999999999)
DEBUG: y range: (0.0038509999999999998, 1.4977845000000001)


1. Does the Rg obtained from a Guinier analysis of the I(q) curves match the Rg calculated from the PDB structures?

In [20]:
# Parse the ':'-delimited R-factor log and report the entry with the smallest
# value in its 'r' column.
r_factors = pd.read_csv('./lys_ellipsoid_r1/r_factors.log', delimiter=':', index_col=[0],
                        names=['log_level', 'label', 'r'])
i_min = r_factors.r.idxmin()

logging.info('i min: {}'.format(i_min))
logging.info('r min:\n{}'.format(r_factors.loc[i_min]))

INFO: i min: 1770
INFO: r min:
log_level         DEBUG
label          R-factor
r            0.00758195
Name: 1770, dtype: object


```
DEBUG: parameters:                                                                                                                                                               
[[  9.85495473   0.           0.           0.        ]                                                                                                                           
 [  0.           5.82059941   0.           0.        ]                                                                                                                           
 [  0.           0.          14.18517289   0.        ]                                                                                                                           
 [  0.           0.           0.           1.        ]]                                                                                                                          
DEBUG: R-factor: 0.00758195057103         
```