# Experimental Lysozyme
## Determine the ellipsoid geometry that reproduces the experimental scattering from a SANS measurement of lysozyme (norm_1mgml.dat)

In [1]:
import numpy as np
import pandas as pd

In [2]:
import os
import glob

In [3]:
import logging
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG)

In [4]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import Range1d, Span
from bokeh.palettes import Colorblind8 as palette
from bokeh.layouts import gridplot
from bokeh.charts import Scatter

In [5]:
import sys
sys.path.append('../../')
import calc_r

In [6]:
# Direct Bokeh output to the notebook so that later show() calls render inline.
output_notebook()

In [7]:
# Load two independent reductions of the measurement.  The plotting cells below
# read column 0 as q, column 1 as I(q) and column 2 as the uncertainty on I(q).
raw0 = np.loadtxt('exp_data_lysozyme.dat')  # reduction from nick
raw1 = np.loadtxt('norm_1mgml.dat')  # reduction from monica

In [8]:
def errorbar(fig, x, y, xerr=None, yerr=None, color='red',
             point_kwargs=None, error_kwargs=None):
    """Draw points with optional horizontal and/or vertical error bars.

    Parameters
    ----------
    fig : object
        Plot object providing ``circle`` and ``multi_line`` methods
        (a Bokeh figure in this notebook).
    x, y : sequence of numbers
        Coordinates of the points.
    xerr, yerr : sequence of numbers, optional
        Half-lengths of the horizontal / vertical error bars, paired with the
        points in order.  ``None`` (the default) skips that direction.
    color : str
        Color used for both the markers and the error bars.
    point_kwargs : dict, optional
        Extra keyword arguments forwarded to ``fig.circle``.
    error_kwargs : dict, optional
        Extra keyword arguments forwarded to ``fig.multi_line``.
    """
    # Use None sentinels instead of mutable ``{}`` defaults so that no dict
    # object is shared between calls.
    point_kwargs = {} if point_kwargs is None else point_kwargs
    error_kwargs = {} if error_kwargs is None else error_kwargs

    fig.circle(x, y, color=color, **point_kwargs)

    if xerr is not None:
        # zip() stops at the shortest input, so points without a matching
        # error value get no bar.
        triples = list(zip(x, y, xerr))
        x_err_x = [(px - err, px + err) for px, _, err in triples]
        x_err_y = [(py, py) for _, py, _ in triples]
        fig.multi_line(x_err_x, x_err_y, color=color, **error_kwargs)

    if yerr is not None:
        triples = list(zip(x, y, yerr))
        y_err_x = [(px, px) for px, _, _ in triples]
        y_err_y = [(py - err, py + err) for _, py, err in triples]
        fig.multi_line(y_err_x, y_err_y, color=color, **error_kwargs)

In [9]:
# Compare the two reductions of the measurement on linear and log-log axes.
p1 = figure(title='Original Data, Linear Scale', x_axis_label='q (1/A)', y_axis_label='I(q)',
            width=400, height=400)
p2 = figure(title='Original Data, Log Scale', x_axis_label='q (1/A)', y_axis_label='I(q)',
            width=400, height=400, x_axis_type='log', y_axis_type='log')

# Each reduction must be drawn with its OWN uncertainties; pairing raw0 points
# with raw1 errors mislabels the error bars whenever the two files differ.
# NOTE(review): assumes raw0 stores its uncertainty in column 2 like raw1 does;
# if the file has no such column the raw0 error bars are simply omitted.
raw0_err = raw0[:, 2] if raw0.shape[1] > 2 else None

errorbar(p1, raw0[:, 0], raw0[:, 1], yerr=raw0_err, color=palette[1], point_kwargs={'legend': 'nick'})
errorbar(p1, raw1[:, 0], raw1[:, 1], yerr=raw1[:, 2], color=palette[0], point_kwargs={'legend': 'monica'})

errorbar(p2, raw0[:, 0], raw0[:, 1], color=palette[1], yerr=raw0_err)
errorbar(p2, raw1[:, 0], raw1[:, 1], color=palette[0], yerr=raw1[:, 2])

# Axis limits: x spans from the second to the last raw0 q-value; y runs from
# half the smallest positive raw1 intensity (keeps the log axis valid) to
# 1.5 times the second raw0 intensity.
x_range = raw0[1, 0], raw0[-1, 0]
y_range = min(raw1[raw1[:, 1]>0, 1]) * 0.5, raw0[1, 1] * 1.5
p1.x_range = Range1d(*x_range)
p1.y_range = Range1d(*y_range)
# Share the range objects so both panels show the same window.
p2.x_range = p1.x_range
p2.y_range = p1.y_range

fig = gridplot([[p1, p2]])
show(fig)

In [10]:
# Estimate the number of independent data points as
# N_q = (q_max - q_min) * d_max / pi.
q_max = 0.4           # upper q limit used for the comparison
q_min = raw1[0, 0]    # lowest measured q-value
d_max = 45            # NOTE(review): presumably the maximum particle dimension in Angstrom -- confirm
raw_n_q = (q_max - q_min) * d_max / np.pi
logging.info('Rambo N_q: {}'.format(raw_n_q))
# The `np.int` alias was deprecated in NumPy 1.20 and removed in 1.24;
# the builtin int() gives the same result on every NumPy version.
n_q = int(np.round(raw_n_q))
logging.debug('n_q = {}'.format(n_q))
print('n_q = {}'.format(n_q))

INFO: Rambo N_q: 5.348704257
DEBUG: n_q = 5


n_q = 5


According to the Shannon-channel analysis described by [Peter Moore](http://scripts.iucr.org/cgi-bin/paper?a19456) and also [Rob Rambo](http://www.nature.com/nature/journal/v496/n7446/extref/nature12070-s1.pdf), there are only 5 or 6 independent data channels, depending on whether `q_min` is 0 or the lowest measured q-value.  Using so few points seems insufficient, by at least a factor of 2.

In [11]:
# Rebin Monica's reduction onto an evenly spaced grid of `force_n_q` points
# running from q = 0 to q_max, then write the result to disk.
force_n_q = 6
q_grid = np.linspace(0, q_max, force_n_q)

# The grid starts at 0, so q_grid[1] is the grid spacing; keep only raw points
# below the last grid value plus half a spacing.
q_cutoff = q_grid[-1] + q_grid[1] / 2
below_cutoff = raw1[:, 0] < q_cutoff
data6 = calc_r.rebin(raw1[below_cutoff], q_grid, i0=0.0112)

save_fname = 'rebinned_{}_exp_data_lysozyme.dat'.format(force_n_q)
logging.info('saving data to: {}'.format(save_fname))
np.savetxt(save_fname, data6)

INFO: rebinning to evenly spaced grid, dq = 0.08
INFO: saving data to: rebinned_6_exp_data_lysozyme.dat


In [12]:
# Overlay the 6-point rebinned data on the raw data, on linear and log-log axes.
p1 = figure(title='Linear Scale', x_axis_label='q (1/A)', y_axis_label='I(q)',
            width=400, height=400)
p2 = figure(title='Log Scale', x_axis_label='q (1/A)', y_axis_label='I(q)',
            width=400, height=400, x_axis_type='log', y_axis_type='log')

# The error column must be sliced exactly like the x/y columns: a `[1:, 2]`
# slice would pair every point with the uncertainty of the NEXT row.
errorbar(p1, raw1[:, 0], raw1[:, 1], yerr=raw1[:, 2], color=palette[0])
errorbar(p1, data6[:, 0], data6[:, 1], yerr=data6[:, 2], color=palette[1])

errorbar(p2, raw1[:, 0], raw1[:, 1], color=palette[0], yerr=raw1[:, 2])
errorbar(p2, data6[:, 0], data6[:, 1], yerr=data6[:, 2], color=palette[1])

# Axis limits: x spans from the second to the last raw q-value; y runs from
# half the smallest positive intensity (keeps the log axis valid) to 1.5 times
# the second raw intensity.
x_range = raw1[1, 0], raw1[-1, 0]
y_range = min(raw1[raw1[:, 1]>0, 1]) * 0.5, raw1[1, 1] * 1.5
p1.x_range = Range1d(*x_range)
p1.y_range = Range1d(*y_range)
# Share the range objects so both panels show the same window.
p2.x_range = p1.x_range
p2.y_range = p1.y_range

fig = gridplot([[p1, p2]])
show(fig)

In [13]:
# guinier plot
p1 = figure(title='Guinier Plot', x_axis_label='q^2 (1/A^2)', y_axis_label='ln[I(q)]')
p1.circle(raw1[:, 0] ** 2, np.log(raw1[:, 1]), color=palette[0])
p1.circle(data6[:, 0] ** 2, np.log(data6[:, 1]), color=palette[1])
show(p1)

# Find best ellipsoid that reproduces experimental data

In [14]:
# Read the tab-delimited log of the ellipsoid scan, indexed by its first column.
scan_dir = '../../lys_ellipsoid_scan1/'
log_fname = os.path.join(scan_dir, 'sx13to17_sy13to17_sz20.5to24.5.log')
log = pd.read_csv(log_fname, delimiter='\t', index_col=[0], escapechar='#')
# Strip the whitespace padding from the column names so they can be used as plain keys.
log = log.rename(columns=lambda name: name.strip())

In [15]:
# Preview the first rows of the scan log to check that the columns parsed as expected.
log.head()

Unnamed: 0_level_0,rg,sx,sy,sz
frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,12.320748,13.0,13.0,20.5
2,12.507873,13.0,14.0,20.5
3,12.706,13.0,15.0,20.5
4,13.069334,13.0,16.0,20.5
5,13.311778,13.0,17.0,20.5


In [16]:
# Prepend a q = 0 row to Monica's reduction: [q, I, error] with I(0) = 0.0112
# (the same value passed as `i0` when rebinning above) and an error of 1% of I(0).
i0_intensity = 0.0112
i0 = np.array([0, i0_intensity, i0_intensity / 100])
raw_data = np.vstack([i0, raw1])

In [21]:
# For every q-grid size, compute the R-factor of each scanned ellipsoid against
# the experimental data and collect the results next to the scan log.
results = log
nqs = [6, 8, 12, 16, 20, 24, 61]
best5 = []    # per nq: index labels of the 5 lowest R-factors
worst5 = []   # per nq: index labels of the 5 highest R-factors
iqs = {}      # per nq: second value returned by calc_r.sascalc_r_factors
for nq in nqs:
    sas_dir = os.path.join(scan_dir, 'sascalc_{}/neutron_D2Op_100/'.format(nq))

    r_factors, this_iq = calc_r.sascalc_r_factors(sas_dir, raw_data, run_name='nq_{}'.format(nq))

    iqs[nq] = this_iq
    # Suffix the column names with nq so the columns of different grids can
    # sit side by side in `results`.
    r_factors.rename(columns=lambda old_name: '{}{}'.format(old_name, nq), inplace=True)
    results = pd.concat([results, r_factors], axis=1)

    r_col = 'r{}'.format(nq)
    # NOTE: i_best / i_worst keep the values of the LAST nq after the loop ends.
    i_best = results[r_col].idxmin()
    i_worst = results[r_col].idxmax()
    logging.info('best, worst: {}, {}'.format(i_best, i_worst))

    # DataFrame.sort() was removed from pandas; sort_values() is its replacement.
    best5.append(r_factors.sort_values(r_col)[:5].index)
    worst5.append(r_factors.sort_values(r_col, ascending=False)[:5].index)
results.head()

INFO: rebinning experimental data to scattering q-grid
INFO: rebinning to evenly spaced grid, dq = 0.08
INFO: best, worst: 76, 125
INFO: rebinning experimental data to scattering q-grid
INFO: rebinning to evenly spaced grid, dq = 0.057143
INFO: best, worst: 51, 125
INFO: rebinning experimental data to scattering q-grid
INFO: rebinning to evenly spaced grid, dq = 0.036364
INFO: best, worst: 76, 125
INFO: rebinning experimental data to scattering q-grid
INFO: rebinning to evenly spaced grid, dq = 0.026667
INFO: best, worst: 76, 125
INFO: rebinning experimental data to scattering q-grid
INFO: rebinning to evenly spaced grid, dq = 0.021053
INFO: best, worst: 51, 125
INFO: rebinning experimental data to scattering q-grid
INFO: rebinning to evenly spaced grid, dq = 0.017391
INFO: best, worst: 101, 125
INFO: rebinning experimental data to scattering q-grid
INFO: rebinning to evenly spaced grid, dq = 0.006667
INFO: best, worst: 76, 125


Unnamed: 0_level_0,rg,sx,sy,sz,fnames6,r6,scale6,fnames8,r8,scale8,...,scale16,fnames20,r20,scale20,fnames24,r24,scale24,fnames61,r61,scale61
frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,12.320748,13.0,13.0,20.5,../../lys_ellipsoid_scan1/sascalc_6/neutron_D2...,0.050006,0.01056,../../lys_ellipsoid_scan1/sascalc_8/neutron_D2...,0.045487,0.010974,...,0.011282,../../lys_ellipsoid_scan1/sascalc_20/neutron_D...,0.055131,0.01118,../../lys_ellipsoid_scan1/sascalc_24/neutron_D...,0.078814,0.011456,../../lys_ellipsoid_scan1/sascalc_61/neutron_D...,0.077597,0.011322
2,12.507873,13.0,14.0,20.5,../../lys_ellipsoid_scan1/sascalc_6/neutron_D2...,0.049186,0.010675,../../lys_ellipsoid_scan1/sascalc_8/neutron_D2...,0.039237,0.011032,...,0.011294,../../lys_ellipsoid_scan1/sascalc_20/neutron_D...,0.047318,0.011188,../../lys_ellipsoid_scan1/sascalc_24/neutron_D...,0.070353,0.011478,../../lys_ellipsoid_scan1/sascalc_61/neutron_D...,0.067938,0.011334
3,12.706,13.0,15.0,20.5,../../lys_ellipsoid_scan1/sascalc_6/neutron_D2...,0.051915,0.010799,../../lys_ellipsoid_scan1/sascalc_8/neutron_D2...,0.037821,0.011095,...,0.011308,../../lys_ellipsoid_scan1/sascalc_20/neutron_D...,0.043413,0.011196,../../lys_ellipsoid_scan1/sascalc_24/neutron_D...,0.064515,0.011501,../../lys_ellipsoid_scan1/sascalc_61/neutron_D...,0.06366,0.011348
4,13.069334,13.0,16.0,20.5,../../lys_ellipsoid_scan1/sascalc_6/neutron_D2...,0.050619,0.01103,../../lys_ellipsoid_scan1/sascalc_8/neutron_D2...,0.036886,0.011212,...,0.011333,../../lys_ellipsoid_scan1/sascalc_20/neutron_D...,0.041732,0.011212,../../lys_ellipsoid_scan1/sascalc_24/neutron_D...,0.060075,0.011545,../../lys_ellipsoid_scan1/sascalc_61/neutron_D...,0.058126,0.011373
5,13.311778,13.0,17.0,20.5,../../lys_ellipsoid_scan1/sascalc_6/neutron_D2...,0.051917,0.011194,../../lys_ellipsoid_scan1/sascalc_8/neutron_D2...,0.04794,0.011294,...,0.011351,../../lys_ellipsoid_scan1/sascalc_20/neutron_D...,0.052515,0.011223,../../lys_ellipsoid_scan1/sascalc_24/neutron_D...,0.067262,0.011576,../../lys_ellipsoid_scan1/sascalc_61/neutron_D...,0.067859,0.011391


In [23]:
# Display the five best and five worst index labels per q-grid size (same order as `nqs`).
best5, worst5

([Int64Index([76, 51, 81, 56, 52], dtype='int64'),
  Int64Index([51, 76, 52, 27, 26], dtype='int64'),
  Int64Index([76, 51, 52, 56, 27], dtype='int64'),
  Int64Index([76, 51, 81, 52, 101], dtype='int64'),
  Int64Index([51, 76, 52, 27, 81], dtype='int64'),
  Int64Index([101, 76, 81, 77, 51], dtype='int64'),
  Int64Index([76, 51, 101, 81, 52], dtype='int64')],
 [Int64Index([125, 120, 100, 124, 119], dtype='int64'),
  Int64Index([125, 100, 120, 124, 119], dtype='int64'),
  Int64Index([125, 100, 120, 124, 119], dtype='int64'),
  Int64Index([125, 100, 120, 124, 119], dtype='int64'),
  Int64Index([125, 120, 100, 124, 119], dtype='int64'),
  Int64Index([125, 100, 120, 124, 119], dtype='int64'),
  Int64Index([125, 100, 120, 124, 119], dtype='int64')])

In [None]:
# TODO: add heatmap describing the similarity in the best N between the different dq
# http://bokeh.pydata.org/en/latest/docs/gallery/categorical.html

In [24]:
# Scatter-plot matrix of the R-factors obtained with every pair of q-grid
# sizes: rows vary the y-axis grid, columns vary the x-axis grid.
figs = []
for nq_y in nqs:
    row = []
    for nq_x in nqs:
        row.append(Scatter(results, x='r{}'.format(nq_x), y='r{}'.format(nq_y),
                           xlabel='', ylabel='', title='{} vs {}'.format(nq_y, nq_x),
                           width=133, height=133))
    figs.append(row)

fig = gridplot(figs)
show(fig)

In [35]:
# Rebin the model curve onto an evenly spaced grid for every q-grid size,
# spanning the same q range as the matching experimental curve in `iqs`.
model_fname = '../lys_ellipsoid_61_00001.iq'
iq_model = np.loadtxt(model_fname)[:, :3]  # keep only the first three columns

iqs_model = {}
for nq in nqs:
    q_first, q_last = iqs[nq][0, 0], iqs[nq][-1, 0]
    q_grid = np.linspace(q_first, q_last, nq)
    # The first model row supplies the i0 value; the remaining rows are rebinned.
    iqs_model[nq] = calc_r.rebin(iq_model[1:], q_grid, i0=iq_model[0, 1])

INFO: rebinning to evenly spaced grid, dq = 0.08
INFO: rebinning to evenly spaced grid, dq = 0.0571428571429
INFO: rebinning to evenly spaced grid, dq = 0.0363636363636
INFO: rebinning to evenly spaced grid, dq = 0.0266666666667
INFO: rebinning to evenly spaced grid, dq = 0.0210526315789
INFO: rebinning to evenly spaced grid, dq = 0.0173913043478
INFO: rebinning to evenly spaced grid, dq = 0.00666666666667


some q bins will not have any data (q < 0.006667), proceed with caution


In [38]:
# Collect the scaled scattering curve of every scan result for the chosen grid.
# Column 0 holds q; column k (k >= 1) holds the curve of the k-th row of `results`.
nq = 12
fname_col = 'fnames{}'.format(nq)
scale_col = 'scale{}'.format(nq)

iq_data = np.empty((nq, len(results)+1), order='F')
iq_data[:, 0] = iqs[nq][:, 0]
for column, (curve_fname, curve_scale) in enumerate(
        zip(results[fname_col], results[scale_col]), 1):
    # Column 1 of each file is multiplied by that row's scale factor.
    iq_data[:, column] = np.loadtxt(curve_fname)[:, 1] * curve_scale

In [39]:
# Summary for the currently selected `nq`: R-factor versus Rg on the left, and
# on the right every scanned curve together with the model, the experiment and
# the best / worst fits.
r_col = 'r{}'.format(nq)
p1 = Scatter(results, x='rg', y=r_col, color=palette[1], width=400, height=400,
             xlabel='Rg (A)', ylabel='R-factor')

p2 = figure(width=400, height=400, x_axis_label='q (1/A)', y_axis_label='I(q)')

# Locate the best and worst fit for THIS nq.  The i_best / i_worst variables
# left over from the R-factor loop belong to the last nq of that loop, and they
# are index labels rather than column positions.  iq_data stores the curve of
# the k-th row of `results` in column k + 1, so work with positions here.
r_values = results[r_col].values
col_best = int(np.nanargmin(r_values)) + 1
col_worst = int(np.nanargmax(r_values)) + 1

for col in range(1, iq_data.shape[1]):
    p2.line(iq_data[:, 0], iq_data[:, col], color='black', line_width=2, legend='scan')
p2.line(iqs_model[nq][:, 0], iqs_model[nq][:, 1], color=palette[2], line_width=2,
        legend='22.5x15x15 model')
errorbar(p2, iqs[nq][:, 0], iqs[nq][:, 1], yerr=iqs[nq][:, 2], color=palette[0],
         point_kwargs={'legend': 'experiment'})
p2.line(iq_data[:, 0], iq_data[:, col_best], color=palette[1], line_width=2, line_dash=[5, 1],
        legend='best')
p2.line(iq_data[:, 0], iq_data[:, col_worst], color=palette[3], line_width=2, line_dash=[4, 2, 2],
        legend='worst')


plots = gridplot([[p1, p2]])
show(plots)