# Sampling experimental lysozyme data to a finer q-grid

In [1]:
import numpy as np

In [21]:
import logging
# configure the root logger: show everything from DEBUG up, formatted as "LEVEL: message"
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG)

In [3]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import Range1d
from bokeh.palettes import Dark2_5 as palette
from bokeh.layouts import gridplot

In [4]:
# direct bokeh output to the notebook so that show() renders figures inline
output_notebook()

In [5]:
# experimental scattering curve; the cells below use column 0 as q,
# column 1 as I(q) and column 2 as the error bar on I(q)
raw = np.loadtxt('exp_data_lysozyme.dat')

In [6]:
def errorbar(fig, x, y, xerr=None, yerr=None, color='red',
             point_kwargs=None, error_kwargs=None):
    '''
    draw data points on `fig` with optional horizontal/vertical error bars

    Parameters
    ----------
    fig : object providing `circle` and `multi_line` glyph methods
        (e.g. a bokeh figure)
    x, y : sequences of point coordinates
    xerr, yerr : optional sequences of half-widths of the horizontal /
        vertical error bars; a bar spans value - err to value + err.
        `None` (default) draws no bars in that direction
    color : color used for both the points and the error bars
    point_kwargs : optional dict of extra keyword arguments for `fig.circle`
    error_kwargs : optional dict of extra keyword arguments for
        `fig.multi_line`

    Returns
    -------
    None; the glyphs are added to `fig` in place
    '''
    # None instead of {} as default avoids sharing one mutable dict
    # between every call of the function
    point_kwargs = {} if point_kwargs is None else point_kwargs
    error_kwargs = {} if error_kwargs is None else error_kwargs

    fig.circle(x, y, color=color, **point_kwargs)

    if xerr is not None:
        # one horizontal segment per point: (x - err, x + err) at constant y
        points = list(zip(x, y, xerr))
        bars_x = [(px - err, px + err) for px, _, err in points]
        bars_y = [(py, py) for _, py, _ in points]
        fig.multi_line(bars_x, bars_y, color=color, **error_kwargs)

    if yerr is not None:
        # one vertical segment per point: (y - err, y + err) at constant x
        points = list(zip(x, y, yerr))
        bars_x = [(px, px) for px, _, _ in points]
        bars_y = [(py - err, py + err) for _, py, err in points]
        fig.multi_line(bars_x, bars_y, color=color, **error_kwargs)

In [7]:
# side-by-side view of the measured curve: linear axes (left), log-log axes (right)
p1 = figure(
    title='Original Data, Linear Scale',
    x_axis_label='q (1/A)',
    y_axis_label='I(q)',
    width=400,
    height=400,
)
p2 = figure(
    title='Original Data, Log Scale',
    x_axis_label='q (1/A)',
    y_axis_label='I(q)',
    x_axis_type='log',
    y_axis_type='log',
    width=400,
    height=400,
)

# identical points and vertical error bars on both panels
errorbar(p1, raw[:, 0], raw[:, 1], yerr=raw[:, 2], color=palette[0])
errorbar(p2, raw[:, 0], raw[:, 1], yerr=raw[:, 2], color=palette[0])

# x spans from the second to the last q value; y spans from half the smallest
# positive intensity to 1.5 times the intensity of the second point
x_range = (raw[1, 0], raw[-1, 0])
y_range = (0.5 * min(raw[raw[:, 1] > 0, 1]), 1.5 * raw[1, 1])

# both panels share the very same range objects
p1.x_range = Range1d(*x_range)
p2.x_range = p1.x_range
p1.y_range = Range1d(*y_range)
p2.y_range = p1.y_range

fig = gridplot([[p1, p2]])
show(fig)

In [9]:
def rebin(data, q_grid, i0=None, er_i0=None):
    '''
    rebin data with error, follows white paper hosted here:
    http://isi.ssl.berkeley.edu/~tatebe/whitepapers/Combining%20Errors.pdf

    this is based on the following 2 assumptions regarding the number
    of pixels binned into the input data points, N
        1. N is roughly equal between neighboring point
        2. N is very large

    Parameters
    ----------
    data : array-like, shape (n, >=3)
        column 0 is q, column 1 the intensity, column 2 the intensity error
    q_grid : array-like, shape (m,), m >= 2
        evenly spaced bin centers; bin k covers
        [q_grid[k] - dq/2, q_grid[k] + dq/2)
    i0 : float, optional
        intensity stored in the first output row; when None the binned
        value (if any) is kept and a warning is logged
    er_i0 : float, optional
        intensity error stored in the first output row; when None the
        binned value (if any) is kept and a warning is logged

    Returns
    -------
    new_data : ndarray, shape (m, 3)
        columns are q_grid, the mean intensity of the points in each bin
        and the root-mean-square of their errors.  Bins that receive no
        input points hold NaN in columns 1 and 2.  Input points lying
        outside the outermost bin edges are ignored.

    Raises
    ------
    ValueError
        if q_grid is not 1-D, has fewer than 2 points, or is unevenly spaced
    '''
    data = np.asarray(data, dtype=float)
    q_grid = np.asarray(q_grid, dtype=float)
    if q_grid.ndim != 1 or q_grid.size < 2:
        raise ValueError('q_grid must be 1-D with at least 2 points')

    # verify the new q_grid is evenly spaced
    dq = q_grid[1:] - q_grid[:-1]
    if not np.all(np.isclose(dq - dq[0], 0)):
        raise ValueError('unevenly spaced grid not yet supported')
    dq = dq[0]
    logging.info('rebinning to evenly spaced grid, dq = {}'.format(dq))

    # create array for rebinned data; NaN marks bins that stay empty
    # (np.empty alone would leave arbitrary memory contents there)
    n_bins = len(q_grid)
    new_data = np.empty((n_bins, 3))
    new_data[:, 0] = q_grid
    new_data[:, 1:] = np.nan

    # rebin the data to the new q_grid, bin edges sit half-way between points
    bins = np.append(q_grid - dq/2, q_grid[-1] + dq/2)
    inds = np.digitize(data[:, 0], bins)

    for ind in set(inds):
        # digitize yields 0 / len(bins) for points below / above all edges;
        # using those would overwrite the last row or run off the array
        if ind < 1 or ind > n_bins:
            continue
        in_bin = inds == ind

        # get the average scattering intensity
        new_data[ind-1, 1] = np.mean(data[in_bin, 1])

        # calculate the error in quadrature, based on unknown bin counts
        new_data[ind-1, 2] = np.sqrt(np.mean(data[in_bin, 2] ** 2))

    # populate the q=0 scattering intensity
    # (compare against None so that an explicit 0.0 is honoured)
    if i0 is not None:
        new_data[0, 1] = i0
    else:
        logging.warning('I(0) unknown')

    if er_i0 is not None:
        new_data[0, 2] = er_i0
    else:
        logging.warning('I(0) error unknown')

    return new_data

In [38]:
# q-range of the target grid and the d_max value used to size it
q_min = 0.0
q_max = 0.4
d_max = 45

# grid size: (q_max - q_min) * d_max / pi, rounded to the nearest whole number
n_q = np.round(d_max * (q_max - q_min) / np.pi)

# report the result both through the logger and on stdout
n_q_msg = 'n_q = {}'.format(n_q)
logging.debug(n_q_msg)
print(n_q_msg)

n_q = 6.0


In [39]:
# q spacing derived from d_max (pi / d_max)
# NOTE(review): presumably the Shannon sampling interval -- confirm
dq = np.pi/d_max
# number of such intervals between 0 and q_max, echoed as the cell output
round(q_max/dq)

6.0

In [40]:
# n_q comes out of np.round as a float, but np.linspace needs an integer
# number of samples, hence the explicit int(); the limits reuse q_min/q_max
# rather than repeating their literal values
q_grid = np.linspace(q_min, q_max, int(n_q))
# only I(0) is supplied; no er_i0 is given, so rebin() logs 'I(0) error unknown'
new_data = rebin(raw, q_grid, i0=0.0116)

In [41]:
# echo the grid points as the cell output
q_grid

array([ 0.  ,  0.08,  0.16,  0.24,  0.32,  0.4 ])

In [42]:
# measured curve (palette[0]) overlaid with the rebinned curve (palette[1]):
# linear axes on the left, log-log axes on the right
p1 = figure(
    title='Linear Scale',
    x_axis_label='q (1/A)',
    y_axis_label='I(q)',
    width=400,
    height=400,
)
p2 = figure(
    title='Log Scale',
    x_axis_label='q (1/A)',
    y_axis_label='I(q)',
    x_axis_type='log',
    y_axis_type='log',
    width=400,
    height=400,
)

# linear panel: measured data without its first row, then every rebinned point
errorbar(p1, raw[1:, 0], raw[1:, 1], yerr=raw[1:, 2], color=palette[0])
errorbar(p1, new_data[:, 0], new_data[:, 1], yerr=new_data[:, 2], color=palette[1])

# log panel: every measured point, then the rebinned points without the first row
errorbar(p2, raw[:, 0], raw[:, 1], yerr=raw[:, 2], color=palette[0])
errorbar(p2, new_data[1:, 0], new_data[1:, 1], yerr=new_data[1:, 2], color=palette[1])

# x spans from the second to the last measured q value; y spans from half the
# smallest positive intensity to 1.5 times the intensity of the second point
x_range = (raw[1, 0], raw[-1, 0])
y_range = (0.5 * min(raw[raw[:, 1] > 0, 1]), 1.5 * raw[1, 1])

# both panels share the very same range objects
p1.x_range = Range1d(*x_range)
p2.x_range = p1.x_range
p1.y_range = Range1d(*y_range)
p2.y_range = p1.y_range

fig = gridplot([[p1, p2]])
show(fig)

In [43]:
# guinier plot: ln[I(q)] against q^2
p1 = figure(title='Guinier Plot', x_axis_label='q^2 (1/A^2)', y_axis_label='ln[I(q)]')
# ln(I) is undefined for I <= 0, so keep only the strictly positive
# intensities instead of feeding NaN/-inf (and numpy warnings) to the plot
positive = raw[:, 1] > 0
p1.circle(raw[positive, 0] ** 2, np.log(raw[positive, 1]), color=palette[0])
#show(p1)

# Fit ellipsoid to experimental data