## Config

In [1]:
# Directory holding the pickled inputs read by this notebook (e.g. X.pickle).
pickle_dir = "/data/xenon/ehogenbi/psd_simulation/pickles/"
# Directory for processed data sets -- presumably the `data_dir` handed to
# store_result / file_list; confirm against the calling cells.
data_dir = "/data/xenon/ehogenbi/psd_simulation/processed/"

## Default imports

In [2]:
import os
import pickle
import re

import numpy as np
# import matplotlib
# import matplotlib.pyplot as plt
# %matplotlib inline
import pandas as pd
import scipy

import blueice as bi
from multihist import Histdd, Hist1d

## Pulse simulation

In [3]:
def simulate_photon_times(n_photons, t1, t3, fs, tts):
    '''
    Draw random arrival times for the photons of one pulse.

    Each photon is assigned to one of two exponential components: with
    probability `fs` its time is drawn with scale `t1`, otherwise with scale
    `t3`. Every time is then smeared with a zero-mean Gaussian of standard
    deviation `tts` (presumably the transit time spread -- confirm).

    n_photons is truncated to an int. Returns a 1-d float array of length
    n_photons in random order.
    '''
    n_total = int(n_photons)
    # The order of the random draws below is part of the behaviour when the
    # global numpy seed is fixed, so it must not be rearranged.
    n_fast = np.random.binomial(n=n_total, p=fs)
    n_slow = n_total - n_fast
    fast_part = np.random.exponential(t1, n_fast)
    slow_part = np.random.exponential(t3, n_slow)
    emission = np.concatenate((fast_part, slow_part))
    # Mix the two components so the position in the array carries no information.
    np.random.shuffle(emission)
    smearing = np.random.normal(0, tts, size=n_total)
    return emission + smearing

In [4]:
def build_likelihood_function(n_photons, t1, t3, fs, tts, hist_range, hist_bins,
                              offset=None, plot=False):
    '''
    Build a negative log likelihood function for photon arrival times.

    A pulse of `n_photons` photons is simulated with simulate_photon_times
    (parameters t1, t3, fs, tts), binned into a density histogram over
    `hist_range` with `hist_bins` bins, and the negative log of the bin
    densities is linearly interpolated between bin centers.

    offset: lower bound applied to the bin densities so that empty bins do
            not give log(0); defaults to 1 / n_photons.
    plot:   if True, draw the histogram and the floored densities on a log
            scale (needs matplotlib).

    Returns a function mapping a time (or array of times) to the interpolated
    negative log density. Outside the bin centers np.interp holds the edge
    values constant.
    '''
    if offset is None:
        offset = 1 / n_photons
    times = simulate_photon_times(n_photons, t1, t3, fs, tts)
    # `density=True` replaces the `normed=True` keyword, which has been removed
    # from numpy; for the equal-width bins used here the result is the same.
    vals, edges = np.histogram(times, range=hist_range, bins=hist_bins, density=True)
    vals = np.maximum(vals, offset)
    centers = 0.5 * (edges[:-1] + edges[1:])
    # Take the log once here rather than on every likelihood evaluation.
    nll_vals = -np.log(vals)

    def negloglikelihood(x):
        return np.interp(x, centers, nll_vals)

    if plot:
        # Imported locally: the notebook-level matplotlib import is commented
        # out, so `plt` is not available as a global.
        import matplotlib.pyplot as plt
        plt.hist(times, range=hist_range, bins=hist_bins, density=True, histtype='step')
        plt.plot(centers, vals)
        plt.yscale('log')
    return negloglikelihood

In [5]:
from tqdm import tqdm
def max_likelihood(s1, f):
    '''
    Minimize the summed negative log likelihood of an array of times by
    shifting all times by one common constant.

    s1: array of photon times.
    f:  function returning the negative log likelihood for each time.

    The minimization starts from a shift of 0.
    Returns (minimal summed negative log likelihood, best shift).
    '''
    # Import the submodule explicitly: a bare `import scipy` does not guarantee
    # that `scipy.optimize` has been loaded.
    from scipy.optimize import minimize

    optres = minimize(lambda x: np.sum(f(s1 + x)), 0.)
    shift = optres.x[0]
    like = optres.fun
    return like, shift

In [6]:
def likelihood_ratio(s1, f_er, f_nr):
    '''
    Return the log likelihood ratio of the NR versus the ER hypothesis.

    Both hypotheses are fitted independently with max_likelihood (each with
    its own best time shift). Because max_likelihood already returns negative
    log likelihoods, the ratio is simply their difference, NR minus ER.
    A low value means the pulse is very NR-like.
    '''
    nll_er, _ = max_likelihood(s1, f_er)
    nll_nr, _ = max_likelihood(s1, f_nr)
    return nll_nr - nll_er

## Dataset processing

In [7]:
def add_props(df, band):
    '''
    Add the columns 'band' and 'e_rec' to `df` in place; returns None.

    'band' is set to the given label for every row. 'e_rec' is computed as
    13.7e-3 * (cs1 / g1 + cs2 / g2) from the 'cs1' and 'cs2' columns.
    '''
    df['band'] = band
    # No extra division by 1.15 here: that was already done in the simulation.
    gain_s1 = 0.1442
    gain_s2 = 11.52
    s1_term = df['cs1'] / gain_s1
    s2_term = df['cs2'] / gain_s2
    df['e_rec'] = 13.7e-3 * (s1_term + s2_term)
    return None

In [8]:
def get_fit_values(E, band, X, field='low', tts=None):
    '''
    Return the pulse shape parameters for energy `E`, linearly interpolated
    between the fit points.

    E:     energy (or array of energies) at which to evaluate.
    band:  'er' or 'nr'.
    X:     sequence of fit tables ordered as
           [high-field ER, high-field NR, low-field ER, low-field NR];
           each table provides the fit energies under 'e' and the fitted
           values under 't1', 't3', 'fs' and 'tts'.
    field: 'high' or 'low'.
    tts:   if given (not None), this value is returned as 'tts' instead of
           the interpolated fit value.

    Returns a dict with the keys 't1', 't3', 'fs' and 'tts'.
    Raises ValueError for an unknown field / band combination.
    '''
    table_index = {
        ('high', 'er'): 0,
        ('high', 'nr'): 1,
        ('low', 'er'): 2,
        ('low', 'nr'): 3,
    }
    try:
        index = table_index[(field, band)]
    except KeyError:
        raise ValueError(
            "Unknown field/band combination: field=%r, band=%r "
            "(expected field 'high' or 'low', band 'er' or 'nr')" % (field, band)
        ) from None
    x = X[index]

    es = x['e']  # energy list for the fit points
    keys = ['t3', 'fs', 't1']
    # Compare against None explicitly so that a fixed tts of 0 is honoured.
    if tts is None:
        keys.append('tts')
    # Loop over keys and interpolate between values.
    ret = {key: np.interp(E, es, x[key]) for key in keys}
    if tts is not None:
        ret['tts'] = tts

    return ret

In [9]:
def get_lr(el, F_er, F_nr, X, field='low', tts=None):
    '''
    Simulate the pulse shape of one event and return its likelihood ratio.

    el:         one event; 'e_rec', 'band' and 's1_photons_detected' are read.
    F_er, F_nr: likelihood functions, indexed by the energy rounded to an int.
    X, field, tts: passed on to get_fit_values to obtain the shape parameters.
    '''
    # Use the reconstructed energy: it has to be valid for both ER and NR.
    energy = el['e_rec']
    if energy > 20:
        print('Warning: energy %.1f out of range, replacing with 20 keV...' % (energy))
        energy = 20.

    # The library holds one likelihood function per 1 keV bin (seems
    # sufficient), so round the energy to select the matching entries.
    kev_bin = int(round(energy))
    nll_er = F_er[kev_bin]
    nll_nr = F_nr[kev_bin]

    # Simulate the pulse for this event's band at the reconstructed energy.
    n_detected = el['s1_photons_detected']
    shape_params = get_fit_values(energy, el['band'], X, field=field, tts=tts)
    photon_times = simulate_photon_times(n_detected, **shape_params)

    # Compare the simulated pulse against both hypotheses.
    return likelihood_ratio(photon_times, nll_er, nll_nr)

In [10]:
def add_lr(df, F_er, F_nr, X, print_every=int(1e4), field='low', tts=None):
    '''
    Add the likelihood ratio of every event in `df` as column 'lr', in place.

    The likelihood ratio is computed per row with get_lr; F_er, F_nr, X,
    field and tts are passed on unchanged. A progress line is printed every
    `print_every` events. Returns None.
    '''
    n_events = len(df)
    lrs = []
    # Count rows by position: the index label from iterrows() need not start
    # at 0, be contiguous, or even be an integer (e.g. after event selection).
    for count, (_, el) in enumerate(df.iterrows()):
        lrs.append(get_lr(el, F_er, F_nr, X, field=field, tts=tts))
        if count % print_every == 0:
            print('%d of %d done, %.1f %%...' % (count, n_events, count / n_events * 100))
    df['lr'] = lrs
    return

In [11]:
def produce_likelihood_library(start, stop, field, X, tts=None, n_photons=int(1e7),
                               hist_range=(-50, 250), hist_bins=1000):
    '''
    Produce the likelihood functions per unit of energy, for both ER and NR.

    One function is built with build_likelihood_function for each of the
    (stop - start + 1) evenly spaced energies from `start` to `stop`
    inclusive, using the shape parameters from get_fit_values.

    start, stop: first and last energy of the library.
    field, X, tts: passed on to get_fit_values.
    n_photons, hist_range, hist_bins: passed on to build_likelihood_function.

    Returns (F_er, F_nr), two arrays of likelihood functions.
    NOTE(review): get_lr indexes these arrays with int(round(E)), which only
    lines up with the energies when start == 0 -- confirm against the caller.
    '''
    # np.linspace needs an integer number of points, also for float start/stop.
    num = int(stop - start + 1)
    energies = np.linspace(start, stop, num)

    def _build_band(band):
        # One likelihood function per energy for the given band.
        return np.array([
            build_likelihood_function(n_photons, hist_range=hist_range, hist_bins=hist_bins,
                                      **get_fit_values(energy, band, X, field=field, tts=tts))
            for energy in energies])

    F_er = _build_band('er')
    F_nr = _build_band('nr')
    return F_er, F_nr

## Make datasets

In [12]:
# Get base model
# If this is the first time you run it, it will take a few minutes
# The import itself is the slow step, so it is bracketed by progress messages.
print('Making base model....')
from laidbax import base_model
print('Done!')

Making base model....
Done!


## Import relevant data

In [13]:
import os

In [14]:
# Load the fit tables. The files are opened in `with` blocks so the handles are
# closed again. Pickle can execute arbitrary code on load: only use trusted files.
with open(os.path.join(pickle_dir, 'X.pickle'), 'rb') as _pickle_file:
    X = pickle.load(_pickle_file)  # Contains fit values
# NOTE(review): Y is described as a different set of fits but is read from the
# same 'X.pickle' as X -- this looks like a copy-paste slip; confirm the file name.
with open(os.path.join(pickle_dir, 'X.pickle'), 'rb') as _pickle_file:
    Y = pickle.load(_pickle_file)  # Fits with tts fixed at 1.5

## Dumping files

In [None]:
def store_result(df, data_dir, base_name):
    '''
    Pickle the 'cs1', 'cs2' and 'lr' columns of `df` to a new numbered file.

    The file is written to `data_dir` as '<base_name>_NNNN.pickle', where NNNN
    is the number of files file_list currently finds for this base name.
    Returns None.
    '''
    # Check current files in folder
    fns = file_list(data_dir, base_name)
    # This is the number that the file should get.
    i = len(fns)
    name = os.path.join(data_dir, base_name + '_%04d.pickle' % i)
    # Use a context manager so the file is flushed and closed before returning.
    with open(name, 'wb') as out_file:
        pickle.dump(df[['cs1', 'cs2', 'lr']], out_file)
    print('Dumped %d events to file %s. Done!' % (len(df), name))
    return

In [None]:
def file_list(data_dir, base_name):
    '''
    List the result files for `base_name` in `data_dir`.

    A file matches when its name is exactly '<base_name>_<digits>.pickle' with
    at least four digits, i.e. the names produced by the '_%04d.pickle' format.
    Returns a sorted numpy array with the full paths of the matching files.
    '''
    # Escape base_name so regex metacharacters in it are taken literally, and
    # match the whole file name so e.g. '..._0001.pickle.bak' is not counted.
    pattern = re.compile(re.escape(base_name) + r'_\d{4,}\.pickle')
    fns = [os.path.join(data_dir, fn)
           for fn in os.listdir(data_dir)
           if pattern.fullmatch(fn)]
    return np.sort(fns)