In [2]:
# Execute the companion setup notebook in this kernel. Judging by its name it
# prepares flamedisx when running on Colab -- TODO confirm what it does locally.
%run _if_on_colab_setup_flamedisx.ipynb

In [6]:
import pandas as pd
np = pd.np
import flamedisx as fd
import matplotlib.pyplot as plt
import scipy
from tqdm import tqdm

%matplotlib inline

import tensorflow as tf
have_gpu = tf.test.is_gpu_available()
tf.__version__, tf.test.is_built_with_gpu_support(), have_gpu

('2.0.0', False, False)

In [5]:
# Sanity check of the import: the repr should point at flamedisx/__init__.py.
# Restart runtime if this says: <module 'flamedisx' (namespace)>
fd

<module 'flamedisx' from '/home/aalbers/software/flamedisx/flamedisx/__init__.py'>

In [2]:
# flamedisx is imported again here (it is already imported in the setup cell),
# then the class-level TPC radius is displayed. Sensitivity.__init__ reads this
# same attribute and treats it as centimetres.
import flamedisx as fd
fd.LXeSource.tpc_radius

47.9

In [None]:
from collections import defaultdict

class Sensitivity:
    """Toy-MC sensitivity study: background-only toys and WIMP upper limits.

    On construction a flamedisx ``LogLikelihood`` is built from an ER source
    and a WIMP source, both with a reduced TPC radius and both with a free
    rate multiplier. The other methods simulate background-only toy datasets,
    fit them, and compute an upper limit on ``wimp_rate_multiplier``.

    Attributes:
        lf: the ``fd.LogLikelihood`` instance.
        er_rate_true: ER rate multiplier used when simulating toys
            (627 divided by the ER source's ``estimate_mu()``).
        wimp_rate_true: WIMP rate multiplier corresponding to 3.56 expected
            events (3.56 divided by the WIMP source's ``estimate_mu()``).
        crit_val: threshold on -2 * (lnL - lnL_best) used for the upper limit,
            ``scipy.stats.norm.ppf(0.9) ** 2``.
        fv, exposure_mult, mwimp, xsec, n_spectra, e_rec_max, batch_size:
            the constructor arguments, stored unchanged for later reference.

    NOTE(review): ``exposure_mult`` is accepted and stored but is not used in
    any calculation; the expected counts 627 and 3.56 are fixed numbers.
    """

    def __init__(self,
                 fv=1.3,  # FV to use (approx, just shrinking radius of Xe1T)
                 exposure_mult=1.0,   # Multiplier for true exposure
                 mwimp=2e2,  # WIMP mass
                 xsec=4.7e-47,  # cm^2 xsec to use
                 n_spectra=1,  # Number of time points to compute WIMP spectra for
                 e_rec_max=50.,  # Max recoil energy in WIMP spectra [keV]
                 batch_size=700 if have_gpu else 20,  # Number of events per batch in each source
                 ):
        assert n_spectra > 0

        # Keep the configuration on the instance so later cells can read it
        # back instead of repeating literals.
        self.fv = fv
        self.exposure_mult = exposure_mult
        self.mwimp = mwimp
        self.xsec = xsec
        self.n_spectra = n_spectra
        self.e_rec_max = e_rec_max
        self.batch_size = batch_size

        # Calculate radius of detector volume
        # TODO: also adjust z
        full_radius = fd.LXeSource.tpc_radius
        tpc_length = fd.LXeSource.tpc_length
        v_xe1t = np.pi * full_radius ** 2 * tpc_length
        # Dividing by 2 treats the full cylinder as 2 tonnes of LXe, so `fv`
        # tonnes correspond to a fraction fv / 2 of the full volume.
        v_small = v_xe1t / 2 * fv
        # Only the radius is shrunk; the length is kept.
        r_small = (v_small / tpc_length / np.pi)**0.5
        print(f'TPC with radius {r_small :.3g} cm holds {fv} tonnes LXe')

        class myERSource(fd.SR1ERSource):
            tpc_radius = r_small  # cm

        class myWIMPSource(fd.SR1WIMPSource):
            tpc_radius = r_small  # cm

            # WIMP settings
            mw = mwimp
            n_in = n_spectra + 1  # n_in is bin_edges
            es = np.geomspace(0.7, e_rec_max, 100)
            sigma_nucleon = xsec

        self.lf = fd.LogLikelihood(sources=dict(er=myERSource,
                                                wimp=myWIMPSource),
                                   free_rates=('er', 'wimp'),
                                   batch_size=batch_size)

        # Set true rate multipliers such that mean number of
        # ER events is about 627 and WIMP events is about 3.56
        # TODO: can probably call mu_itps here
        self.er_rate_true = 627 / self.lf.sources['er'].estimate_mu()
        self.wimp_rate_true = 3.56 / self.lf.sources['wimp'].estimate_mu()
        print(f"True rate multipliers: ER {self.er_rate_true :.3g},"
              f" NR {self.wimp_rate_true :.3g}")

        # UL only
        self.crit_val = scipy.stats.norm.ppf(0.9) ** 2

    def ll_check(self, bf, ll_best=None):
        """Return dictionary with points for likelihood parabola plot.

        The likelihood is scanned over 50 values of ``wimp_rate_multiplier``
        in [0, 3.5] while ``er_rate_multiplier`` stays fixed at its value
        in ``bf``.

        Args:
            bf: best-fit parameters; must contain 'er_rate_multiplier' and is
                passed as keyword arguments to the likelihood when
                ``ll_best`` is not given.
            ll_best: log likelihood at the best fit; evaluated from ``bf``
                when None.

        Returns:
            dict with keys 'xs' (scan points), 'ys' (log likelihood at each
            scan point) and 'll_best'.
        """
        # TODO: shouldn't we refit er_rate_multiplier each time here?
        # Well, depends on what you want to draw...
        if ll_best is None:
            ll_best = self.lf(**bf)

        xs = np.linspace(0.0, 3.5, 50)
        ys = np.array([self.lf(wimp_rate_multiplier=x,
                               er_rate_multiplier=bf['er_rate_multiplier'])
                       for x in xs])
        return dict(xs=xs, ys=ys, ll_best=ll_best)

    def toymc(self, do_ll_check=False):
        """Simulate one background-only toy, fit it and compute the upper limit.

        The simulated dataset replaces the data currently set on ``self.lf``.

        Args:
            do_ll_check: if True, also include the output of ``ll_check``
                ('xs', 'ys', 'll_best') in the result.

        Returns:
            dict with the best-fit parameters, 'ul' (the value returned by
            ``one_parameter_interval``) and 'ul_guess' (the starting value
            of ``wimp_rate_multiplier`` handed to the interval search).
        """
        # Simulate background-only data
        d = self.lf.simulate(er_rate_multiplier=self.er_rate_true,
                             wimp_rate_multiplier=0.)
        self.lf.set_data(d)

        guess = dict(er_rate_multiplier=0.9,
                     wimp_rate_multiplier=0.1)

        # Determine the global best fit
        bf = self.lf.bestfit(guess=guess, llr_tolerance=0.0002)
        ll_best = self.lf(**bf)

        def t_stat(wimp_rate_multiplier):
            # -2 * log likelihood ratio w.r.t. the best fit, with the ER rate
            # held at its best-fit value (not refitted).
            return -2 * (self.lf(er_rate_multiplier=bf['er_rate_multiplier'],
                                 wimp_rate_multiplier=wimp_rate_multiplier)
                         - ll_best)

        # Start the interval search from a point where the test statistic is
        # already above the critical value: step up in 0.5 increments.
        wrm_guess = max(1.5, 3 * bf['wimp_rate_multiplier'])
        while t_stat(wrm_guess) < self.crit_val:
            wrm_guess += 0.5
            print(f'increasing ul_guess to {wrm_guess}')

        ul_guess = bf.copy()
        ul_guess['wimp_rate_multiplier'] = wrm_guess

        ul = self.lf.one_parameter_interval('wimp_rate_multiplier',
                                            bestfit=bf,
                                            guess=ul_guess,
                                            llr_tolerance=0.0002)
        check = dict()
        if do_ll_check:
            check = self.ll_check(bf, ll_best=ll_best)

        return {**bf, **check,
                'ul':ul,
                'ul_guess': ul_guess['wimp_rate_multiplier']}

    def run_toys(self, n, do_ll_check=False, df=True, save_name=None):
        """Run ``n`` toy MCs and collect their results.

        Args:
            n: number of toys.
            do_ll_check: forwarded to ``toymc``.
            df: if True return a DataFrame with one row per toy, otherwise
                a numpy array holding the per-toy result dicts.
            save_name: if given, the DataFrame is pickled to this path
                (also when ``df`` is False).

        Returns:
            ``pandas.DataFrame`` or ``numpy.ndarray``, depending on ``df``.
        """
        res = np.array([self.toymc(do_ll_check=do_ll_check)
                        for _ in tqdm(range(n))])

        if df or save_name is not None:
            # Turn the list of per-toy dicts into one column per key
            d = defaultdict(list)
            for toy in res:
                for k, v in toy.items():
                    d[k].append(v)
            res_df = pd.DataFrame(d)

        if save_name is not None:
            res_df.to_pickle(save_name)

        if df:
            return res_df
        return res

    def sim_set_and_call(self):
        """Simulate background-only data, set it, and evaluate the likelihood.

        Returns:
            tuple ``(d, lnL)``: the simulated dataset and the value returned
            by calling the likelihood without arguments.
        """
        d = self.lf.simulate(er_rate_multiplier=self.er_rate_true,
                             wimp_rate_multiplier=0.)
        self.lf.set_data(d)

        lnL = self.lf()
        return d, lnL

    @staticmethod
    def load(names):
        """Read pickled toy DataFrames and concatenate them.

        Does not use any instance state, so it can be called on the class
        as well as on an instance.

        Args:
            names: iterable of pickle file paths (e.g. written by
                ``run_toys(save_name=...)``).

        Returns:
            One DataFrame with a fresh 0..N-1 index.
        """
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        dfs = [pd.read_pickle(name) for name in names]
        return pd.concat(dfs, sort=False).reset_index(drop=True)

# Run toyMCs

In [None]:
# Settings of this study, collected in one place before building the object
sensitivity_settings = {
    'fv': 1.3,         # fiducial volume (approximate: only the Xe1T radius is shrunk)
    'mwimp': 2e1,      # WIMP mass
    'xsec': 4.7e-47,   # cross-section in cm^2
    'n_spectra': 1,    # how many WIMP spectra to use
    'e_rec_max': 50.,  # upper recoil energy of the WIMP spectra [keV]
    'batch_size': 700, # events per batch in each source
}
sensitivity = Sensitivity(**sensitivity_settings)

In [None]:
%%time
# Set some data and call likelihood, this will trigger the tracing (takes about 7 seconds)
# `a` is the simulated background-only dataset, `b` the log likelihood
# evaluated on it (the two values returned by sim_set_and_call).
a, b = sensitivity.sim_set_and_call()
print(len(a), b)

In [None]:
%%time
# Now calling likelihood should be fast (~250 ms)
# Called without arguments on the data set by the previous cell; presumably
# this evaluates at the likelihood's default parameter values -- TODO confirm.
sensitivity.lf()

In [None]:
# Run 200 background-only toys and pickle the result table.
# The table is bound to `toys_df`, the name every cell in the Results section
# reads (it was previously stored as `toys_df_mod`, leaving `toys_df` undefined).
toys_df = sensitivity.run_toys(200, do_ll_check=False, save_name='toys_mw20.pkl')

# Results

In [None]:
# Display the per-toy results table; expected to hold the DataFrame returned
# by run_toys (best-fit multipliers plus 'ul' and 'ul_guess').
toys_df

In [None]:
# Best-fit ER multiplier against best-fit WIMP multiplier, one dot per toy.
# The dashed red lines mark the values the toys were simulated with.
fig, ax = plt.subplots()
ax.scatter(toys_df['wimp_rate_multiplier'],
           toys_df['er_rate_multiplier'],
           s=3)
for draw_line, true_value in ((ax.axhline, sensitivity.er_rate_true),
                              (ax.axvline, 0.)):
    draw_line(true_value, color='r', linestyle='--')
ax.set_xlabel('wimp_rate_multiplier')
ax.set_ylabel('er_rate_multiplier')
plt.show()

In [None]:
# Best-fit ER multiplier against the WIMP upper limit, one dot per toy.
# Dashed red lines: the simulated ER multiplier (horizontal) and zero (vertical).
fig, ax = plt.subplots()
ax.scatter(toys_df['ul'],
           toys_df['er_rate_multiplier'],
           s=3)
for draw_line, position in ((ax.axhline, sensitivity.er_rate_true),
                            (ax.axvline, 0.)):
    draw_line(position, color='r', linestyle='--')
ax.set_xlabel('ul')
ax.set_ylabel('er_rate_multiplier')
plt.show()

In [None]:
# Short alias for the chi-square distribution; used for the reference curve below.
chi2 = scipy.stats.chi2

In [None]:
# Reference curve: half of the chi-square (1 degree of freedom) density,
# sampled on a 100-point grid over [0, 3.5].
xs = np.linspace(start=0, stop=3.5, num=100)
ys = chi2.pdf(xs, df=1) / 2

In [None]:
# Distribution of the best-fit WIMP rate multiplier over the toys, compared
# with the reference curve (xs, ys) defined above. Axis labels and a legend are
# set so the figure can be read on its own.
fig, ax = plt.subplots()
ax.hist(toys_df['wimp_rate_multiplier'], bins=50, density=True,
        histtype='step', label='toy best fits')
ax.plot(xs, ys, label=r'$0.5 \cdot \chi^2_1$ pdf')
ax.set_yscale('log')
ax.set_xlabel('wimp_rate_multiplier')
ax.set_ylabel('probability density')
ax.legend()
plt.show()

In [None]:
# Convert the upper limit on the rate multiplier into a cross-section.
# 4.7e-47 cm^2 is the `xsec` the Sensitivity object above was built with; keep
# the two in sync.
xsec = toys_df['ul'] * 4.7e-47
fig, ax = plt.subplots()
ax.hist(xsec, bins=100, histtype='step')
ax.axvline(np.median(xsec), color='r', linestyle='--', label='median')
ax.set_xlabel(r'upper limit on cross-section [cm$^2$]')
ax.set_ylabel('toys per bin')
ax.legend()
plt.show()

In [None]:
# Smallest, median and largest upper limit over all toys
# (in units of wimp_rate_multiplier, the parameter the interval was computed for).
np.min(toys_df['ul']), np.median(toys_df['ul']), np.max(toys_df['ul'])

# Check lnL parabola

In [None]:
# Larger figures and fonts for every plot drawn from here on.
params = {'figure.figsize': (9, 6)}
for text_setting in ('legend.fontsize',
                     'axes.labelsize',
                     'axes.titlesize',
                     'xtick.labelsize',
                     'ytick.labelsize'):
    params[text_setting] = 'x-large'
plt.rcParams.update(params)

In [None]:
def ll_plot(data, title="", save_name=None, crit_val=None):
    """Plot the likelihood-ratio curve of one toy around its best fit.

    Draws -2 * (lnL - lnL_best) against wimp_rate_multiplier, the squared
    distance of that curve from the critical value, and vertical markers for
    the best fit, the upper limit and the interval-search starting point.

    Args:
        data: mapping (e.g. one row of the toy DataFrame) with keys 'xs',
            'ys', 'll_best', 'ul', 'wimp_rate_multiplier',
            'er_rate_multiplier' and 'ul_guess'. 'xs', 'ys' and 'll_best'
            are only produced by toys run with ``do_ll_check=True``.
        title: figure title.
        save_name: if given, the figure is saved as ``save_name + '.png'``.
        crit_val: critical value of the test statistic. When None, the same
            expression as ``Sensitivity.crit_val`` is used,
            ``scipy.stats.norm.ppf(0.9) ** 2``. (The function used to read an
            undefined global ``crit_val`` and raised NameError.)

    Raises:
        KeyError: if ``data`` lacks one of the keys listed above.
    """
    if crit_val is None:
        from scipy.stats import norm
        crit_val = norm.ppf(0.9) ** 2

    xs = data['xs']
    ys = data['ys']
    ll_best = data['ll_best']
    ul = data['ul']
    wimp_rm = data['wimp_rate_multiplier']
    er_rm = data['er_rate_multiplier']  # looked up so a missing key fails early; not drawn
    ul_guess = data['ul_guess']

    # Test statistic along the scan
    t_stat = -2 * (ys - ll_best)

    plt.figure()
    plt.plot(xs, t_stat, label=r'$2\ln\mathcal{L}_{best} -2\ln\mathcal{L}(\theta)$')
    # Squared distance to the critical value; it reaches zero where the curve crosses it
    plt.plot(xs, (t_stat - crit_val)**2, label=r'$(2\ln\mathcal{L}_{best} -2\ln\mathcal{L}(\theta) - v_{crit})^2$')

    plt.axhline(0, color='k')
    plt.axhline(crit_val, color='red', label=r'$v_{crit}$')

    plt.axvline(wimp_rm, color='k', linestyle='--', label=r'$\theta_{best}$')
    plt.axvline(ul, color='g', linestyle='--', label=r'$\theta_{UL}$')
    plt.axvline(ul_guess, color='magenta', linestyle='--', label=r'$\theta_{start}$')

    plt.ylim(-1, 8)
    plt.xlim(0.0, 3.5)
    plt.xlabel('wimp_rate_multiplier')
    plt.legend(loc='upper left')
    plt.title(title)
    if save_name is not None:
        plt.savefig(save_name + '.png', dpi=200, bbox_inches='tight')
    plt.show()

In [None]:
# The parabola plots need the likelihood-scan columns, which run_toys only
# stores when called with do_ll_check=True. Check first so the cell reports
# what is missing instead of failing with a KeyError inside ll_plot.
missing_cols = {'xs', 'ys', 'll_best'} - set(toys_df.columns)
if missing_cols:
    print(f'Skipping lnL plots: toys_df lacks {sorted(missing_cols)} '
          '(rerun run_toys with do_ll_check=True)')
else:
    for idx, toy in toys_df.iterrows():
        ll_plot(toy, title=f'Toy #{idx}') # , save_name=f'toy_{idx}')