In [0]:
# Select the TensorFlow 2.x release line; this has to run before TensorFlow
# is imported further down.
# NOTE(review): `%tensorflow_version` looks like the Google Colab line magic and
# is presumably not defined in a stock IPython kernel — confirm the runtime.
%tensorflow_version 2.x

In [0]:
# Fetch the flamedisx sources from GitHub into ./flamedisx.
!git clone https://github.com/FlamTeam/flamedisx.git

# Enter the checkout, switch to an up-to-date master branch, install the
# package in development mode, then return to the starting directory.
%cd flamedisx
!git checkout master
!git pull origin master
!python setup.py develop
%cd ..

In [0]:
import tensorflow as tf

# Report the TensorFlow version, whether this build was compiled with GPU
# support, and whether a GPU is actually visible to the runtime.
# `tf.test.is_gpu_available()` is deprecated in TensorFlow 2; the documented
# replacement is `tf.config.list_physical_devices('GPU')`. Its list result is
# collapsed to a bool so the displayed tuple keeps the same three-field shape.
(tf.__version__,
 tf.test.is_built_with_gpu_support(),
 bool(tf.config.list_physical_devices('GPU')))

In [0]:
import pandas as pd
np = pd.np
import flamedisx as fd
import matplotlib.pyplot as plt
import scipy
from tqdm import tqdm

%matplotlib inline

In [0]:
# Sanity check: echo the imported flamedisx module object.
# Restart runtime if this says: <module 'flamedisx' (namespace)>
# (presumably a namespace module means the freshly installed package was not
# picked up by the running kernel — confirm).
fd

# SR1-like Source

In [0]:
# Derive the radius of a cylinder with the same length as the default TPC
# whose volume is 1.3 / 2 of the default one (1.3 tonnes instead of 2 tonnes,
# assuming the mass scales linearly with volume).
r = fd.LXeSource.tpc_radius
l = fd.LXeSource.tpc_length
# Cylinder volume of the default TPC.
v = np.pi * r ** 2 * l
print(f'XENON1T tpc volume {v / 1e6 :.3g} m^3 holds 2 tonnes LXe')
# Target volume: the default volume rescaled from 2 tonnes to 1.3 tonnes.
v_small = v / 2 * 1.3
# Invert V = pi * r^2 * l for the radius at fixed length l.
r_small = (v_small / l / np.pi)**0.5
print(f'TPC with radius {r_small :.3g} cm holds 1.3 tonnes LXe')

In [0]:
def _add_time_in_days(d):
    """Add a 't' column to ``d`` in place if it does not have one yet.

    't' is ``d['event_time']`` expressed in days relative to
    2000-01-01T12:00. An existing 't' column is left untouched.

    NOTE(review): ``event_time`` is assumed to be in nanoseconds on the same
    clock as ``pandas.Timestamp.value`` — confirm against the data source.

    Needed until flamedisx PR #58 is merged.
    """
    if 't' not in d:
        zero = pd.to_datetime('2000-01-01T12:00')
        nanoseconds_per_day = 1e9 * 3600 * 24
        d['t'] = (d['event_time'] - zero.value) / nanoseconds_per_day


class myERSource(fd.SR1ERSource):
    """SR1 ER source with the TPC radius reduced to ``r_small``."""

    # Modify TPC radius to contain 1.3 tonne LXe
    tpc_radius = r_small  # cm

    # Needed until FD PR #58 is merged
    def add_extra_columns(self, d):
        """Run the parent hook, then make sure ``d`` carries a 't' column."""
        super().add_extra_columns(d)
        # Try new faster implementation, see flamedisx PR #58
        _add_time_in_days(d)


class myWIMPSource(fd.x1t_sr1.SR1NRSource, fd.WIMPSource):
    """WIMP source combining the SR1 NR source with ``fd.WIMPSource``."""

    # Union of the columns both parents need. dict.fromkeys removes duplicates
    # while keeping first-seen order, so the tuple is identical on every run
    # (a set of strings would be ordered by randomised hashes).
    extra_needed_columns = tuple(dict.fromkeys(
        list(fd.x1t_sr1.SR1NRSource.extra_needed_columns) +
        list(fd.WIMPSource.extra_needed_columns)))

    # Modify TPC radius to contain 1.3 tonne LXe
    tpc_radius = r_small  # cm

    # WIMP settings
    n_in = 2
    es = np.geomspace(0.7, 50, 100)  # [keV]
    mw = 2e2  # GeV
    sigma_nucleon = 4.7e-47  # cm^2

    # Needed until FD PR #58 is merged
    def add_extra_columns(self, d):
        """Run the parent hook, then make sure ``d`` carries a 't' column."""
        super().add_extra_columns(d)
        # Try new faster implementation, see flamedisx PR #58
        _add_time_in_days(d)

# Likelihood definition

In [0]:
# Likelihood over the ER and WIMP sources defined above; both are listed in
# `free_rates`.
lf = fd.LogLikelihood(
    sources={'er': myERSource, 'wimp': myWIMPSource},
    free_rates=('er', 'wimp'),
    batch_size=350,
)

In [0]:
def sim(r_er, r_wimp, n_trials=100):
    """Average the simulated ER and WIMP event counts over repeated trials.

    Helper to find rate params such that mean number of ER events = 627 and
    WIMP events = 3.56.

    Parameters
    ----------
    r_er : ER rate multiplier passed to ``lf.simulate``.
    r_wimp : WIMP rate multiplier passed to ``lf.simulate``.
    n_trials : number of simulated datasets to average over.

    Returns
    -------
    tuple
        (mean number of 'er' events, mean number of 'wimp' events) per trial.
    """
    er_total = 0
    wimp_total = 0
    for _trial in tqdm(range(n_trials)):
        simulated = lf.simulate(er_rate_multiplier=r_er,
                                wimp_rate_multiplier=r_wimp)
        labels = simulated['source']
        er_total += (labels == 'er').sum()
        wimp_total += (labels == 'wimp').sum()
    return er_total / n_trials, wimp_total / n_trials

In [0]:
# Mean event counts obtained with the candidate rate multipliers.
sim(r_er=0.82, r_wimp=0.8)

In [0]:
# Rate multipliers adopted as the "true" values for the studies below.
er_rate_true, wimp_rate_true = 0.82, 0.8

# ToyMC definition

In [0]:
# UL only: threshold on the test statistic, taken as the square of the 90%
# quantile of the standard normal distribution.
z_90 = scipy.stats.norm.ppf(0.9)
crit_val = z_90 ** 2

In [0]:
def ll_check(lf, bf, ll_best=None):
    """Scan the likelihood along ``wimp_rate_multiplier``.

    The ER rate multiplier is held at its value in ``bf`` while the WIMP rate
    multiplier runs over 50 evenly spaced points between 0.0 and 3.5.

    Parameters
    ----------
    lf : callable accepting ``er_rate_multiplier`` and
        ``wimp_rate_multiplier`` keyword arguments.
    bf : dict of best-fit parameters; must contain 'er_rate_multiplier'.
    ll_best : value of ``lf`` at the best fit. Computed as ``lf(**bf)`` when
        omitted.

    Returns
    -------
    dict
        'xs' (scan points), 'ys' (``lf`` at each scan point) and 'll_best'.
    """
    ll_best = lf(**bf) if ll_best is None else ll_best

    scan_points = np.linspace(0.0, 3.5, 50)
    er_rate = bf['er_rate_multiplier']
    scan_values = []
    for wimp_rate in scan_points:
        scan_values.append(lf(wimp_rate_multiplier=wimp_rate,
                              er_rate_multiplier=er_rate))
    return {'xs': scan_points, 'ys': np.array(scan_values), 'll_best': ll_best}

In [0]:
def toymc(do_ll_check=False):
    """Simulate one background-only dataset, fit it and compute a limit.

    The dataset is simulated with ``er_rate_true`` and a WIMP rate multiplier
    of zero, then installed on the global likelihood ``lf`` via ``set_data``.

    Parameters
    ----------
    do_ll_check : when true, also run ``ll_check`` and merge its output
        ('xs', 'ys', 'll_best') into the result.

    Returns
    -------
    dict
        The best-fit parameters, the optional ``ll_check`` entries, 'ul' (the
        value returned by ``lf.one_parameter_interval``) and 'ul_guess' (the
        WIMP rate multiplier the interval search was started from).
    """
    # simulate background only data
    toy_data = lf.simulate(er_rate_multiplier=er_rate_true,
                           wimp_rate_multiplier=0.)
    lf.set_data(toy_data)

    # Determine the global best fit
    bf = lf.bestfit(guess={'er_rate_multiplier': 0.9,
                           'wimp_rate_multiplier': 0.1},
                    llr_tolerance=0.0005)
    ll_best = lf(**bf)
    er_best = bf['er_rate_multiplier']

    def t_stat(wimp_rate_multiplier):
        # -2 * (lnL(theta) - lnL_best) with the ER rate held at its best fit.
        ll_here = lf(er_rate_multiplier=er_best,
                     wimp_rate_multiplier=wimp_rate_multiplier)
        return -2 * (ll_here - ll_best)

    # Start the limit search at 1.5 or three times the best fit, whichever is
    # larger, and move up in steps of 0.5 until the test statistic at the
    # starting point is no longer below the critical value.
    wrm_guess = max(1.5, 3 * bf['wimp_rate_multiplier'])
    while t_stat(wrm_guess) < crit_val:
        wrm_guess += 0.5
        print(f'increasing ul_guess to {wrm_guess}')

    ul_guess = bf.copy()
    ul_guess['wimp_rate_multiplier'] = wrm_guess

    ul = lf.one_parameter_interval('wimp_rate_multiplier',
                                   bestfit=bf,
                                   guess=ul_guess,
                                   llr_tolerance=0.0005)

    check = ll_check(lf, bf, ll_best=ll_best) if do_ll_check else dict()

    result = {**bf, **check}
    result['ul'] = ul
    result['ul_guess'] = ul_guess['wimp_rate_multiplier']
    return result

In [0]:
%%time
# Run one toy, including the likelihood scan, and report its wall time.
# (`%%time` has to stay on the first line of the cell.)
res = toymc(do_ll_check=True)
print(f'bestfit wimp_rate {res["wimp_rate_multiplier"]}, UL {res["ul"]}')

In [0]:
# Run a batch of toys, each including the likelihood scan, and keep the
# per-toy result dicts in an array.
toy_results = []
for _ in tqdm(range(10)):
    toy_results.append(toymc(do_ll_check=True))
toys = np.array(toy_results)

In [0]:
from collections import defaultdict

# Transpose the per-toy result dicts into one list per key, then build the
# results table from those columns (one row per toy).
# Descriptive names are used on purpose: the previous loop variable `v`
# silently overwrote the notebook-global TPC volume `v` from the geometry cell.
toy_columns = defaultdict(list)
for toy_result in toys:
    for column, value in toy_result.items():
        toy_columns[column].append(value)
toys_df = pd.DataFrame(toy_columns)

In [0]:
# Write the toy results table to 'toys.pkl' in the current working directory.
toys_df.to_pickle('toys.pkl')

In [0]:
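# Uncomment to reload previously saved toy results instead of re-running the
# toys (only unpickle files you created yourself):
#toys_df = pd.read_pickle('toys.pkl')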

# Results

In [0]:
# Display the toy results table (one row per toy).
toys_df

In [0]:
# Best-fit rate multipliers of every toy; the dashed red lines mark
# `er_rate_true` and a WIMP rate multiplier of zero.
fig, ax = plt.subplots()
ax.scatter(toys_df['wimp_rate_multiplier'], toys_df['er_rate_multiplier'])
ax.axhline(er_rate_true, color='r', linestyle='--')
ax.axvline(0., color='r', linestyle='--')
ax.set_xlabel('wimp_rate_multiplier')
ax.set_ylabel('er_rate_multiplier')
plt.show()

In [0]:
# Smallest, median and largest 'ul' value across the toys.
ul_values = toys_df['ul']
np.min(ul_values), np.median(ul_values), np.max(ul_values)

In [0]:
# Distribution of log10('ul') over the toys, shown between -2 and 2.
log10_ul = np.log10(toys_df['ul'])
fig, ax = plt.subplots()
ax.hist(log10_ul, bins=500, histtype='step')
ax.set_xlim(-2, 2)
plt.show()

# Check lnL parabola

In [0]:
# Matplotlib defaults for the plots below: larger fonts and a 9x6 figure.
_LARGE = 'x-large'
params = {
    'legend.fontsize': _LARGE,
    'figure.figsize': (9, 6),
    'axes.labelsize': _LARGE,
    'axes.titlesize': _LARGE,
    'xtick.labelsize': _LARGE,
    'ytick.labelsize': _LARGE,
}
plt.rcParams.update(params)

In [0]:
def ll_plot(data, title=""):
    """Plot the likelihood scan of one toy together with its fit results.

    Draws -2 * (ys - ll_best) against xs, the squared distance of that curve
    from the global ``crit_val``, a horizontal line at ``crit_val`` and dashed
    vertical lines at the best-fit WIMP rate multiplier, the limit and the
    starting guess of the limit search.

    Parameters
    ----------
    data : mapping with the keys 'xs', 'ys', 'll_best', 'ul',
        'wimp_rate_multiplier', 'er_rate_multiplier' and 'ul_guess'.
    title : figure title.
    """
    # Read every field up front so a missing key fails before a figure opens.
    xs, ys = data['xs'], data['ys']
    ll_best = data['ll_best']
    ul = data['ul']
    wimp_rm = data['wimp_rate_multiplier']
    er_rm = data['er_rate_multiplier']  # read but not drawn
    ul_guess = data['ul_guess']

    llr = -2 * (ys - ll_best)

    plt.figure()
    plt.plot(xs, llr,
             label=r'$2\ln\mathcal{L}_{best} -2\ln\mathcal{L}(\theta)$')
    plt.plot(xs, (llr - crit_val)**2,
             label=r'$(2\ln\mathcal{L}_{best} -2\ln\mathcal{L}(\theta) - v_{crit})^2$')

    plt.axhline(0, color='k')
    plt.axhline(crit_val, color='red', label=r'$v_{crit}$')

    # (position, colour, legend label) of each dashed vertical marker.
    markers = (
        (wimp_rm, 'k', r'$\theta_{best}$'),
        (ul, 'g', r'$\theta_{UL}$'),
        (ul_guess, 'magenta', r'$\theta_{start}$'),
    )
    for position, colour, legend_label in markers:
        plt.axvline(position, color=colour, linestyle='--', label=legend_label)

    plt.ylim(-1, 8)
    plt.xlim(0.0, 3.5)
    plt.xlabel('wimp_rate_multiplier')
    plt.legend(loc='upper left')
    plt.title(title)
    plt.show()

In [0]:
# One likelihood-scan plot per toy, titled with the toy's index.
for toy_number, toy_result in enumerate(toys):
    ll_plot(toy_result, title=f'Toy #{toy_number}')