In [1]:
#icecube:
from icecube import dataio, dataclasses, simclasses
from icecube.icetray import OMKey
from icecube.dataclasses import *

# The usual:
import copy
import math
import os

import h5py
import numpy as np

#Plotting:
%matplotlib notebook
from matplotlib import rcParams
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.pyplot import cm
import matplotlib.pyplot as plt

---

In [2]:
# Placeholder container; the name is rebound to the opened HDF5 file in a later cell.
data_binned = {}

In [3]:
# Directory of the simulation set; PARAMS.npy and all_data.h5 are read from here.
# NOTE(review): absolute, user-specific path -- adjust when running elsewhere.
data_dir = '/data/user/fhenningsen/deepcore_data/felix_sca-domeff-11/'

In [4]:
# using new GCD
geometry = dataio.I3File("/home/fhenningsen/gcd/physics_volume_GCD.i3.bz2")

# the first frame popped from the GCD file is used as the geometry frame
gframe = geometry.pop_frame()
geo = gframe["I3Geometry"] # access geo file via key

# placeholder event dictionary (intended values per the original note:
# 2D (charge, time) arrays); it is not filled in this cell
event = {}

# OM keys used in the analysis: PMT 0 only, on strings numbered below 87
all_dom_keys = [key for key in geo.omgeo.keys()
                if key.pmt == 0 and key.string < 87]

In [5]:
# get simulation set parameters
# PARAMS.npy is unwrapped with .item() and then indexed by key, i.e. it is
# expected to hold a single pickled dict. NumPy >= 1.16.3 refuses to unpickle
# object arrays unless allow_pickle=True is passed explicitly.
# NOTE: unpickling executes code from the file -- only load files you trust.
params = np.load(os.path.join(data_dir, 'PARAMS.npy'), allow_pickle=True).item()

# unpack the entries used by the following cells
pocams     = params['pocams']
pocam_keys = params['pocam_keys']
truth_arr  = params['truth_arr']
truth_str  = params['truth_string']
truth_dict = params['truth_dict']
scan_N     = params['scan_N']
scan_abs   = params['scan_abs']
scan_sca   = params['scan_sca']
scan_dome  = params['scan_domeff']
scan_p0    = params['scan_p0']
scan_p1    = params['scan_p1']

IOError: [Errno 2] No such file or directory: '/data/user/fhenningsen/deepcore_data/felix_sca-domeff-11/PARAMS.npy'

In [None]:
# show the loaded parameter set for inspection
params

In [None]:
def param_string(abs_i, sca_i, dome_i, p0_i, p1_i):
    """Return the key string that identifies one parameter combination.

    The five numbers are written with three decimals each after the tags
    ABS, SCA, DOME, P0 and P1 (in that order), joined by underscores.
    The result is used to index the per-parameter datasets in the HDF5 file.
    """
    values = (abs_i, sca_i, dome_i, p0_i, p1_i)
    return 'ABS-%.3f_SCA-%.3f_DOME-%.3f_P0-%.3f_P1-%.3f' % values

## Read in data

In [None]:
# Open the binned hit data read-only. The handle is kept open because the
# following cells index it directly. os.path.join keeps the path valid even
# if data_dir is given without a trailing slash.
data_binned = h5py.File(os.path.join(data_dir, 'all_data.h5'), 'r')

In [None]:
# POCAM groups stored under the 'truth' and 'data' branches of the file
dt = data_binned['truth'].keys()
dd = data_binned['data'].keys()

# report for every configured POCAM whether both branches contain it
for pk in pocams:
    in_both = (pk in dt) and (pk in dd)
    if not in_both:
        print('%s \tMISSING !!!' %pk)
        continue
    print('POCAM_KEY (%s) \tavailable in TRUTH and SIM.' %pk)

# reminder of how the file is indexed
for line in ('\nData structure:',
             '\tdata [TRUTH] [POCAM_KEY] [OM_KEY]',
             '\tdata [DATA]  [POCAM_KEY] [OM_KEY] [PARAM_STRING]'):
    print(line)

### Plot an example time profile from truth

In [None]:
# example POCAM / DOM pair, given as the string keys used in the HDF5 file
pk  = '88-72'
ok  = '36-30-0'#OMKey(36,30,0)

# read both binned time profiles fully into memory
test_truth = data_binned['truth'][pk][ok][:]
test_data  = data_binned['data'][pk][ok][param_string(1,1,0.9,1,1)][:]

# raw time profiles
plt.figure(figsize=(8,3))
plt.step(range(len(test_truth)), test_truth, where='mid', label='Truth')
plt.step(range(len(test_data)), test_data, where='mid', color='red', label='Example data, DomEff = 0.9')
plt.xlim(0, 1500)
plt.xlabel('Time [ns]')
plt.ylabel('Hits / bin')
plt.legend()
plt.tight_layout()

# ratio
plt.figure(figsize=(8,3))
# Only bins with simulated hits have a defined ratio; masking them first
# avoids division by zero. The float cast prevents floor division under
# Python 2 should the datasets be stored as integers.
filled = test_data > 0
h = test_truth[filled].astype(float) / test_data[filled]
plt.hist(h[h>0], label='Truth / Data', range=(1, 10), bins=10)
plt.yscale('log')
plt.xlabel('Truth / Data')
plt.ylabel('Bins')
plt.legend()
plt.tight_layout()
plt.show()

### Comparison

# Computing the LLH landscape
For the likelihood we combine the likelihoods of the individual DOMs for all POCAM flashes. To keep it manageable we only look at DOMs that are within a 100 m radius around the corresponding POCAM. 
<br>
Using the likelihood:
$$
\mathcal{L} = \prod_{\text{POCAMs}} \, \prod_{\text{DOMs} < 100m} \, \prod_{\text{datapoints}} \, p(d_i, t_i)
$$
or, equivalently, as the negative log-likelihood:
$$
-2 \ln \mathcal{L} = -2 \sum_{\text{POCAMs}} \, \sum_{\text{DOMs} < 100m} \, \sum_{\text{datapoints}} \, \ln( p(d_i, t_i))
$$
If $d_i \geq 20$ (the code switches on the simulated count $d_i$):
$$
p(d_i,t_i) = Normal(d_i,t_i) = \frac{1}{\sqrt{2 \pi \sigma^2}} \, e^{-\frac{(d_i-t_i)^2}{2 \sigma^2}} \qquad \text{where} \quad \sigma^2 = \sqrt{t_i}^2 = t_i
$$
and if $d_i < 20$:
$$
        p(d_i,t_i) = Poisson(d_i,t_i) = \frac{t_i^{d_i}}{d_i !} \, e^{-t_i} 
$$

In [None]:
# show the POCAM keys of this simulation set
pocams

In [None]:
# number of bins to use for llh (from first non-zero value)
n_data = 25

# Only DOMs closer than this to the flashing POCAM enter the likelihood.
# Same length unit as the geometry coordinates (assumed metres -- TODO confirm).
max_distance = 100.

# counts of the set under test below this value use the Poisson term,
# all others the Gaussian approximation
poisson_limit = 20.


def neg2_log_likelihood(t, d, limit=poisson_limit):
    """Return -2 ln p(d | t) for a single time bin.

    t     -- count of the truth set in this bin, must be > 0
    d     -- count of the scanned simulation set in this bin
    limit -- d below this value uses the Poisson term, otherwise the Gaussian
    """
    if d < limit:
        # Poisson: lgamma(d + 1) equals ln(d!) and also accepts non-integer d
        return -2. * (d * np.log(t) - math.lgamma(d + 1.) - t)
    # Gaussian with variance t
    return ((d - t) ** 2) / t + 2. * np.log(np.sqrt(2. * np.pi * t))


# ---------------------------------------------------------------------------
# Everything that does not depend on the scanned parameters is prepared once:
# per POCAM, the nearby DOMs that were hit in truth and their truth windows.
# ---------------------------------------------------------------------------
dom_selection = []

for pocamk in pocam_keys:

    pk = '%i-%i' %(pocamk[0], pocamk[1])

    # get POCAM coordinates:
    p_pos = geo.omgeo[pocamk].position

    selected_doms = []

    for ok in all_dom_keys:
        if ok.pmt != 0: # ignore upgrade OMs
            continue

        # get distance to POCAM and apply the distance cut
        d_pos = geo.omgeo[ok].position
        distance = np.sqrt((d_pos.x-p_pos.x)**2 + (d_pos.y-p_pos.y)**2 + (d_pos.z-p_pos.z)**2)
        if distance >= max_distance:
            continue

        # get h5 domkey
        omk = '%i-%i-%i' %(ok[0], ok[1], ok[2])

        # read the truth profile once; skip DOMs that were not hit at all
        truth = data_binned['truth'][pk][omk][:]
        if truth.size == 0 or truth.max() <= 0:
            continue

        # window of n_data bins starting at the first non-zero truth bin
        index = int(np.flatnonzero(truth)[0])
        selected_doms.append((omk, index, truth[index:index+n_data]))

    dom_selection.append((pk, selected_doms))


# define empty llh-landscape: rows follow scan_sca, columns follow scan_dome
llh_array = np.zeros((len(scan_sca),len(scan_dome)))

for sca_i, SCA in enumerate(scan_sca):

    for dome_i, DOME in enumerate(scan_dome):

        # parameter string (abs, sca, domeff, p0, p1)
        par = param_string(truth_dict['abs'], SCA, DOME, truth_dict['p0'], truth_dict['p1'])

        # list of llh for pocams
        llh_pocam = []

        for pk, selected_doms in dom_selection:

            print('Calculating %s' %(pk))

            # list of llh for doms
            llh_doms = []

            for omk, index, truth in selected_doms:

                # read the simulated profile once; skip DOMs without any hit
                data_sim = data_binned['data'][pk][omk][par][:]
                if data_sim.size == 0 or data_sim.max() <= 0:
                    continue

                # same window as used for the truth profile
                data_sim = data_sim[index:index+n_data]

                # zip() stops at the shorter window, so a profile that ends
                # before n_data bins are available cannot raise IndexError
                llh_datapoints = [neg2_log_likelihood(float(t), float(d))
                                  for t, d in zip(truth, data_sim) if t > 0]

                # append llh sum from DOM:
                llh_doms.append(sum(llh_datapoints)/n_data)

            # append llh sum from POCAM:
            llh_pocam.append(sum(llh_doms))

        # putting the final llh into the landscape array:
        llh_array[sca_i,dome_i]=sum(llh_pocam)

print(llh_array)

In [None]:
fig = plt.figure(figsize=(5,5))
ax  = fig.add_subplot(111)

# plot likelihood array
# llh_array is indexed [sca, domeff], so domeff runs along x and sca along y.
x  = scan_dome
y  = scan_sca
# half of the first grid step; assumes an evenly spaced scan with >= 2 points
dx = np.abs((np.unique(x)[1:] - np.unique(x)[:-1])/2)[0]
dy = np.abs((np.unique(y)[1:] - np.unique(y)[:-1])/2)[0]
# cell edges: every grid value shifted down by half a step plus the upper edge
xx = np.unique(np.append(x - dx, sorted(x + dx)[-1]))
yy = np.unique(np.append(y - dy, sorted(y + dy)[-1]))
mesh = ax.pcolormesh(xx, yy, llh_array)
cbar = fig.colorbar(mesh, orientation='vertical',fraction=0.045,ax=ax)
cbar.set_label(r'$-2 \, ln \, \mathcal{L}$',fontsize=18)

# plot markers for truth and min
# argmin + unravel_index yields exactly one (row, column) pair even when the
# minimum value occurs more than once (the first occurrence is taken)
index_1, index_2 = np.unravel_index(np.argmin(llh_array), llh_array.shape)
ax.scatter(truth_dict['dome'], truth_dict['sca'], s=250,label='MC truth',marker='*', color='red')
# same axis convention as the truth marker: x = domeff, y = sca
ax.scatter([scan_dome[index_2]], [scan_sca[index_1]], s=75,label='LLH min',marker='*', color='cyan')

# format
ax.set_title("Grid search",fontsize=18)
ax.set_xlabel('DomEff', fontsize=18)
ax.set_ylabel('Sca', fontsize=18)
ax.set_aspect('equal')
ax.legend(fontsize=13)
fig.tight_layout()
fig.savefig('grid_search.pdf')

# single pre-formatted strings print identically under Python 2 and 3
print('LLH minimum at sca: %s' % scan_sca[index_1])
print('        and domeff: %s' % scan_dome[index_2])