In [1]:
import uproot as up
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

#plt.style.use('seaborn-paper')
plt.rcParams["patch.force_edgecolor"] = True

In [2]:
file_BKG = up.open("radioactivity_userfile_7days.root")
file_IBD = up.open('ibd_userfile_7days.root')

dataset_IBD = file_IBD['TRec'].arrays(library = 'np')
dataset_BKG = file_BKG['TRec'].arrays(library = 'np')

Dataset è un DICT di NUMPY ARRAY

In [3]:
print(dataset_IBD["m_QEn"].shape)
print(dataset_BKG["m_QEn"].shape)

print(dataset_BKG["m_QEn"].shape[0]+dataset_IBD["m_QEn"].shape[0])

(704,)
(61870091,)
61870795


In [4]:
def combine_dict(d1, d2):
    combined = {}
    for k in set(d1.keys()) | set(d2.keys()):
        if k in d1 and k in d2 and isinstance(d1[k], np.ndarray) and isinstance(d2[k], np.ndarray):
            combined[k] = np.concatenate([d1[k], d2[k]])
        elif k in d1:
            combined[k] = d1[k]
        else:
            combined[k] = d2[k]
    
    provenienza = np.concatenate([np.full_like(d1.get(k, []), 1), np.full_like(d2.get(k, []), 0)])
    return {**combined, 'provenienza': provenienza}

In [5]:
all_data = combine_dict(dataset_IBD,dataset_BKG)

-> Dunque se viene dal dataset IBD si ha che l'evento avrà una Label di 1

### Now I'll sort the data in temporal order

# Now for the dataset

In [6]:
ord_idx = all_data["m_triggerT"].argsort()
ord_idx

array([     704,      705,      706, ..., 61870792, 61870793, 61870794])

In [7]:
for key in all_data.keys():
    all_data[key] = all_data[key][ord_idx]

In [8]:
# plt.plot(all_data["m_triggerT"])

# Creatrion of the $\Delta r$, $\Delta t$ and label them IBD or Decay

In [9]:
from numba import jit, njit, prange, get_num_threads

In [10]:
# @njit(parallel = False)
# def create_features(x,y,z,E,t,proven, expon_time_cut = 5 * 220e3):
#     delta_time = np.zeros(0)
#     delta_radius = np.zeros(0)
#     E_pro = np.zeros(0)
#     E_del = np.zeros(0)
#     Label = np.zeros(0)

#     for i in range(x.shape[0] - 1):
#         for j in range(i + 1 , x.shape[0] - 1): # Non devo considerare le coppie j antecedenti perchè sono state già contate da i successivi
#             if (t[j] - t[i]) < expon_time_cut: # j è l'indice del delay   
#                 delta_time = np.append(delta_time,t[j] - t[i])
#                 delta_radius = np.append(delta_radius,np.sqrt((x[i] - x[j])**2 + (y[i] - y[j])**2 + (z[i] - z[j])**2))
#                 E_pro = np.append(E_pro, E[i])
#                 E_del = np.append(E_del, E[j])
#                 if proven[i] == 1 and proven[j] == 1:
#                     Label = np.append(Label,1)
#                 else:
#                     Label = np.append(Label,0)
#             else: break
#     return delta_time, delta_radius, E_pro, E_del, Label           
                    
                
@njit(parallel = True)
def create_features_handle(x,y,z,E,t,proven, expon_time_cut = 5 * 220e3):

    # n = get_num_threads()
    n = x.shape[0] - 1

    # Creo una vettore "locale" che viene scritto/letto solo da un thread per volta 
    delta_time = n*[np.zeros(0)]
    delta_radius = n*[np.zeros(0)]
    E_pro = n*[np.zeros(0)]
    E_del = n*[np.zeros(0)] 
    Label = n*[np.zeros(0)]

    for i in prange(x.shape[0] - 1):

        mask = np.logical_and(t>t[i], (t - t[i]) < expon_time_cut)
        to_loop = np.nonzero(mask)[0]
        
        # Non ciclo su tutti i possibili eventi, ma solo su quelli che mi possono interessare

        for t_index in range(len(to_loop)): # Non devo considerare le coppie j antecedenti perchè sono state già contate da i successivi
            j = to_loop[t_index]

            if (t[j] - t[i]) < expon_time_cut: # j è l'indice del delay

                delta_time[i] = np.append(delta_time[i],t[j] - t[i])
                delta_radius[i] = np.append(delta_radius[i],np.sqrt((x[i] - x[j])**2 + (y[i] - y[j])**2 + (z[i] - z[j])**2))
                E_pro[i] = np.append(E_pro[i], E[i])
                E_del[i] = np.append(E_del[i], E[j])
                if proven[i] == 1 and proven[j] == 1:
                    Label[i] = np.append(Label[i],1)
                else:
                    Label[i] = np.append(Label[i],0)
            else:
                print(i, j, t[j] - t[i], 'Qualcosa non va')
                break

    return delta_time, delta_radius, E_pro, E_del, Label        

from iteration_utilities import deepflatten

# Funzione per fare un flatten dell'output di Numba
def create_features(x,y,z,E,t,proven, expon_time_cut = 5 * 220e3):
    res = create_features_handle(x,y,z,E,t,proven, expon_time_cut)
    out = []
    for vec in res:
        out.append(np.asarray(list(deepflatten(vec))))
    return out

In [17]:
features = {"delta_time": np.array([]),
            "delta_radius": np.array([]),
            "E_pro": np.array([]),
            "E_del": np.array([]), 
            "Label": np.array([])}

cut = 1000000
features["delta_time"],features["delta_radius"],features["E_pro"],features["E_del"],features["Label"] = create_features(
    all_data["recx"][:cut],
    all_data["recy"][:cut],
    all_data["recz"][:cut],
    all_data["m_QEn"][:cut],
    all_data["m_triggerT"][:cut],
    all_data["provenienza"][:cut])

In [12]:
features["delta_time"].shape

(11238,)

In [13]:
print(all_data["recx"].dtype)
print(all_data["recy"].dtype)
print(all_data["recz"].dtype)
print(all_data["m_QEn"].dtype)
print(all_data["m_triggerT"].dtype)
print(all_data["provenienza"].dtype)

float32
float32
float32
float32
float64
float32


In [16]:
count = features["Label"].sum()

print(count)

3.0
