In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns

In [None]:
import uproot
from pathlib import Path
from tqdm.auto import tqdm

# load utils: raw-id (CdID) conversion tables and position tables for large and small PMTs
df_pmt_id_conversion = pd.read_csv("/home/ferracci/new_dataset/utils/PMT_ID_conversion.csv")
df_pmt_position = pd.read_csv("/home/ferracci/new_dataset/utils/PMTPos_CD_LPMT.csv")
df_spmt_id_conversion = pd.read_csv("/home/ferracci/new_dataset/utils/SPMT_ID_conversion.csv")
df_spmt_position = pd.read_csv("/home/ferracci/new_dataset/utils/PMTPos_CD_SPMT.csv")

# large-PMT coordinates: one (n, 1) column per axis, scaled by 1/1000
# (presumably mm -> m; confirm against the position files), stacked into an (n, 3) array
x, y, z = (
    (df_pmt_position[axis].to_numpy() / 1000).reshape(-1, 1)
    for axis in ("x", "y", "z")
)
pos = np.hstack((x, y, z))

# same construction for the small PMTs
x_s, y_s, z_s = (
    (df_spmt_position[axis].to_numpy() / 1000).reshape(-1, 1)
    for axis in ("x", "y", "z")
)
pos_s = np.hstack((x_s, y_s, z_s))

# lookup tables: raw id (CdID) -> standard id, and standard id -> position row
pmt_id_raw_to_id_map = {
    raw_id: std_id
    for raw_id, std_id in zip(df_pmt_id_conversion['CdID'], df_pmt_id_conversion['PMTID'])
}
pmt_id_to_pos_map = {
    std_id: xyz for std_id, xyz in zip(df_pmt_position['PMTID'], pos)
}

spmt_id_raw_to_id_map = {
    raw_id: std_id
    for raw_id, std_id in zip(df_spmt_id_conversion['CdID'], df_spmt_id_conversion['SPMTID'])
}
spmt_id_to_pos_map = {
    std_id: xyz for std_id, xyz in zip(df_spmt_position['SPMTID'], pos_s)
}

N_FILES = 125  # number of input files to read

# sorted() makes the selection of the first N_FILES files reproducible:
# Path.glob() yields directory entries in arbitrary (filesystem) order
files = sorted(Path("/mnt/data/train_flat_dataset_processed/").glob("*"))
if not files:
    raise FileNotFoundError("no input files found in /mnt/data/train_flat_dataset_processed/")


def _as_event_array(per_event):
    """Pack a sequence of per-event arrays into a 1-D object array (one entry per event).

    Filling element by element avoids np.array(..., dtype=object) collapsing the
    events into a regular N-D array when they all happen to have the same length.
    """
    packed = np.empty(len(per_event), dtype=object)
    for k, item in enumerate(per_event):
        packed[k] = item
    return packed


def _event_positions(raw_ids_per_event, raw_to_id, id_to_pos):
    """Map raw ids (CdID) -> standard ids -> positions; returns one (n_hits, 3) array per event."""
    return _as_event_array([
        # reshape keeps events without hits as (0, 3) so that [:, k] indexing still works
        np.array([id_to_pos[raw_to_id[n]] for n in event], dtype=float).reshape(-1, 3)
        for event in raw_ids_per_event
    ])


# per-file chunks, concatenated once after the loop (np.append in the loop would
# copy the whole accumulated array at every iteration)
pmt_pos_chunks, charge_chunks, fht_chunks = [], [], []
spmt_pos_chunks, charge_s_chunks, fht_s_chunks = [], [], []
target_frames = []

# compute PMTs positions, charges and first-hit-times for roughly 500k events
for filename in tqdm(files[:N_FILES]):
    # NOTE: allow_pickle=True unpickles arbitrary objects -- only use on trusted files.
    # The context manager closes the archive once both arrays have been read.
    with np.load(filename, allow_pickle=True) as data:
        hits = data["hits"]
        targets = data["primaries"]

    # rows 0-2 of hits: large-PMT raw ids, charges, first-hit-times
    pmt_pos_chunks.append(_event_positions(hits[0, :], pmt_id_raw_to_id_map, pmt_id_to_pos_map))
    charge_chunks.append(_as_event_array(hits[1, :]))
    fht_chunks.append(_as_event_array(hits[2, :]))

    # rows 3-5 of hits: small-PMT raw ids, charges, first-hit-times
    spmt_pos_chunks.append(_event_positions(hits[3, :], spmt_id_raw_to_id_map, spmt_id_to_pos_map))
    charge_s_chunks.append(_as_event_array(hits[4, :]))
    fht_s_chunks.append(_as_event_array(hits[5, :]))

    # retrieve targets (columns 1 and 5 of "primaries")
    target_frames.append(pd.DataFrame({"Qedep": targets[:, 1], "Redep": targets[:, 5]}))

pmt_pos, charge, fht = (
    np.concatenate(chunks) for chunks in (pmt_pos_chunks, charge_chunks, fht_chunks)
)
spmt_pos, charge_s, fht_s = (
    np.concatenate(chunks) for chunks in (spmt_pos_chunks, charge_s_chunks, fht_s_chunks)
)
targets_dataframe = pd.concat(target_frames, axis=0, ignore_index=True)

# later cells index the hit arrays with targets_dataframe's (0..n-1) index, so all must line up
assert len(pmt_pos) == len(charge) == len(fht) == len(targets_dataframe), "hits/targets length mismatch"
assert len(spmt_pos) == len(charge_s) == len(fht_s) == len(targets_dataframe), "SPMT hits/targets length mismatch"

### Event 3D Plot - Large PMTs and Small PMTs

In [None]:
plt.style.use("dark_background")
plt.rc("text", usetex=True)
plt.rc("font", family="cm")
plt.rcParams["grid.color"] = (0.5, 0.5, 0.5, 0.2)

EVENT_INDEX = 5  # event displayed in all four panels
AXIS_TICKS = [-20, -10, 0, 10, 20]


def _draw_event_panel(ax, positions, values, *, marker_size, cmap, vmax, cbar_ticks, cbar_title):
    """Draw one 3D scatter panel of a single event, with its colorbar and axis styling.

    Parameters:
        ax (mpl_toolkits.mplot3d.Axes3D): 3D axes to draw on
        positions (ndarray): (n_hits, 3) array of x, y, z coordinates
        values (ndarray): per-hit quantity mapped to colour
        marker_size (float): scatter marker size
        cmap (string): colormap name
        vmax (float): upper limit of the colour scale (the lower limit is 0)
        cbar_ticks (list): tick positions of the colorbar
        cbar_title (string): title written above the colorbar

    Returns:
        tuple: the scatter artist and the colorbar
    """
    scatter = ax.scatter(positions[:, 0], positions[:, 1], positions[:, 2],
                         s=marker_size, alpha=1, c=values, cmap=cmap, vmin=0, vmax=vmax)

    ax.set_xlabel("$x$", fontsize=12)
    ax.set_ylabel("$y$", fontsize=12)
    ax.set_zlabel("$z$", fontsize=12)

    cbar = ax.figure.colorbar(scatter, ax=ax, shrink=0.5, aspect=10, ticks=cbar_ticks)
    cbar.ax.set_title(cbar_title, fontsize=12)
    cbar.outline.set_edgecolor('w')
    cbar.outline.set_linewidth(0.5)

    ax.view_init(elev=10, azim=50)

    ax.tick_params(axis="both", which="major", labelsize=9)
    ax.tick_params(axis="both", which='minor', labelsize=9)
    ax.set_xticks(ticks=AXIS_TICKS)
    ax.set_yticks(ticks=AXIS_TICKS)
    ax.set_zticks(ticks=AXIS_TICKS)

    # transparent panes with black edges
    for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
        axis.pane.fill = False
        axis.pane.set_edgecolor('k')

    return scatter, cbar


fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(11,9), subplot_kw={"projection": "3d"}, dpi=200)

# top row: large PMTs (charge 3d, fht 3d)
_draw_event_panel(axes[0, 0], pmt_pos[EVENT_INDEX], charge[EVENT_INDEX],
                  marker_size=0.1, cmap="YlOrRd", vmax=30,
                  cbar_ticks=[0, 10, 20, 30], cbar_title="Charge")
_draw_event_panel(axes[0, 1], pmt_pos[EVENT_INDEX], fht[EVENT_INDEX],
                  marker_size=0.1, cmap="PuBuGn", vmax=1000,
                  cbar_ticks=[0, 200, 400, 600, 800, 1000], cbar_title="FHT")

plt.subplots_adjust(wspace=0.1, hspace=0)

# bottom row: small PMTs (charge 3d, fht 3d)
_draw_event_panel(axes[1, 0], spmt_pos[EVENT_INDEX], charge_s[EVENT_INDEX],
                  marker_size=0.5, cmap="YlOrRd", vmax=3,
                  cbar_ticks=[0., 1., 2., 3.0], cbar_title="Charge")
_draw_event_panel(axes[1, 1], spmt_pos[EVENT_INDEX], fht_s[EVENT_INDEX],
                  marker_size=0.5, cmap="PuBuGn", vmax=1000,
                  cbar_ticks=[0, 200, 400, 600, 800, 1000], cbar_title="FHT")

fig.savefig("/home/ferracci/new_dataset/images/scatter_event.png", dpi=300, bbox_inches="tight", pad_inches=0.4)

### $R_{cht}$ vs. $R_{cc}$

In [None]:
# compute center of charge and center of first hit time for all events
n_events = len(pmt_pos)
x_cc, y_cc, z_cc = np.zeros(n_events), np.zeros(n_events), np.zeros(n_events)
x_cht, y_cht, z_cht = np.zeros(n_events), np.zeros(n_events), np.zeros(n_events)

for i in range(n_events):
    hit_pos = pmt_pos[i]

    # charge-weighted mean position
    total_charge = np.sum(charge[i])
    x_cc[i] = np.sum(hit_pos[:, 0] * charge[i]) / total_charge
    y_cc[i] = np.sum(hit_pos[:, 1] * charge[i]) / total_charge
    z_cc[i] = np.sum(hit_pos[:, 2] * charge[i]) / total_charge

    # mean position weighted by 1 / (first hit time + 50)
    shifted_fht = fht[i] + 50
    weight_sum = np.sum(1 / shifted_fht)
    x_cht[i] = np.sum(hit_pos[:, 0] / shifted_fht) / weight_sum
    y_cht[i] = np.sum(hit_pos[:, 1] / shifted_fht) / weight_sum
    z_cht[i] = np.sum(hit_pos[:, 2] / shifted_fht) / weight_sum

# distance of each center from the origin
R_cc = np.sqrt(x_cc**2 + y_cc**2 + z_cc**2)
R_cht = np.sqrt(x_cht**2 + y_cht**2 + z_cht**2)

In [None]:
# "Redep" column of the targets; used as the colour variable of the R_cht vs. R_cc scatter
Redep = targets_dataframe["Redep"]

In [None]:
# scatter of R_cht against R_cc --> non linearity
plt.style.use("default")
plt.rcParams.update({
    "text.usetex": True,
    "font.family": "cm",
    "grid.color": (0.5, 0.5, 0.5, 0.2),
})

fig, ax = plt.subplots(figsize=(7, 5), dpi=200)

# one point per event, coloured by Redep
scatter = ax.scatter(R_cc, R_cht, c=Redep, cmap="inferno", s=0.3, alpha=0.9)

ax.set_xlim(0, 12)
ax.set_ylim(0, 10)
ax.set_xlabel("$R_{cc}$ [m]", fontsize=15)
ax.set_ylabel("$R_{cht}$ [m]", fontsize=15)
ax.tick_params(axis="both", which="both", labelsize=12)
ax.grid()

cbar = fig.colorbar(scatter, ax=ax, shrink=0.8, aspect=10)
cbar.ax.set_title("$R$ [m]", fontsize=15)
cbar.outline.set_edgecolor('k')
cbar.outline.set_linewidth(0.5)

fig.savefig("/home/ferracci/new_dataset/images/scatter_Rcht_vs_R_cc.png", dpi=300, bbox_inches="tight", pad_inches=0.2);

### AccumCharge vs. $R^3$

In [None]:
# total charge per event: large-PMT charges plus small-PMT charges
accum_charge = np.fromiter(
    (np.sum(charge[i]) + np.sum(charge_s[i]) for i in range(len(pmt_pos))),
    dtype=float,
    count=len(pmt_pos),
)

In [None]:
# accumulated charge divided by Qedep, stored as a new column
charge_per_qedep = accum_charge / targets_dataframe["Qedep"]
targets_dataframe["average_charge"] = charge_per_qedep

# keep only events strictly inside the (1600, 2800) window
inside_window = (charge_per_qedep > 1600) & (charge_per_qedep < 2800)
targets_cut = targets_dataframe.loc[inside_window]

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7,5), dpi=200)

# 2D histogram of charge per unit Qedep against the cube of Redep, for the events kept by the cut
hist2d = ax.hist2d(np.array(targets_cut["Redep"])**3, np.array(targets_cut["average_charge"]), bins=100, cmap="inferno")

# raw string: "\m" is not a valid Python escape sequence and triggers a warning in a normal string
ax.set_xlabel(r"$R^3$ [$\mathrm{m}^3$]", fontsize=15)
ax.set_ylabel("Accumulated charge per MeV [PE/MeV]", fontsize=15)

# hist2d returns (counts, xedges, yedges, image); the image is the colorbar's mappable
cbar = fig.colorbar(hist2d[-1], ax=ax, shrink=0.8, aspect=10)
cbar.ax.set_title("Count", fontsize=15)
cbar.outline.set_edgecolor('k')
cbar.outline.set_linewidth(0.5)

ax.tick_params(axis="both", which="major", labelsize=12)
ax.tick_params(axis="both", which='minor', labelsize=12)

fig.savefig("/home/ferracci/new_dataset/images/scatter_charge_vs_R3.png", dpi=300, bbox_inches="tight", pad_inches=0.2);

### Charge and FHT distributions

In [None]:
# visualize average pdf for fht distribution with errorbars
test = targets_dataframe[(targets_dataframe["Qedep"] > 1.9) & (targets_dataframe["Qedep"] < 2.1) & 
                         (targets_dataframe["Redep"] > 15.9) & (targets_dataframe["Redep"] < 16.1)]
# integer-array indexing returns a new array, so the filtering below leaves `fht` untouched
fht_E2_R16 = fht[test.index.values]
if len(fht_E2_R16) == 0:
    raise ValueError("no events pass the Qedep/Redep selection; nothing to histogram")

# remove artificial bar at fht=0 due to how the trigger algorithm works
for i in range(len(fht_E2_R16)):
    fht_E2_R16[i] = fht_E2_R16[i][fht_E2_R16[i] != 0]

# one histogram per event on shared bin edges (taken from the first event's histogram)
n_bins = 80
hist = np.zeros((len(fht_E2_R16), n_bins))
hist[0, :], bins = np.histogram(fht_E2_R16[0], bins=n_bins, range=(0, 1200))
for i in range(1, len(fht_E2_R16)):
    hist[i, :], _ = np.histogram(fht_E2_R16[i], bins=bins)
mean_hist = np.mean(hist, axis=0)
std_hist = np.std(hist, axis=0)

# bars and error bars are centred on their x value, so use bin centres rather than
# the left edges (left edges would draw everything half a bin too far to the left)
bin_width = bins[1] - bins[0]
bin_centers = 0.5 * (bins[:-1] + bins[1:])

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7,5), dpi=200)

ax.bar(bin_centers, mean_hist, width=bin_width/1.25, color="purple")
ax.errorbar(bin_centers, mean_hist, yerr=std_hist, fmt=".", markersize=0, capsize=1.5, ecolor="k", elinewidth=0.75)

ax.set_xlabel("t [ns]", fontsize=15)
ax.set_ylabel("Number of fired channels per 15 ns", fontsize=15)

ax.tick_params(axis="both", which="major", labelsize=12)
ax.tick_params(axis="both", which='minor', labelsize=12)

ax.grid()

fig.savefig("/home/ferracci/new_dataset/images/cdf_fht_error.png", dpi=300, bbox_inches="tight", pad_inches=0.2);

In [None]:
# let's now compare the distributions for different energies at the center of the detector
df = targets_dataframe.loc[targets_dataframe["Redep"] < 3]

# three narrow Qedep windows (strict inequalities on both sides)
qedep_center = df["Qedep"]
E_L = df.loc[(qedep_center > 0.9) & (qedep_center < 1.1)]
E_C = df.loc[(qedep_center > 4.9) & (qedep_center < 5.1)]
E_R = df.loc[(qedep_center > 8.9) & (qedep_center < 9.1)]

In [None]:
# per-event charges and first-hit-times of the selected events
# (integer-array indexing returns new arrays, so `charge` and `fht` are not modified below)
charge_E_L, charge_E_C, charge_E_R = (
    charge[selection.index.values] for selection in (E_L, E_C, E_R)
)
fht_E_L, fht_E_C, fht_E_R = (
    fht[selection.index.values] for selection in (E_L, E_C, E_R)
)

# drop the entries with fht == 0 from every selected event
for selected_fht in (fht_E_L, fht_E_C, fht_E_R):
    for k in range(len(selected_fht)):
        selected_fht[k] = selected_fht[k][selected_fht[k] != 0]

In [None]:
def get_mean_hist(array, xlim, n_bins):
    """
    Computes the bin-wise mean of the histograms of all the samples in array.

    Parameters:
        array (sequence of ndarray): one array of samples per event; must not be empty
        xlim (tuple): range to use for the histogram
        n_bins (int): number of bins to use for the histogram

    Returns:
        tuple: (mean counts per bin, bin edges); the edges are those of the first
        sample's histogram and are reused for every other sample
    """
    first_counts, bins = np.histogram(array[0], bins=n_bins, range=xlim)
    per_event_counts = [first_counts]
    per_event_counts.extend(np.histogram(sample, bins=bins)[0] for sample in array[1:])
    mean_hist = np.array(per_event_counts, dtype=float).mean(axis=0)
    return mean_hist, bins

In [None]:
def plot_hist(ax, array, xlim, n_bins, color, alpha):
    """
    Displays the event-averaged histogram of the samples in array as a bar chart.

    Parameters:
        ax (matplotlib.axes): matplotlib ax on which to work
        array (ndarray): array of objects containing samples from the distribution
        xlim (tuple): range to use for the histogram
        n_bins (int): number of bins to use for the histogram
        color (string): specify color for the bars
        alpha (float): opacity of the bars

    Returns:
        matplotlib.axes
    """
    hist, bins = get_mean_hist(array, xlim, n_bins)
    # bins[:-1] are the LEFT bin edges: align="edge" makes every bar span its own bin
    # (the default align="center" would draw each bar half a bin too far to the left)
    ax.bar(bins[:-1], hist, width=bins[1]-bins[0], align="edge", color=color, alpha=alpha)
    return ax

In [None]:
def plot_cdf(ax, array, xlim, n_bins, color, plot_type, legend):
    """ 
    Displays the average cumulative distribution function plot, with a vertical line at the average mean value.

    The CDF is built from the event-averaged histogram over xlim, so samples outside xlim
    do not contribute to the curve and the curve reaches 1 at the upper end of xlim.
    The vertical line uses all samples, including any outside xlim.

    Parameters:
        ax (matplotlib.axes): matplotlib ax on which to work
        array (ndarray): array of objects containing samples from the distribution
        xlim (tuple): range to use for the histogram
        n_bins (int): number of bins to use for the histogram 
        color (string): specify color for the plot
        plot_type (string): "R" for plot at different radii, "E" for plot at different energies
        legend (float): average radius/energy value (depending on plot_type) to display in the legend

    Returns: 
        matplotlib.axes

    Raises:
        ValueError: if plot_type is neither "R" nor "E"
    """
    hist, bins = get_mean_hist(array, xlim, n_bins)
    cumulative_hist = np.cumsum(hist)
    cumulative_hist_norm = cumulative_hist / np.sum(hist)

    if plot_type == "R":
        label = f"R = {legend} m"
    elif plot_type == "E":
        label = f"E = {legend} MeV"
    else:
        raise ValueError("Only 'R' and 'E' can be passed to plot_type")

    # cumulative_hist[k] counts everything up to the RIGHT edge of bin k, so the curve is
    # drawn against bins[1:]; using the left edges would shift it one bin to the left
    ax.plot(bins[1:], cumulative_hist_norm, linewidth=1, color=color, label=label)

    # average over events of the per-event sample mean
    mean_value = 0
    for sample in array:
        mean_value += np.mean(sample)
    mean_value /= len(array)
    ax.axvline(mean_value, color=color, linestyle="dashed", linewidth=1)

    return ax

In [None]:
fig = plt.figure(figsize=(8,5), dpi=150)

# (charge samples, colour, selected events) for each energy window
charge_energy_curves = [
    (charge_E_L, "darkmagenta", E_L),
    (charge_E_C, "cornflowerblue", E_C),
    (charge_E_R, "seagreen", E_R),
]

# main axes: average CDF of the charge for each energy window
ax1 = fig.add_axes([0, 0, 1, 1])
for samples, color, selection in charge_energy_curves:
    plot_cdf(ax1, samples, xlim=(0, 6), n_bins=500, color=color, plot_type="E",
             legend=round(np.mean(selection["Qedep"]), 1))
ax1.set_xlabel("nPE", fontsize=15)
ax1.set_ylabel("CDF(nPE)", fontsize=15)
ax1.set_xlim(0, 6)
ax1.tick_params(axis="both", which="both", labelsize=12)
ax1.grid()
ax1.legend(loc=(0.37, 1.01), ncol=3, frameon=False, prop={'size': 12})

# inset axes: average histograms, drawn from the highest energy window to the lowest
ax2 = fig.add_axes([0.425, 0.1, 0.55, 0.55])
for samples, color, _ in reversed(charge_energy_curves):
    plot_hist(ax2, samples, xlim=(0, 6), n_bins=100, color=color, alpha=0.9)
ax2.set_xlim(0, 6)
ax2.tick_params(axis="both", which="both", labelsize=12)
ax2.grid()

fig.savefig("/home/ferracci/new_dataset/images/cdf_charge_center.png", dpi=300, bbox_inches="tight", pad_inches=0.2);

In [None]:
fig = plt.figure(figsize=(8,5), dpi=150)

# (first-hit-time samples, colour, selected events) for each energy window
fht_energy_curves = [
    (fht_E_L, "darkmagenta", E_L),
    (fht_E_C, "cornflowerblue", E_C),
    (fht_E_R, "seagreen", E_R),
]

# main axes: average CDF of the first hit time for each energy window
ax1 = fig.add_axes([0, 0, 1, 1])
for samples, color, selection in fht_energy_curves:
    plot_cdf(ax1, samples, xlim=(0, 1250), n_bins=500, color=color, plot_type="E",
             legend=round(np.mean(selection["Qedep"]), 1))
ax1.set_xlabel("t [ns]", fontsize=15)
ax1.set_ylabel("CDF(t)", fontsize=15)
ax1.set_xlim(0, 1250)
ax1.tick_params(axis="both", which="both", labelsize=12)
ax1.grid()
ax1.legend(loc=(0.37, 1.01), ncol=3, frameon=False, prop={'size': 12})

# inset axes: average histograms, drawn from the highest energy window to the lowest
ax2 = fig.add_axes([0.425, 0.1, 0.55, 0.55])
for samples, color, _ in reversed(fht_energy_curves):
    plot_hist(ax2, samples, xlim=(0, 1200), n_bins=100, color=color, alpha=0.9)
ax2.set_xlim(0, 1200)
ax2.tick_params(axis="both", which="both", labelsize=12)
ax2.grid()

fig.savefig("/home/ferracci/new_dataset/images/cdf_fht_center.png", dpi=300, bbox_inches="tight", pad_inches=0.2);

In [None]:
# let's now compare the distributions for different radii at E = 1MeV
qedep_all = targets_dataframe["Qedep"]
df = targets_dataframe.loc[(qedep_all > 0.75) & (qedep_all < 1.25)]

# three Redep windows (strict inequalities on both sides)
redep_1mev = df["Redep"]
R2 = df.loc[(redep_1mev > 1.55) & (redep_1mev < 2.3)]
R9 = df.loc[(redep_1mev > 8.8) & (redep_1mev < 9.1)]
R17 = df.loc[(redep_1mev > 16.9) & (redep_1mev < 17.05)]

In [None]:
# per-event charges and first-hit-times of the selected events
# (integer-array indexing returns new arrays, so `charge` and `fht` are not modified below)
charge_R2, charge_R9, charge_R17 = (
    charge[selection.index.values] for selection in (R2, R9, R17)
)
fht_R2, fht_R9, fht_R17 = (
    fht[selection.index.values] for selection in (R2, R9, R17)
)

# drop the entries with fht == 0 from every selected event
for selected_fht in (fht_R2, fht_R9, fht_R17):
    for k in range(len(selected_fht)):
        selected_fht[k] = selected_fht[k][selected_fht[k] != 0]

In [None]:
fig = plt.figure(figsize=(8,5), dpi=150)

# (charge samples, colour, selected events) for each radius window
charge_radius_curves = [
    (charge_R2, "darkmagenta", R2),
    (charge_R9, "cornflowerblue", R9),
    (charge_R17, "seagreen", R17),
]

# main axes: average CDF of the charge for each radius window
ax1 = fig.add_axes([0, 0, 1, 1])
for samples, color, selection in charge_radius_curves:
    plot_cdf(ax1, samples, xlim=(0, 6), n_bins=500, color=color, plot_type="R",
             legend=round(np.mean(selection["Redep"]), 1))
ax1.set_xlabel("nPE", fontsize=15)
ax1.set_ylabel("CDF(nPE)", fontsize=15)
ax1.set_xlim(0, 6)
ax1.tick_params(axis="both", which="both", labelsize=12)
ax1.grid()
ax1.legend(loc=(0.43, 1.01), ncol=3, frameon=False, prop={'size': 12})

# inset axes: average histograms, drawn from the smallest radius window to the largest
ax2 = fig.add_axes([0.425, 0.1, 0.55, 0.55])
for samples, color, _ in charge_radius_curves:
    plot_hist(ax2, samples, xlim=(0, 6), n_bins=100, color=color, alpha=0.9)
ax2.set_xlim(0, 6)
ax2.tick_params(axis="both", which="both", labelsize=12)
ax2.grid()

fig.savefig("/home/ferracci/new_dataset/images/cdf_charge_energy.png", dpi=300, bbox_inches="tight", pad_inches=0.2);

In [None]:
fig = plt.figure(figsize=(8,5), dpi=150)

# (first-hit-time samples, colour, selected events) for each radius window
fht_radius_curves = [
    (fht_R2, "darkmagenta", R2),
    (fht_R9, "cornflowerblue", R9),
    (fht_R17, "seagreen", R17),
]

# main axes: average CDF of the first hit time for each radius window
ax1 = fig.add_axes([0, 0, 1, 1])
for samples, color, selection in fht_radius_curves:
    plot_cdf(ax1, samples, xlim=(0, 1250), n_bins=500, color=color, plot_type="R",
             legend=round(np.mean(selection["Redep"]), 1))
ax1.set_xlabel("t [ns]", fontsize=15)
ax1.set_ylabel("CDF(t)", fontsize=15)
ax1.set_xlim(0, 1250)
ax1.tick_params(axis="both", which="both", labelsize=12)
ax1.grid()
ax1.legend(loc=(0.43, 1.01), ncol=3, frameon=False, prop={'size': 12})

# inset axes: average histograms, drawn from the smallest radius window to the largest
ax2 = fig.add_axes([0.425, 0.1, 0.55, 0.55])
for samples, color, _ in fht_radius_curves:
    plot_hist(ax2, samples, xlim=(0, 1200), n_bins=100, color=color, alpha=0.9)
ax2.set_xlim(0, 1200)
ax2.tick_params(axis="both", which="both", labelsize=12)
ax2.grid()

fig.savefig("/home/ferracci/new_dataset/images/cdf_fht_energy.png", dpi=300, bbox_inches="tight", pad_inches=0.2);

### Features Exploration

In [None]:
# 1% of the dataset is enough for basic exploration
N_EXPLORATION_ROWS = 10000
features_dataframe = pd.read_csv("/mnt/ferracci/features_dataframe_new.csv.gz", nrows=N_EXPLORATION_ROWS)
# NOTE(review): this rebinds targets_dataframe, which the cells above use for the per-hit
# data; re-running those cells after this one needs the loading cell to be run again first
targets_dataframe = pd.read_csv("/mnt/ferracci/targets_dataframe_new.csv.gz", nrows=N_EXPLORATION_ROWS)

# targets as plain numpy arrays (independent copies of the columns)
Qedep, Redep = (
    targets_dataframe[column].to_numpy(copy=True) for column in ("Qedep", "Redep")
)

In [None]:
# correlation of every feature column with Qedep; print the 12 highest, then the 12 lowest
corr_targets_dataframe = features_dataframe.corrwith(targets_dataframe["Qedep"])
for ascending in (False, True):
    print(corr_targets_dataframe.sort_values(ascending=ascending).head(12))

In [None]:
plt.style.use("default")
plt.rcParams.update({
    "text.usetex": True,
    "font.family": "cm",
    "grid.color": (0.5, 0.5, 0.5, 0.2),
})

fig, ax = plt.subplots(figsize=(7, 5), dpi=150)

# feature-feature correlation matrix, drawn on the axes created above
feature_corr = features_dataframe.corr()
ax = sns.heatmap(feature_corr, vmin=-1, vmax=1, xticklabels=False, yticklabels=False, ax=ax)

# the last axes of the figure is the one added for the heatmap's colorbar
colorbar_ax = fig.axes[-1]
colorbar_ax.yaxis.label.set_size(12)

fig.savefig("/home/ferracci/new_dataset/images/features_corr.png", dpi=300, bbox_inches="tight", pad_inches=0.1);

In [None]:
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(11,5), dpi=150)

# (feature column, x-axis label) of each panel, left to right
scatter_panels = [("accum_charge", "AccumCharge"), ("nPMTs", "nPMTs")]

for ax, (column, xlabel) in zip(axes, scatter_panels):
    # Qedep against the feature, coloured by Redep
    scatter = ax.scatter(features_dataframe[column], Qedep, c=Redep, cmap="inferno", s=0.3, alpha=0.9)

    ax.set_xlabel(xlabel, fontsize=15)
    ax.set_ylabel("$E_{kin}$ [MeV]", fontsize=15)
    ax.tick_params(axis="both", which="both", labelsize=12)

    cbar = fig.colorbar(scatter, ax=ax, shrink=0.8, aspect=10)
    cbar.ax.set_title("$R$ [m]", fontsize=15)
    cbar.outline.set_edgecolor('k')
    cbar.outline.set_linewidth(0.5)

    ax.grid()

In [None]:
def plot_feature_distribution(ax, feature, feature_name, bins):
    """
    Displays the density-normalized histogram of a feature.

    Parameters:
        ax (matplotlib.axes): matplotlib ax on which to work
        feature (array-like): values of the feature
        feature_name (string): label of the x axis
        bins (int): number of bins to use for the histogram
    """
    histogram_options = {"bins": bins, "color": "steelblue", "density": True}
    ax.hist(feature, **histogram_options)
    ax.set_xlabel(feature_name, fontsize=15)
    ax.grid()

In [None]:
fig, axes = plt.subplots(nrows=2, ncols=4, figsize=(15,8), dpi=100)

# (feature column, x-axis label), listed row by row in the order of the axes grid
moment_panels = [
    ("pe_std", "$pe_{std}$"),
    ("pe_skew", "$pe_{skewness}$"),
    ("pe_kurtosis", "$pe_{kurtosis}$"),
    ("pe_entropy", "$pe_{entropy}$"),
    ("pe_cht_std", "$ht_{std}$"),
    ("pe_cht_skew", "$ht_{skewness}$"),
    ("pe_cht_kurtosis", "$ht_{kurtosis}$"),
    ("pe_cht_entropy", "$ht_{entropy}$"),
]

for ax, (column, label) in zip(axes.flat, moment_panels):
    plot_feature_distribution(ax, features_dataframe[column], label, 50)

In [None]:
fig, axes = plt.subplots(nrows=3, ncols=4, figsize=(15,12), dpi=100)

# (feature column, x-axis label), listed row by row in the order of the axes grid.
# Labels are raw strings: "\%" is not a valid Python escape sequence and triggers a
# warning in a normal string literal (the resulting text is unchanged).
percentile_panels = [
    ("pe10_cc", r"$pe_{10\%, cc}$"),
    ("pe40_cc", r"$pe_{40\%, cc}$"),
    ("pe70_cc", r"$pe_{70\%, cc}$"),
    ("pe95_cc", r"$pe_{95\%, cc}$"),
    ("pe10_cht", r"$pe_{10\%, cht}$"),
    ("pe40_cht", r"$pe_{40\%, cht}$"),
    ("pe70_cht", r"$pe_{70\%, cht}$"),
    ("pe95_cht", r"$pe_{95\%, cht}$"),
    ("ht10_5", r"$ht_{10\%-5\%}$"),
    ("ht40_35", r"$ht_{40\%-35\%}$"),
    ("ht70_65", r"$ht_{70\%-65\%}$"),
    ("ht95_90", r"$ht_{95\%-90\%}$"),
]

for ax, (column, label) in zip(axes.flat, percentile_panels):
    plot_feature_distribution(ax, features_dataframe[column], label, 50)

In [None]:
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(12,7), dpi=150)

# (feature column, x-axis label), listed row by row in the order of the axes grid.
# Labels are raw strings: "\g" is not a valid Python escape sequence and triggers a
# warning in a normal string literal (the resulting text is unchanged).
gamma_panels = [
    ("gamma_x_cc", r"$\gamma_{x,cc}$"),
    ("gamma_y_cc", r"$\gamma_{y,cc}$"),
    ("gamma_z_cc", r"$\gamma_{z,cc}$"),
    ("gamma_x_cht", r"$\gamma_{x,cht}$"),
    ("gamma_y_cht", r"$\gamma_{y,cht}$"),
    ("gamma_z_cht", r"$\gamma_{z,cht}$"),
]

for ax, (column, label) in zip(axes.flat, gamma_panels):
    plot_feature_distribution(ax, features_dataframe[column], label, 100)