In [None]:
# Analysis of results from channel_photoelectron_distribution.py

In [None]:
%matplotlib inline

In [None]:
import importlib
import waffles.np04_analysis.lightyield_vs_energy.scripts.utils as utils_module
from waffles.np04_analysis.lightyield_vs_energy.scripts.utils import *
import ast

In [None]:
# Global matplotlib styling applied to every figure produced in this notebook.
_plot_style = {
    'font.size': 10,
    'axes.titlesize': 13,
    'axes.labelsize': 13,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'legend.fontsize': 10,
    'figure.dpi': 300,
}
for _style_key, _style_value in _plot_style.items():
    plt.rcParams[_style_key] = _style_value

In [None]:
def str_to_array(x):
    """Coerce a fit-parameter cell into a NumPy array.

    Parameters
    ----------
    x : numpy.ndarray, list or str
        - ndarray: returned unchanged (same object).
        - list: converted to a float array.
        - str: textual list such as "[1.0, 2.0]" or "[1. 2.]"; brackets are
          dropped and both commas and whitespace act as separators.

    Returns
    -------
    numpy.ndarray

    Raises
    ------
    ValueError
        If ``x`` is none of the supported types, or a string token cannot be
        parsed as a float.
    """
    if isinstance(x, np.ndarray):
        return x
    if isinstance(x, list):
        return np.array(x, dtype=float)
    if not isinstance(x, str):
        raise ValueError(f"Unsupported type: {type(x)}")

    # Remove the square brackets and turn commas into spaces, then split on whitespace
    cleanup_table = str.maketrans({'[': None, ']': None, ',': ' '})
    tokens = x.translate(cleanup_table).split()
    return np.array(list(map(float, tokens)))

In [None]:
# Folder holding the files read below (PE_study_results.json and PE_study_results.csv).
# NOTE(review): absolute, user-specific AFS path — adjust it before running from another account.
input_folder = "/afs/cern.ch/work/a/anbalbon/private/waffles/src/waffles/np04_analysis/lightyield_vs_energy/output/single_channels_study"
# Figures saved by the plotting cells are written next to the inputs.
output_folder = input_folder

In [None]:
# Extracting information about the photoelectron-distribution fits from the json file.
# Nesting traversed below: data[apa][endpoint][channel][energy_key][fit_name][field]

with open(f"{input_folder}/PE_study_results.json", "r") as f:
    data = json.load(f)

# Keys looked up in every channel dictionary; each container has one list per key.
# NOTE(review): presumably beam energies — confirm against channel_photoelectron_distribution.py
_energy_keys = ['1', '2', '3', '5', '7']

# The containers are created ONCE, before looping over the APAs, so that the
# results of every APA are accumulated (creating them inside the APA loop
# would keep only the last APA).
all_channels = {e: [] for e in _energy_keys}
all_endpoint = {e: [] for e in _energy_keys}
all_apa = {e: [] for e in _energy_keys}
all_params_gaussian = {e: [] for e in _energy_keys}
all_eparams_gaussian = {e: [] for e in _energy_keys}
all_mu_gaussian = {e: [] for e in _energy_keys}
all_emu_gaussian = {e: [] for e in _energy_keys}

all_params_langauss = {e: [] for e in _energy_keys}
all_eparams_langauss = {e: [] for e in _energy_keys}
all_peak_langauss = {e: [] for e in _energy_keys}
all_epeak_langauss = {e: [] for e in _energy_keys}

# (destination container, fit name, field name) for every value that is extracted
_fit_fields = [
    (all_params_gaussian, 'gaussian', 'params'),
    (all_eparams_gaussian, 'gaussian', 'eparams'),
    (all_mu_gaussian, 'gaussian', 'mu'),
    (all_emu_gaussian, 'gaussian', 'emu'),
    (all_params_langauss, 'langauss', 'params'),
    (all_eparams_langauss, 'langauss', 'eparams'),
    (all_peak_langauss, 'langauss', 'peak'),
    (all_epeak_langauss, 'langauss', 'epeak'),
]

for apa, apa_dict in data.items():
    for end, end_dict in apa_dict.items():
        for ch, ch_dict in end_dict.items():
            for e in _energy_keys:
                # Read every field BEFORE appending anything: if any key is
                # missing, all containers receive an empty placeholder and the
                # parallel lists stay aligned (no partially appended entries).
                try:
                    values = [ch_dict[e][fit][field] for _, fit, field in _fit_fields]
                except KeyError:
                    values = [[] for _ in _fit_fields]

                for (container, _, _), value in zip(_fit_fields, values):
                    container[e].append(value)

                all_channels[e].append(ch)
                all_endpoint[e].append(end)
                all_apa[e].append(apa)


print(all_channels)

In [None]:
# Extracting information about the linear fits from the csv file
# and attaching the visibility information channel by channel.

def _parse_space_separated(cell):
    """Turn an ndarray or a whitespace-separated "[a b]" string into a list of floats."""
    if isinstance(cell, np.ndarray):
        return cell.tolist()
    return [float(token) for token in str(cell).strip('[]').split()]


def _parse_literal(cell):
    """Evaluate a string cell as a Python literal; leave any other type untouched."""
    if isinstance(cell, str):
        return ast.literal_eval(cell)
    return cell


df = pd.read_csv(f"{input_folder}/PE_study_results.csv")

df_visibility = pd.read_csv("/afs/cern.ch/work/a/anbalbon/private/waffles/src/waffles/np04_analysis/lightyield_vs_energy/data/visibility_arapuca_60r.csv")

# Left join: every fit row is kept, 'vis' is filled where (Endpoint, Channel) matches.
_visibility_columns = df_visibility[['vis', 'endpoint', 'DAQ_ch']]
df = pd.merge(
    df,
    _visibility_columns,
    how='left',
    left_on=['Endpoint', 'Channel'],
    right_on=['endpoint', 'DAQ_ch'],
)

# Parameter columns are parsed as whitespace-separated lists ("[a b]").
for col in ['Gaussian params', 'Langauss params']:
    df[col] = df[col].apply(_parse_space_separated)

# Error columns are parsed as Python list literals.
for col in ['Gaussian eparams', 'Langauss eparams']:
    df[col] = df[col].apply(_parse_literal)

def apply_vis_to_second(lst, vis_val):
    """Return a copy of ``lst`` whose second element is divided by ``vis_val``.

    Parameters
    ----------
    lst : list
        Fit values; only index 1 is modified. The input list is never mutated.
    vis_val : float or None
        Visibility used as divisor.

    Returns
    -------
    list
        - unchanged copy when ``lst`` has fewer than two elements or
          ``vis_val`` is None;
        - copy with ``lst[1] / vis_val`` otherwise;
        - copy with NaN at index 1 when ``vis_val`` is zero or NaN, since the
          correction is undefined there. This avoids a ZeroDivisionError (or
          an infinite value) while still marking the entry as unusable.
    """
    new_lst = lst.copy()
    if len(new_lst) < 2 or vis_val is None:
        return new_lst

    # `vis_val != vis_val` is true only for NaN
    if vis_val == 0 or vis_val != vis_val:
        new_lst[1] = float('nan')
    else:
        new_lst[1] = new_lst[1] / vis_val
    return new_lst

# --- Step 1: apply the visibility correction to each params/eparams column
# --- Step 2: record, for each corrected column, the maximum of the second elements
max_second = {}
for col in ['Gaussian params', 'Langauss params', 'Gaussian eparams', 'Langauss eparams']:
    vis_col = f'vis {col}'
    df[vis_col] = df.apply(lambda row: apply_vis_to_second(row[col], row['vis']), axis=1)

    # Entries that are not lists with at least two elements count as NaN
    second_elements = df[vis_col].apply(
        lambda x: np.nan if not (isinstance(x, list) and len(x) > 1) else x[1]
    )
    max_val = second_elements.max()
    max_second[vis_col] = max_val

# --- Step 3: normalise every second element to the column maximum
def normalize_second(lst, max_val):
    """Return a copy of ``lst`` whose second element is divided by ``max_val``.

    Parameters
    ----------
    lst : list
        Values to normalise; only index 1 is modified. Anything that is not a
        list with at least two elements is returned as-is.
    max_val : float or None
        Normalisation constant.

    Returns
    -------
    list
        The normalised copy, or ``lst`` itself (same object) when no
        normalisation is possible: ``max_val`` is None, zero or NaN.
    """
    if not isinstance(lst, list) or len(lst) <= 1:
        return lst

    # NaN must be tested explicitly: a membership test against a list containing
    # np.nan only matches that very object, not a NaN returned by pandas/NumPy.
    # `max_val != max_val` is true only for NaN.
    if max_val is None or max_val == 0 or max_val != max_val:
        return lst

    new_lst = lst.copy()
    new_lst[1] = new_lst[1] / max_val
    return new_lst

# Create one 'norm vis <col>' column per corrected column, dividing the second
# element of every list by the maximum stored in max_second.
for col in ['Gaussian params', 'Langauss params', 'Gaussian eparams', 'Langauss eparams']:
    vis_col = f'vis {col}'
    column_max = max_second[vis_col]
    df[f'norm {vis_col}'] = df[vis_col].apply(normalize_second, args=(column_max,))

# print(df[['Gaussian params','vis','vis Gaussian params']].head())



In [None]:
def plot_two_params(df, col, axes, color='red', bins=20, title='Gaussian Fit Analysis'):
    """Histogram the two linear-fit parameters stored in column ``col``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose column ``col`` holds, per row, a two-element sequence
        (anything accepted by ``str_to_array``): index 0 is plotted as the
        intercept, index 1 as the slope.
    col : str
        Name of the column to read.
    axes : sequence of two matplotlib axes
        ``axes[0]`` receives the intercept histogram, ``axes[1]`` the slope one.
    color : str, optional
        Fill colour of the bars.
    bins : int or sequence, optional
        Binning forwarded to ``Axes.hist`` (default 20).
    title : str, optional
        Prefix of both subplot titles.
    """
    # Parse each cell once and reuse the result for both parameters
    parsed = df[col].apply(str_to_array)
    intercepts = parsed.apply(lambda x: x[0])
    slopes = parsed.apply(lambda x: x[1])

    # `bins` is forwarded so that the caller's choice is honoured
    axes[0].hist(intercepts, bins=bins, color=color, edgecolor='black')
    axes[0].set_xlabel('Intercept [$N_{{PE}}$]')
    axes[0].set_ylabel('Counts [AU]')
    axes[0].set_title(title + ' - Intercept')

    axes[1].hist(slopes, bins=bins, color=color, edgecolor='black')
    axes[1].set_xlabel('Slope [$N_{{PE}}$/GeV]')
    axes[1].set_ylabel('Counts [AU]')
    axes[1].set_title(title + ' - Slope')

In [None]:
# Histograms of the linear-fit parameters for all channels - GAUSSIAN
# Rows: APA 1, APA 2, all channels. Columns: intercept, slope.

fig, axes = plt.subplots(3, 2, figsize=(12, 15))

plot_two_params(df[df['APA']==1], 'Gaussian params', axes[0], color='#cce5ff', title='APA 1')
plot_two_params(df[df['APA']==2], 'Gaussian params', axes[1], color='#6699ff', title='APA 2')
# Title typo fixed ('ALLA' -> 'ALL'), consistent with the params-difference figure
plot_two_params(df, 'Gaussian params', axes[2], color='#003399', title='ALL CHANNELS')

fig.suptitle('Gaussian fit', fontsize=20)
plt.tight_layout()
plt.savefig(f"{output_folder}/gaussian_params_hist.png", dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Histograms of the linear-fit parameters for all channels - LANGAUSS
# Rows: APA 1, APA 2, all channels. Columns: intercept, slope.

fig, axes = plt.subplots(3, 2, figsize=(12, 15))

plot_two_params(df[df['APA']==1], 'Langauss params', axes[0], color='#ccffcc', title='APA 1')
plot_two_params(df[df['APA']==2], 'Langauss params', axes[1], color='#66cc66', title='APA 2')
# Title typo fixed ('ALLA' -> 'ALL'), consistent with the params-difference figure
plot_two_params(df, 'Langauss params', axes[2], color='#006600', title='ALL CHANNELS')

fig.suptitle('Langauss fit', fontsize=20)
plt.tight_layout()
plt.savefig(f"{output_folder}/langauss_params_hist.png", dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Histograms of the Gaussian-minus-Langauss difference of the linear-fit parameters

def _gauss_minus_langauss(row):
    """Return [intercept difference, slope difference] (Gaussian - Langauss) for one row."""
    gauss = str_to_array(row['Gaussian params'])
    langauss = str_to_array(row['Langauss params'])
    return [gauss[0] - langauss[0], gauss[1] - langauss[1]]


df['diff_params'] = df.apply(_gauss_minus_langauss, axis=1)

fig, axes = plt.subplots(3, 2, figsize=(12, 15))

# One row of subplots per selection: (rows of df, bar colour, title prefix)
_diff_panels = [
    (df[df['APA']==1], '#ffcce6', 'APA 1'),
    (df[df['APA']==2], '#ff66b2', 'APA 2'),
    (df, '#cc0066', 'ALL CHANNELS'),
]
for _axes_row, (_subset, _colour, _label) in zip(axes, _diff_panels):
    plot_two_params(_subset, 'diff_params', _axes_row, color=_colour, title=_label)

fig.suptitle('Params difference', fontsize=20)
plt.tight_layout()
plt.savefig(f"{output_folder}/diff_params_hist.png", dpi=300, bbox_inches='tight')
plt.show()

In [None]:
import seaborn as sns

def plot_apa_final(df, fit_function):
    """Draw, for APA 1 and APA 2, a heatmap of the linear-fit slope of each channel.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns 'APA', 'Endpoint', 'Channel',
        f'{fit_function} params' and f'{fit_function} eparams'. The element at
        index 1 of the params / eparams cells is used as slope / slope error.
    fit_function : str
        Column prefix selecting what is plotted (e.g. 'Gaussian', 'Langauss',
        'vis Langauss'). It is also used in the subplot titles, in the
        colorbar label and in the output file name.

    Notes
    -----
    - Channel positions come from ``APA_map[apa_id].data[row][col]``, read on a
      10 x 4 grid. NOTE(review): assumes every APA map has at least that
      size — confirm.
    - Rows whose params/eparams cannot be parsed are skipped; their cell is
      labelled "No data".
    - The figure is saved to f"{output_folder}/{fit_function}_hitmap.png"
      and then shown.
    """
    apas_to_plot = [1, 2]
    n_rows, n_cols = 10, 4

    params_col = f'{fit_function} params'
    eparams_col = f'{fit_function} eparams'

    # Create figure with two subplots
    fig, axes = plt.subplots(1, 2, figsize=(20, 12))

    def get_val_safe(x, idx):
        """Return element ``idx`` of the parsed cell, or NaN if it cannot be read."""
        try:
            return str_to_array(x)[idx]
        except Exception:
            # Best effort: a malformed row must not break the colour scale.
            # `Exception` (not a bare except) lets KeyboardInterrupt through.
            return np.nan

    # Global min/max over all APAs for a consistent colour scale
    all_slopes = df[params_col].apply(lambda x: get_val_safe(x, 1))
    vmin, vmax = all_slopes.min(), all_slopes.max()

    for i, apa_id in enumerate(apas_to_plot):
        # 1. Create lookup: (Endpoint, Channel) -> (slope, error)
        df_apa = df[df['APA'] == apa_id]
        lookup = {}

        for _, row in df_apa.iterrows():
            try:
                params = str_to_array(row[params_col])
                eparams = str_to_array(row[eparams_col])
                lookup[(row['Endpoint'], row['Channel'])] = (params[1], eparams[1])
            except Exception:
                # Unparsable or too-short entries are reported as "No data" below
                continue

        # 2. Initialize matrices for heatmap (colors) and annotations (text)
        grid_values = np.full((n_rows, n_cols), np.nan)
        grid_labels = np.full((n_rows, n_cols), "", dtype=object)

        apa_obj = APA_map[apa_id]

        # 3. Fill the grids
        for r in range(n_rows):
            for c in range(n_cols):
                unique_ch = apa_obj.data[r][c]
                ep, ch = unique_ch.endpoint, unique_ch.channel

                data_point = lookup.get((ep, ch), None)

                if data_point is not None:
                    val, err = data_point
                    grid_values[r, c] = val
                    # Backslashes are escaped explicitly: "\p" is not a valid
                    # escape sequence in a non-raw string.
                    grid_labels[r, c] = f"END: {ep} - CH: {ch}\nB = ({val:.0f} $\\pm$ {err:.0f}) $\\frac{{N_{{PE}}}}{{GeV}}$"
                else:
                    grid_labels[r, c] = f"END: {ep} - CH: {ch}\nNo data"

        # 4. Draw the Heatmap
        sns.heatmap(grid_values,
                    annot=grid_labels,
                    fmt="",
                    cmap="YlOrRd",
                    ax=axes[i],
                    linewidths=1,
                    linecolor='black',
                    vmin=vmin,
                    vmax=vmax,
                    # Label follows the plotted fit instead of always saying "Gaussian"
                    cbar_kws={'label': f'{fit_function} Slope Value'})

        axes[i].set_title(f"APA {apa_id} - {fit_function} fit", fontsize=18, fontweight='bold', pad=20)
        axes[i].axis('off')

    plt.tight_layout()
    plt.savefig(f"{output_folder}/{fit_function}_hitmap.png", dpi=300, bbox_inches='tight')
    plt.show()


In [None]:
# Heatmap of the linear-fit slope of every channel, one panel per APA - GAUSSIAN
plot_apa_final(df, 'Gaussian')

In [None]:
# Heatmap of the linear-fit slope of every channel, one panel per APA - LANGAUSS
plot_apa_final(df, 'Langauss')

In [None]:
# Same heatmap, reading the 'vis Langauss params' / 'vis Langauss eparams' columns
# (second element divided by the channel visibility)
plot_apa_final(df, 'vis Langauss')

In [None]:
# Same heatmap, reading the 'norm vis Langauss params' / 'norm vis Langauss eparams' columns
# (visibility-corrected second element divided by its column maximum)
plot_apa_final(df, 'norm vis Langauss')