# Import required tools

In [2]:
import os
import plotly.subplots as psu
import numpy as np
import pandas as pd

import sys
# Make the local waffles checkout importable: its `src` folder is appended
# to the module search path before the `waffles` imports below are resolved.
waffles_dir = '/Users/acervera/HEP/DUNE/ProtoDUNE-HD/PDS/data_taking/waffles'
sys.path.append(waffles_dir+'/src') 

from waffles.data_classes.Waveform import Waveform
from waffles.data_classes.WaveformSet import WaveformSet
from waffles.data_classes.ChannelWsGrid import ChannelWsGrid
from waffles.data_classes.IPDict import IPDict
from waffles.data_classes.BasicWfAna import BasicWfAna
from waffles.input_output.raw_root_reader import WaveformSet_from_root_files
from waffles.input_output.pickle_file_reader import WaveformSet_from_pickle_files
from waffles.utils.fit_peaks.fit_peaks import fit_peaks_of_ChannelWsGrid
from waffles.plotting.plot import plot_ChannelWsGrid
from waffles.np04_utils.utils import get_channel_iterator
from waffles.np04_analysis.led_calibration.configs.calibration_batches.LED_configuration_to_channel import config_to_channels
from waffles.np04_analysis.led_calibration.configs.calibration_batches.run_number_to_LED_configuration import run_to_config
from waffles.np04_analysis.led_calibration.configs.calibration_batches.excluded_channels import excluded_channels
from waffles.np04_data.ProtoDUNE_HD_APA_maps import APA_map

# Define some useful functions

In [47]:
def get_run_folderpath(run, base_folderpath):
    """Return the folder from which the files of a run are read.

    Every run is currently read from the same folder, so `run` is
    accepted but not used: the result is simply `base_folderpath`
    formatted as a string.
    """
    # Per-run layout, currently disabled:
    #    return f"{base_folderpath}/data/run_0{run}"
    return "{}".format(base_folderpath)

def get_apa_foldername(measurements_batch, apa_no):
    """Return the name of the folder holding the data of an APA.

    The naming convention is not homogeneous across measurement
    batches: batch 1 groups APAs 1 and 2 under 'apas_12', whereas
    batches 2 and 3 use one folder per APA ('apa_1', 'apa_2').
    APAs 3 and 4 always share 'apas_34'.

    Raises ValueError if `measurements_batch` is not 1, 2 or 3, or
    if `apa_no` is not 1, 2, 3 or 4.
    """

    if measurements_batch not in (1, 2, 3):
        raise ValueError(f"Measurements batch {measurements_batch} is not valid")

    if apa_no not in (1, 2, 3, 4):
        raise ValueError(f"APA number {apa_no} is not valid")

    # APAs 3 and 4 share a folder regardless of the batch
    if apa_no in (3, 4):
        return 'apas_34'

    # From here on apa_no is either 1 or 2
    if measurements_batch == 1:
        return 'apas_12'

    return 'apa_1' if apa_no == 1 else 'apa_2'

def comes_from_channel(
        waveform: Waveform, 
        endpoint, 
        channels) -> bool:
    """Tell whether a waveform belongs to one of the given channels.

    Returns True only if `waveform.endpoint` equals `endpoint` and
    `waveform.channel` is contained in `channels`; False otherwise.
    The channel membership is only evaluated when the endpoint matches.
    """

    return bool(
        waveform.endpoint == endpoint
        and waveform.channel in channels
    )

# Set the input variables

In [None]:
# Path where to look for the root files or the pickle files
#data_folderpath = '/Users/acervera/HEP/DUNE/ProtoDUNE-HD/PDS/data_taking/waffles/docs/examples'
base_folderpath = '/Users/acervera/HEP/DUNE/ProtoDUNE-HD/PDS/data_taking/waffles/docs/examples/data'
# Path where to save the plots
plots_saving_filepath = ''

measurements_batch = 2  # 1, 2, 3
# Batch 1 is read from ROOT files, any other batch from pickle files
fProcessRootNotPickles = measurements_batch == 1
apa_no = 2  # 1, 2, 3, 4
pde = 0.45  # 0.40, 0.45, 0.50
# The summary dataframe is stored relative to the current working directory
path_to_output_summary_dataframe = os.path.join(
    os.getcwd(), 
    f"calibration_batches/batch_{measurements_batch}/output.pkl")

# Values associated with the chosen PDE; they are stored in the
# 'HPK_OV_V', 'FBK_OV_V' and 'OV#' columns of the summary dataframe
# (presumably overvoltages in volts and an overvoltage index - TODO confirm)
hpk_ov = {0.4 : 2.0, 0.45 : 3.5, 0.50 : 4.0}[pde]
fbk_ov = {0.4 : 3.5, 0.45 : 4.5, 0.50 : 7.0}[pde]
ov_no = {0.4 : 1, 0.45 : 2, 0.50 : 3}[pde]

# The APA folder name depends on both the batch and the APA number, so it
# is resolved with get_apa_foldername() instead of being hard-coded (the
# former hard-coded choice sent every APA other than 2 to 'apas_34').
apa_foldername = get_apa_foldername(measurements_batch, apa_no)
data_folderpath = f"{base_folderpath}/batch_{measurements_batch}/{apa_foldername}/{pde}"

print(data_folderpath)



In [67]:
# Narrow the imported lookup tables down to the configuration chosen above.
# The run -> LED configuration and LED configuration -> channels tables are
# indexed by batch, APA and PDE; the excluded channels only by batch here
# (the APA and PDE levels are resolved where the table is used).
run_to_config_, config_to_channels_ = (
    lookup_table[measurements_batch][apa_no][pde]
    for lookup_table in (run_to_config, config_to_channels)
)
excluded_channels_ = excluded_channels[measurements_batch]

# Set the analysis input parameters

In [68]:
# Label under which the analysis results are stored in each waveform
analysis_label = 'standard'

if apa_no != 1:

    # Every run of APAs other than 1 uses the same starting tick
    starting_tick = dict.fromkeys(run_to_config_, 125)
    baseline_limits = [0, 100, 900, 1000]

else:

    # For APA 1 the starting tick is given run by run
    starting_tick = {
        27818: 621,
        27820: 615,
        27822: 615,
        27823: 615,
        27824: 615,
        27825: 615,
        27826: 615,
        27827: 632,
        27828: 626,
        27898: 635,
        27899: 635,
        27900: 618,
        27921: 602,
        27901: 615,
        27902: 615,
        27903: 615,
        27904: 630,
        27905: 620,
        27906: 610,
        27907: 608,
        27908: 602,
    }
    baseline_limits = [100, 400]

# Width of the integration window, which starts at the starting tick
aux_width = 40

# Containers for the analysis inputs; the window limits and the number of
# points per waveform are filled in later, run by run
input_parameters = IPDict(baseline_limits=baseline_limits)
checks_kwargs = IPDict()

# Read and analyse data

In [None]:
# The first (run, endpoint) pair seeds the WaveformSet into which every
# other pair is merged later on
first_run = list(run_to_config_)[0]
first_endpoint = list(config_to_channels_[run_to_config_[first_run]])[0]
channels = config_to_channels_[run_to_config_[first_run]][first_endpoint]

first_run_folderpath = get_run_folderpath(first_run, data_folderpath)

if not fProcessRootNotPickles:

    wfset = WaveformSet_from_pickle_files(
        folderpath=first_run_folderpath,
        target_extension=".pkl",
        verbose=True,
    )

else:

    # Fraction of the input at which reading stops; also used when
    # reading the remaining runs
    stop_fraction = 1.0

    # These three reader options are only switched on for APA 1
    reading_apa_1 = apa_no == 1

    wfset = WaveformSet_from_root_files(
        "pyroot",
        folderpath=first_run_folderpath,
        bulk_data_tree_name="raw_waveforms",
        meta_data_tree_name="metadata",
        set_offset_wrt_daq_window=reading_apa_1,
        read_full_streaming_data=reading_apa_1,
        truncate_wfs_to_minimum=reading_apa_1,
        start_fraction=0.0,
        stop_fraction=stop_fraction,
        subsample=1,
        verbose=True,
    )

In [70]:
# Keep only the waveforms of the first endpoint which come from the
# channels of the first run's LED configuration
wfset = WaveformSet.from_filtered_WaveformSet(
    wfset,
    comes_from_channel,
    first_endpoint,
    channels,
)

# The 'int' and 'amp' windows share the same limits: they start at the
# starting tick of the run and span aux_width ticks
first_window_start = starting_tick[first_run]
first_window_stop = first_window_start + aux_width
for window_kind in ('int', 'amp'):
    input_parameters[f'{window_kind}_ll'] = first_window_start
    input_parameters[f'{window_kind}_ul'] = first_window_stop

checks_kwargs['points_no'] = wfset.points_per_wf

# No extra positional arguments are passed to the analysis
_ = wfset.analyse(
    analysis_label,
    BasicWfAna,
    input_parameters,
    analysis_kwargs={},
    checks_kwargs=checks_kwargs,
    overwrite=True,
)

In [None]:
# Read, filter and analyse every remaining (run, endpoint) pair and merge
# the result into the WaveformSet seeded with the first pair
for run, led_config in run_to_config_.items():
    for endpoint, endpoint_channels in config_to_channels_[led_config].items():
        print(run)

        # The first pair has already been read and analysed
        if (run, endpoint) == (first_run, first_endpoint):
            continue

        run_folderpath = get_run_folderpath(run, data_folderpath)

        if not fProcessRootNotPickles:
            print(data_folderpath)
            new_wfset = WaveformSet_from_pickle_files(
                folderpath=run_folderpath,
                target_extension=".pkl",
                verbose=True,
            )
        else:
            # These three reader options are only switched on for APA 1
            reading_apa_1 = apa_no == 1
            new_wfset = WaveformSet_from_root_files(
                "pyroot",
                folderpath=run_folderpath,
                bulk_data_tree_name="raw_waveforms",
                meta_data_tree_name="metadata",
                set_offset_wrt_daq_window=reading_apa_1,
                read_full_streaming_data=reading_apa_1,
                truncate_wfs_to_minimum=reading_apa_1,
                start_fraction=0.0,
                stop_fraction=stop_fraction,
                subsample=1,
            )

        # Keep only the waveforms of this endpoint which come from the
        # channels of this run's LED configuration
        new_wfset = WaveformSet.from_filtered_WaveformSet(
            new_wfset,
            comes_from_channel,
            endpoint,
            endpoint_channels,
        )

        # Both windows start at the starting tick of the run and span
        # aux_width ticks
        window_start = starting_tick[run]
        window_stop = window_start + aux_width
        for window_kind in ('int', 'amp'):
            input_parameters[f'{window_kind}_ll'] = window_start
            input_parameters[f'{window_kind}_ul'] = window_stop
        checks_kwargs['points_no'] = new_wfset.points_per_wf

        print("\n Now analysing waveforms from:")
        print(f" - run {run}")
        print(f" - endpoint {endpoint}")
        print(f" - channels {endpoint_channels} \n")

        # No extra positional arguments are passed to the analysis
        _ = new_wfset.analyse(
            analysis_label,
            BasicWfAna,
            input_parameters,
            analysis_kwargs={},
            checks_kwargs=checks_kwargs,
            overwrite=True,
        )

        wfset.merge(new_wfset)

# Inspect the read channels

In [None]:
# Display the channels available in the merged WaveformSet, as a check
# on what has been read
wfset.available_channels

# Set the fitting input parameters

In [73]:
# Maximum number of peaks to fit
max_peaks = 2

# Minimal prominence, as a fraction of the y-range, for a peak to be detected
prominence = 0.15 # [0.10 - 0.2]

# The number of points to fit on either side of the peak maximum
# P.e. setting this to 2 will fit 5 points in total: the maximum and 2 points on either side
half_points_to_fit = 2 # [2 - 3]       

# Number of bins for the histogram
bins_number = 125 # [90 - 125]

if apa_no in [2, 3, 4]:
    if pde == 0.4:
        bins_number = 125
    elif pde == 0.45:
        bins_number = 110 # [100-110]
    else:
        bins_number = 90

In [74]:
# Range of the calibration histograms of the 'integral' variable
calib_histo_domain = np.array([-10000.0, 50000.0])

# Arrange the waveforms following the channel map of the chosen APA and
# request a calibration histogram with the binning chosen above
grid_apa = ChannelWsGrid(
    APA_map[apa_no],
    wfset,
    compute_calib_histo=True,
    bins_number=bins_number,
    domain=calib_histo_domain,
    variable="integral",
    analysis_label=None,
)

In [None]:
# Fit the peaks of the calibration histograms of the grid, using the
# fitting input parameters set above. The resulting fit parameters are
# read back later from each channel's calib_histo.
# NOTE(review): initial_percentage / percentage_step are passed through
# as-is; their exact meaning is defined by fit_peaks_of_ChannelWsGrid.
fit_peaks_of_ChannelWsGrid(
    grid_apa,
    max_peaks,
    prominence,
    half_points_to_fit,
    initial_percentage=0.15,
    percentage_step=0.05,
)

In [76]:
# Title of the calibration-histograms figure: the APA number followed by
# the list of runs held by the merged WaveformSet
title = f"APA {apa_no} - Runs {list(wfset.runs)}"

# Plot calibration histograms

In [None]:
# Figure with a 10x4 grid of axes onto which the grid is plotted
figure = psu.make_subplots(rows=10, cols=4)

# Calibration mode, with the peak fits drawn and independent axes scales
plotting_options = dict(
    share_x_scale=False,
    share_y_scale=False,
    mode="calibration",
    wfs_per_axes=None,
    analysis_label=analysis_label,
    plot_peaks_fits=True,
    detailed_label=False,
    verbose=True,
)
plot_ChannelWsGrid(grid_apa, figure=figure, **plotting_options)

layout_title = dict(text=title, font=dict(size=24))
figure.update_layout(
    title=layout_title,
    width=1100,
    height=1200,
    showlegend=True,
)

figure.show()

# figure.write_image(f"{plots_saving_filepath}/apa_{apa_no}_calibration_histograms.png")

# Extract the fit data

In [None]:
# Fit results, nested as data[endpoint][channel] = {'gain': ..., 'snr': ...}
data = {}

# Channels excluded for the chosen APA and PDE, as endpoint -> channels
excluded_for_this_setup = excluded_channels_[apa_no][pde]

for i in range(grid_apa.ch_map.rows):
    for j in range(grid_apa.ch_map.columns):

        endpoint = grid_apa.ch_map.data[i][j].endpoint
        channel = grid_apa.ch_map.data[i][j].channel

        if endpoint in excluded_for_this_setup and channel in excluded_for_this_setup[endpoint]:
            print(f"Excluding channel {channel} from endpoint {endpoint}...")
            continue

        # Channels of the map for which no waveforms were read are skipped
        try:
            fit_params = grid_apa.ch_wf_sets[endpoint][channel].calib_histo.gaussian_fits_parameters
        except KeyError:
            print(f"Endpoint {endpoint}, channel {channel} not found in data. Continuing...")
            continue

        # The gain is the distance between the means of the first two
        # fitted peaks, so both of them are required.
        # NOTE(review): the trailing [0] presumably picks the value out of
        # a (value, error) pair - confirm against the fit_peaks module
        try:
            aux_gain = fit_params['mean'][1][0] - fit_params['mean'][0][0]
        except IndexError:
            print(f"Endpoint {endpoint}, channel {channel} does not have two fitted peaks. Continuing...")
            continue

        # The endpoint entry is only created once there is a result to
        # store, so that data never holds empty endpoints
        channel_results = data.setdefault(endpoint, {}).setdefault(channel, {})

        channel_results['gain'] = aux_gain
        # Gain over the quadratic sum of the widths of both peaks
        channel_results['snr'] = aux_gain/np.sqrt( fit_params['std'][0][0]**2 + fit_params['std'][1][0]**2 )


# Save the fit data to a running dataframe

In [None]:
# Warning: Setting this variable to True will save
# changes to the output dataframe, potentially introducing
# spurious data. Only set it to True if you are sure of what
# you are saving. While it is False, nothing is written to disk.
actually_save = True   

# Do you want to potentially overwrite existing rows of the dataframe?
overwrite = True

# Columns of the summary dataframe together with their types
column_dtypes = {
    "APA": int,
    "endpoint": int,
    "channel": int,
    "channel_iterator": int,
    "PDE": float,
    "gain": float,
    "snr": float,
    "OV#": int,
    "HPK_OV_V": float,
    "FBK_OV_V": float,
}

expected_columns = {column: [] for column in column_dtypes}

# Start from the stored dataframe if there is one; otherwise from an
# empty dataframe whose column-wise types are forced
if os.path.exists(path_to_output_summary_dataframe):
    df = pd.read_pickle(path_to_output_summary_dataframe)
else:
    df = pd.DataFrame(expected_columns).astype(column_dtypes)

# Both the names and the order of the columns must match
if list(df.columns) != list(expected_columns):
    raise Exception("The columns of the found dataframe do not match the expected ones. Something went wrong.")

for endpoint in data:
    for channel in data[endpoint]:

        # Assemble the new row
        new_row = {
            "APA": [int(apa_no)],
            "endpoint": [endpoint],
            "channel": [channel],
            "channel_iterator": [get_channel_iterator(apa_no, endpoint, channel)],
            "PDE": [pde],
            "gain": [data[endpoint][channel]["gain"]],
            "snr": [data[endpoint][channel]["snr"]],
            "OV#": [ov_no],
            "HPK_OV_V": [hpk_ov],
            "FBK_OV_V": [fbk_ov],
        }

        # Check if there is already an entry for the
        # given endpoint and channel for this OV
        matching_rows_indices = df[
            (df['endpoint'] == endpoint) &
            (df['channel'] == channel) &
            (df['OV#'] == ov_no)].index

        if len(matching_rows_indices) > 1:
            raise Exception(f"There are already more than one rows for the given endpoint ({endpoint}), channel ({channel}) and OV# ({ov_no}). Something went wrong.")

        if len(matching_rows_indices) == 1:

            if not overwrite:
                print(f"Skipping the entry for endpoint {endpoint}, channel {channel} and OV# {ov_no} ...")
                continue

            row_index = matching_rows_indices[0]

            # A single row is assigned from scalars, not one-item lists
            new_row = { key : new_row[key][0] for key in new_row.keys() }

            if actually_save:
                df.loc[row_index, :] = new_row

        elif actually_save:
            # No previous entry: append the row. ignore_index already
            # renumbers the rows, so no further index reset is needed
            df = pd.concat([df, pd.DataFrame(new_row)], axis = 0, ignore_index = True)

# Only touch the disk if saving was explicitly requested
if actually_save:
    os.makedirs(os.path.dirname(path_to_output_summary_dataframe), exist_ok=True)
    df.to_pickle(path_to_output_summary_dataframe)

# Display the dataframe

In [None]:
# Display the in-memory summary dataframe
df

In [81]:
# Read the summary dataframe back from the file written above, to check
# what has actually been stored for the configured measurements batch
# (instead of relying on a machine-specific path with a fixed batch)
df2 = pd.read_pickle(path_to_output_summary_dataframe)

In [None]:
# Display the dataframe read back from disk
df2