# Import required tools

In [2]:
import os
import pickle
import datetime
import numpy as np
import pandas as pd
import plotly.graph_objects as pgo
import matplotlib.pyplot as plt

import sys
waffles_dir = '/Users/acervera/HEP/DUNE/ProtoDUNE-HD/PDS/data_taking/waffles'
sys.path.append(waffles_dir+'/src') 

from waffles.np04_utils.utils import get_endpoint_and_channel
from waffles.np04_analysis.LED_calibration.calibration_batches.metadata import metadata

# Define some helper functions

In [3]:
def compute_timestamp(day, month, year):
    """Return the POSIX timestamp of midnight on the given date.

    The date is passed as three integers: the day, the month and
    the year, in that order. A naive ``datetime`` is built from
    them and converted with ``datetime.timestamp()``, which treats
    naive values as local time. The result is therefore the number
    of seconds elapsed between 1970-01-01 00:00:00 UTC and local
    midnight of the requested date, as a float.

    """
    midnight = datetime.datetime(year=year, month=month, day=day)
    return midnight.timestamp()

def get_apa_foldername(measurements_batch, apa_no):
    """Return the name of the folder holding the data of one APA.

    The folder naming is not homogeneous across measurement batches:

    - APAs 3 and 4 always share the folder 'apas_34'.
    - In batch 1, APAs 1 and 2 share the folder 'apas_12'.
    - In batches 2 and 3, APAs 1 and 2 have their own folders,
      'apa_1' and 'apa_2'.

    Raises ValueError if measurements_batch is not 1, 2 or 3, or if
    apa_no is not 1, 2, 3 or 4. The batch is validated first.
    """

    if measurements_batch not in (1, 2, 3):
        raise ValueError(f"Measurements batch {measurements_batch} is not valid")

    if apa_no not in (1, 2, 3, 4):
        raise ValueError(f"APA number {apa_no} is not valid")

    # From here on both arguments are known to be valid.
    if apa_no not in (1, 2):
        return 'apas_34'

    if measurements_batch == 1:
        return 'apas_12'

    return 'apa_1' if apa_no == 1 else 'apa_2'

# Set the input variables

In [10]:
# APA numbers and measurement batches to include in the plots
apa_nos = [2]
batches = [2]

# Dataframe column to plot; only 'gain' and 'snr' are accepted
variable = 'snr'

# Legend visibility used by the plots against time
showlegend = False

# Base folder holding one 'batch_<n>' sub-folder per measurements batch
input_base_folderpath = '/Users/acervera/HEP/DUNE/ProtoDUNE-HD/PDS/data_taking/waffles/src/waffles/np04_analysis/LED_calibration/calibration_batches'

# Base folder under which the output images are written
path_to_output_folderpath = ''

if not (variable == 'gain' or variable == 'snr'):
    raise Exception('Either gain or snr must be selected')

# Load the dataframe

In [11]:
# Maps each batch number to the object unpickled from that batch's
# LED_calibration_data.pkl file.
dataframes = {}

for batch in batches:

    pickle_location = f"{input_base_folderpath}/batch_{batch}/output_pickles/LED_calibration_data.pkl"

    # os.path.join() discards os.getcwd() when pickle_location is an
    # absolute path; a relative one is resolved against the working
    # directory.
    aux_file_path = os.path.join(os.getcwd(), pickle_location)

    with open(aux_file_path, "rb") as pickle_file:
        dataframes[batch] = pickle.load(pickle_file)

FileNotFoundError: [Errno 2] No such file or directory: '/Users/acervera/HEP/DUNE/ProtoDUNE-HD/PDS/data_taking/waffles/docs/examples/data/batch_2/output_pickles/LED_calibration_data.pkl'

# Add 'batch' column and merge into single dataframe

In [None]:
# Tag every row with the batch it was loaded from, stored as an
# integer column named 'batch_no'.
for batch, batch_df in dataframes.items():

    batch_df['batch_no'] = [batch] * len(batch_df)
    batch_df['batch_no'] = batch_df['batch_no'].astype(int)

# Stack all the batches into one dataframe with a fresh 0..N-1 index
per_batch_frames = list(dataframes.values())
general_df = pd.concat(per_batch_frames, ignore_index=True)

# Some input parameters for plots

In [None]:
# PDE values to plot
pdes = [0.40, 0.45, 0.50]

# Marker color and marker symbol used for each PDE value
colors = dict(zip(pdes, ('black', 'green', 'red')))
symbols = dict(zip(pdes, ('circle', 'square', 'diamond')))

# Display name (used in titles) and axis label of each plottable variable
translator = dict(gain='Gain', snr='SNR')
y_label = dict(
    gain='center[1] - center[0]',
    snr='(center[1]-center[0])/sqrt( std[0]**2  + std[1]**2 )',
)

# Batch-wise plots

In [None]:
# For every (APA, batch) pair, draw the selected variable against the
# channel iterator, with one marker trace per PDE value, then show the
# figure and save it as a PNG image.
for apa_no in apa_nos:

    for batch_no in batches:

        # Get the data for the given APA and batch
        current_df = general_df[
            (general_df['APA'] == apa_no) &
            (general_df['batch_no'] == batch_no)]

        fig = pgo.Figure()

        for pde in pdes:

            aux = current_df[current_df['PDE'] == pde]

            fig.add_trace(pgo.Scatter(
                x=aux['channel_iterator'],
                y=aux[variable],
                mode='markers',
                marker=dict(
                    size=5,
                    color=colors[pde],
                    symbol=symbols[pde]),
                name=f"PDE = {pde}",
            ))

        batch_metadata = metadata[batch_no]

        title = f"{translator[variable]} per channel in APA {apa_no} - "\
                f"Batch {batch_no} ({batch_metadata['date_year']}/"\
                f"{batch_metadata['date_month']}/{batch_metadata['date_day']}"\
                f")"

        # The legend is always shown here, regardless of the value of
        # the showlegend input variable.
        fig.update_layout(
            title={
                    'text': title,
                    'font': {'size': 18},
                },
            xaxis_title='Channel',
            yaxis_title=y_label[variable],
            width=1000,
            height=400,
            showlegend=True,
        )

        # Map every channel iterator to an "<endpoint>-<channel>" tick
        # label. The columns are walked in lockstep instead of doing
        # one positional row lookup per field.
        labels = {
            channel_iterator: f"{int(endpoint)}-{int(channel)}"
            for channel_iterator, endpoint, channel in zip(
                current_df['channel_iterator'],
                current_df['endpoint'],
                current_df['channel'])}

        fig.update_layout(
            xaxis=dict(
                tickmode='array',
                tickvals=list(labels.keys()),
                ticktext=list(labels.values()),
                tickangle=45,
            )
        )

        fig.show()

        # Build the output location with os.path.join() so that an
        # empty base folder yields a path relative to the working
        # directory rather than one rooted at '/'. Create the folder
        # beforehand, since write_image() is given a path inside it.
        output_folderpath = os.path.join(
            path_to_output_folderpath,
            f"batch_{batch_no}",
            get_apa_foldername(batch_no, apa_no),
            "general_plots")
        os.makedirs(output_folderpath, exist_ok=True)

        fig.write_image(
            os.path.join(
                output_folderpath,
                f"apa_{apa_no}_clustered_{variable}s.png"))

# Prepare the data for the plot against time

In [None]:
# (year, month, day) of every batch, in the order given by `batches`
_batch_dates = [
    (
        metadata[batch_no]['date_year'],
        metadata[batch_no]['date_month'],
        metadata[batch_no]['date_day'],
    )
    for batch_no in batches
]

# x-coordinates of the plots against time: one timestamp per batch
time = [
    compute_timestamp(day, month, year)
    for year, month, day in _batch_dates
]

# Tick labels matching `time`, formatted as "<year>/<month>/<day>"
time_labels = [
    f"{year}/{month}/{day}"
    for year, month, day in _batch_dates
]

In [None]:
# data[apa_no][pde][channel_iterator] is the list of values of the
# selected variable for that channel, following the order of `batches`.
data = {}

for apa_no in apa_nos:

    per_pde = {}
    data[apa_no] = per_pde

    for pde in pdes:

        current_df = general_df[
            (general_df['APA'] == apa_no) &
            (general_df['PDE'] == pde)]

        per_channel = {}
        per_pde[pde] = per_channel

        for channel_iterator in current_df['channel_iterator'].unique():

            aux = current_df[current_df['channel_iterator'] == channel_iterator]
            time_ordered_values_of_variable = []

            # Walking the batches in the order of `batches` is what
            # keeps the values ordered by batch number, i.e. by time.
            # NOTE(review): nothing is appended for a batch without an
            # entry, so the list can end up shorter than `batches`.
            for batch_no in batches:

                aux2 = aux[aux['batch_no'] == batch_no]
                n_entries = len(aux2)

                if n_entries > 1:
                    raise Exception(f"Found more than one entry for APA {apa_no}, PDE {pde}, batch {batch_no} and channel iterator {channel_iterator}.")

                if n_entries == 1:
                    time_ordered_values_of_variable.append(
                        aux2[variable].values[0])
                else:
                    print(f"Warning: Found no entry for APA {apa_no}, PDE {pde}, batch {batch_no} and channel iterator {channel_iterator}.")

            per_channel[channel_iterator] = time_ordered_values_of_variable

# Plot against time, apa-wise

In [None]:
# For every APA, draw one figure with one line+marker trace per
# (PDE, channel) pair against time, then show it and save it as a PNG.
for apa_no, per_pde in data.items():

    fig = pgo.Figure()

    for pde, per_channel in per_pde.items():

        for channel_iterator, values in per_channel.items():

            unique_channel = get_endpoint_and_channel(
                apa_no,
                channel_iterator)

            fig.add_trace(
                pgo.Scatter(
                    x=time,
                    y=values,
                    mode='lines+markers',
                    name=f"PDE = {pde}, channel {unique_channel}",
                    line=dict(
                        color=colors[pde],
                        width=0.5),
                    marker=dict(
                        size=5,
                        color=colors[pde],
                        symbol=symbols[pde])
                )
            )

    title = f"{translator[variable]} per channel in APA {apa_no}"

    # No x-axis title is set for this figure
    fig.update_layout(
        title={
                    'text': title,
                    'font': {'size': 18},
                },
        yaxis_title=y_label[variable],
        width=800,
        height=400,
    )

    # One tick per batch, labelled with the date of the batch
    fig.update_layout(
        xaxis=dict(
            tickmode='array',
            tickvals=time,
            ticktext=time_labels,
            tickangle=15,
            tickfont=dict(size=16)
        ),
        showlegend=showlegend
    )

    fig.show()

    # Build the output location with os.path.join() so that an empty
    # base folder yields a path relative to the working directory
    # rather than one rooted at '/'. Create the folder beforehand,
    # since write_image() is given a path inside it.
    output_folderpath = os.path.join(
        path_to_output_folderpath,
        "general_plots")
    os.makedirs(output_folderpath, exist_ok=True)

    fig.write_image(
        os.path.join(
            output_folderpath,
            f"apa_{apa_no}_{variable}s_with_time.png"))

            

# Plot against time, pde-wise

In [None]:
# For every (APA, PDE) pair, draw one figure with one line+marker trace
# per channel against time, and show it. Nothing is written to disk.
for apa_no, per_pde in data.items():

    for pde, per_channel in per_pde.items():

        fig = pgo.Figure()

        trace_color = colors[pde]
        trace_symbol = symbols[pde]

        for channel_iterator, values in per_channel.items():

            unique_channel = get_endpoint_and_channel(apa_no, channel_iterator)

            trace = pgo.Scatter(
                x=time,
                y=values,
                mode='lines+markers',
                name=f"PDE = {pde}, channel {unique_channel}",
                line=dict(color=trace_color, width=0.5),
                marker=dict(size=5, color=trace_color, symbol=trace_symbol),
            )
            fig.add_trace(trace)

        title = f"{translator[variable]} per channel in APA {apa_no} - PDE {pde}"

        fig.update_layout(
            title=dict(text=title, font=dict(size=18)),
            xaxis_title='Time',
            yaxis_title=y_label[variable],
            width=800,
            height=400,
        )

        # One tick per batch, labelled with the date of the batch
        time_axis = dict(
            tickmode='array',
            tickvals=time,
            ticktext=time_labels,
            tickangle=15,
            tickfont=dict(size=16),
        )
        fig.update_layout(xaxis=time_axis, showlegend=showlegend)

        fig.show()

# Std histograms, pde-wise

In [None]:
# For every APA, histogram the standard deviation (over the batches) of
# the selected variable of every channel, with one step histogram per
# PDE value. Each APA gets its own subplot.
bin_width = 20 if variable == 'gain' else 0.1

# squeeze=False makes plt.subplots() return a 2D array of Axes even
# when there is a single APA. Without it a bare Axes object is
# returned in that case, and indexing it fails.
fig, axes = plt.subplots(
    nrows=1,
    ncols=len(apa_nos),
    figsize=(15, 5),
    squeeze=False
)

# Keep only the row of subplots, so that axes[k] is the k-th subplot
axes = axes[0]

for ax, (apa_no, per_pde) in zip(axes, data.items()):

    for pde, per_channel in per_pde.items():

        samples = np.array(
            [np.std(values) for values in per_channel.values()])

        # Drop non-finite values (np.std of an empty list is NaN);
        # they would make the computation of the number of bins fail.
        samples = samples[np.isfinite(samples)]

        if samples.size == 0:
            # Nothing to histogram for this PDE
            continue

        ax.hist(
            samples,
            bins=int((samples.max()-samples.min())/bin_width)+1,
            histtype="step",
            color=colors[pde],
            label=f"PDE = {pde}")

    # Per-subplot decoration is done once, after all the histograms
    # have been drawn. grid() without arguments toggles the grid, so
    # calling it once per PDE would switch it off for an even number
    # of PDE values; request it explicitly instead.
    ax.set_xlabel(y_label[variable])
    ax.legend(fontsize=15)
    ax.grid(True)
    ax.set_title(f"APA {apa_no}")

title = f"{translator[variable]} STD (over time)"

fig.suptitle(title, fontsize=16)
plt.tight_layout()
plt.show()