In [None]:
#  Description : Working notebook for code that is still in development, not yet fully documented, or not yet part of a self-contained pipeline.

# Date    : 2024-06-11 10:37:25
# Author  : Karla Onate Melecio <kgom.astro@gmail.com>
# Version : 3.10.12


import sys
import struct
import pathlib

import pandas as pd
import numpy as np




**function: pull_feather_files_from_L0_directory(run_id, channels, ps_window=30000)**


For each requested run, this function reads the run's L0 feather file and its event lookup table, and returns the tagged, singles, and "all" data for the specified list of channels. This assumes you have already produced the event lookup table using the data-level processing tools.

In [None]:
def pull_feather_files_from_L0_directory(run_id, channels, ps_window=30000, *,
                                         tag_channels=(81, 83), data_dir='../data'):
    """
    Load L0 hits for one or more runs and return the "all", tagged, and singles data
    for the specified list of channels.

    For every run, the L0 feather file and the matching event lookup table are read,
    each hit is annotated with the second it falls in and the hit rate of that second,
    and hits on `channels` are paired with hits on `tag_channels` that share an event ID.

    Parameters:
    ----------
    run_id : int or iterable of int
        Run ID, or collection of run IDs, to process. A single (scalar) ID is treated
        as a one-element list.
    channels : int or iterable of int
        Channel ID(s) to include in the analysis.
    ps_window : int, optional
        Window value embedded in the event lookup table filename
        (`events_{ps_window}ps_window_{run}.feather`, default is 30000).
        NOTE(review): presumably the window, in picoseconds, used when the lookup
        table was built -- confirm against the data level processing tools.
    tag_channels : int or iterable of int, optional (keyword-only)
        Channel ID(s) of the tagging detectors (default is (81, 83)).
    data_dir : str or path-like, optional (keyword-only)
        Root data directory containing `L0/` and `ancillary/lookup_tables/event/`
        (default is '../data', resolved relative to the current working directory).

    Returns:
    -------
    all_dfs : list of pd.DataFrame
        One DataFrame per run: hits on `channels` merged with the event lookup table.
    tagged : list of pd.DataFrame
        One DataFrame per run: hits on `channels` paired with tag-channel hits of the
        same event. Columns: event_id, sec, multiplicity, channelID, channelID_t, adc,
        adc_t, rate, time, time_t, dt (un-suffixed columns belong to the `channels`
        hit, `_t` columns to the tag hit; dt = time - time_t).
    singles : list of pd.DataFrame
        One DataFrame per run: rows of `all_dfs` with multiplicity == 1.

    The three lists are returned as a tuple `(all_dfs, tagged, singles)`.

    Notes:
    -------
    The L0 file is expected to provide at least `time`, `channelID` and `adc` columns,
    and the lookup table `hits`, `event_id` and `multiplicity`; `time` is assumed to be
    in picoseconds, since it is divided by 1e12 to obtain seconds.

    TODO:
    -------
    Implement multi-event functionality; singles can likely be removed as the dataframe with "all" data contain a column with 'multiplicity' information.
    """
    # Accept a single ID as well as a collection, and never rebind the argument
    # inside the loop (the loop variable used to shadow the parameter).
    run_ids = [run_id] if np.isscalar(run_id) else list(run_id)
    channels = [channels] if np.isscalar(channels) else list(channels)
    tag_channels = [tag_channels] if np.isscalar(tag_channels) else list(tag_channels)

    data_dir = pathlib.Path(data_dir)
    event_dir = data_dir / 'ancillary' / 'lookup_tables' / 'event'

    tagged_columns = ['event_id', 'sec', 'multiplicity', 'channelID', 'channelID_t',
                      'adc', 'adc_t', 'rate', 'time', 'time_t', 'dt']

    singles = []
    tagged = []
    all_dfs = []

    for rid in run_ids:
        # Read L0 data; the row index doubles as the hit identifier used by the lookup table
        data = pd.read_feather(data_dir / 'L0' / f'L0_{rid}.feather')
        data['hit_id'] = data.index.values
        # Whole seconds elapsed since the first hit of the run
        data['sec'] = ((data['time'] - data['time'].min()) // 1e12).astype('int64')

        # Rate = number of hits (all channels) recorded in each second.
        # np.unique already returns sorted values, so no pre-sorting is needed.
        seconds, rate = np.unique(data['sec'].to_numpy(), return_counts=True)
        rate_data = pd.DataFrame({'sec': seconds, 'rate': rate})
        data = pd.merge(data, rate_data, on='sec')

        # Read event lookup table (maps each hit to its event)
        event_table = pd.read_feather(event_dir / f'events_{ps_window}ps_window_{rid}.feather')
        event_table = event_table.rename(columns={'hits': 'hit_id'})

        # Split hits into tagging-detector hits and hits on the requested channels
        tdata = data[data['channelID'].isin(tag_channels)]
        gdata = data[data['channelID'].isin(channels)]

        # Attach event information to the tagging-detector hits
        cal_events = event_table[event_table['hit_id'].isin(tdata['hit_id'].values)]
        t_events = pd.merge(tdata, cal_events, on='hit_id')

        # Attach event information to the hits on the requested channels
        g_events = event_table[event_table['hit_id'].isin(gdata['hit_id'].values)]
        gdata = pd.merge(gdata, g_events, on='hit_id')

        # Pair each requested-channel hit with the tag hit(s) of the same event
        tgd = pd.merge(gdata, t_events, on='event_id', suffixes=('', '_t'))
        # Time difference between the two hits; computed on the merge result before
        # the column selection so the assignment never lands on a slice.
        tgd['dt'] = tgd['time'] - tgd['time_t']
        tgd = tgd[tagged_columns]

        # Append results to lists
        all_dfs.append(gdata)
        tagged.append(tgd)
        singles.append(gdata[gdata['multiplicity'] == 1])

    return all_dfs, tagged, singles