In [None]:
import io
import math
import os, sys
import random
import shutil
import time

import h5py
import numpy as np
import pandas as pd

In [None]:
# set locations for working files
#
# Two positional arguments are expected: <automation_dir> <attpcroot_dir>.
# A Jupyter kernel is normally launched as `ipykernel_launcher.py -f <connection.json>`,
# which also makes len(sys.argv) == 3, so an option-like first argument is treated
# as "no arguments given" and the testing directories are used instead.
cli_args = sys.argv[1:]
if len(cli_args) == 2 and not cli_args[0].startswith('-'):
    # Automation directory. The rest of the notebook builds paths as
    # `automation_dir + name`, so make sure it ends with a path separator.
    automation_dir = os.path.join(cli_args[0], '')

    # ATTPCROOTv2 directory
    attpcroot_dir = cli_args[1]
else:
    print("Usage: python3 process-sim.py <automation_dir> <attpcroot_dir>")
    print('Assuming testing directories')
    automation_dir = '/mnt/analysis/e17023/Adam/GADGET/.sims/0/'
    attpcroot_dir = automation_dir + 'ATTPCROOTv2/'

In [None]:
def indicator_file(file_type, indicator_directory=automation_dir):
    """Replace the indicator file(s) in a directory with a single new one.

    Every entry of ``indicator_directory`` whose name ends in ``.tmp`` is
    deleted, then ``<file_type>.tmp`` is created there containing the text ``1``.

    Parameters
    ----------
    file_type : str
        Base name of the indicator file to create. The value ``'STOP'`` is
        special: after the file is written, ``'STOPPING'`` is printed and the
        interpreter is terminated through ``sys.exit()``.
    indicator_directory : str
        Directory holding the indicator files. Defaults to the value
        ``automation_dir`` had when this function was defined.

    Returns
    -------
    None

    Raises
    ------
    SystemExit
        When ``file_type == 'STOP'``.
    """
    # remove old indicator file(s)
    for file in os.listdir(indicator_directory):
        if file.endswith('.tmp'):
            # os.path.join keeps this correct whether or not the directory was
            # given with a trailing separator.
            os.remove(os.path.join(indicator_directory, file))

    with open(os.path.join(indicator_directory, file_type + '.tmp'), 'w') as f:
        f.write('1')
    if file_type == 'STOP':
        print('STOPPING')
        sys.exit()
    return None

In [None]:
# Load the simulation parameter table (param.csv inside the automation directory).
parameters = pd.read_csv(f'{automation_dir}param.csv')

In [None]:
# Energy Calibration
def kev_to_channel(event_energy, energy_resolution=4.2):
    """Map an event energy in keV to a randomly smeared detector channel.

    The energy is converted to MeV and replaced by one draw from a normal
    distribution centred on it, with standard deviation
    ``energy_resolution * sqrt(event_energy) / 1000`` (``event_energy`` still in
    keV inside the square root). The smeared value is then pushed through the
    straight line defined by two hard-coded (energy in MeV, channel) points.

    Parameters
    ----------
    event_energy : float
        Event energy in keV. A negative value makes ``math.sqrt`` raise
        ``ValueError``.
    energy_resolution : float, optional
        Scale factor of the smearing width (default 4.2).

    Returns
    -------
    float
        The channel for the smeared energy. It differs between calls unless the
        NumPy global random state is seeded or the width is zero.
    """
    # Two-point linear calibration: (energy [MeV], channel).
    (low_mev, low_channel), (high_mev, high_channel) = (0.806, 156745), (1.679, 320842)
    gain = (high_channel - low_channel) / (high_mev - low_mev)
    offset = low_channel - gain * low_mev

    # Smearing width is computed from the keV value, before the unit conversion.
    sigma_mev = energy_resolution * math.sqrt(event_energy) / 1000
    smeared_mev = np.random.normal(event_energy / 1000, sigma_mev)

    return (smeared_mev * gain) + offset

In [None]:
def convert_sim_to_raw_h5(h5_file_name, event_energy, energy_resolution):
    """Convert a simulation HDF5 file to the raw HDF5 layout, replacing it in place.

    For every top-level key of the input file containing ``"Event_["`` the
    ``HitArray`` dataset is read (fields ``x``, ``y``, ``z``, ``t`` and ``A`` are
    used) and three datasets are written to the new file, where ``N`` is the
    number between the square brackets of the key:

    * ``get/evtN_header`` -- ``[N, sim_creation_time, N, 0]`` as float64
      (``sim_creation_time`` is currently always 0).
    * ``get/evtN_data`` -- int16, one row of 517 values per distinct (x, y) hit
      position. The first five values come from the flat pad-plane lookup
      (Cobo, ASAD, AGET, channel, pad); the amplitude ``A`` of each hit is added
      at column ``t + 5``.
    * ``clouds/evtN_cloud`` -- float64, one row per distinct (x, y) position:
      ``[x, y, z of the largest-amplitude hit, summed amplitude, pad number]``.

    ``meta/meta`` holds ``[min N, min time, max N, max time]`` over all events
    (``[inf, inf, -1, -1]`` when there are none).

    The result is written to a temporary ``*_new`` file next to the input and
    then moved over ``h5_file_name``. Lookup tables are read from
    ``{automation_dir}../../.input/``.

    Parameters
    ----------
    h5_file_name : str
        Path of the simulation file; it is overwritten by the converted file.
    event_energy, energy_resolution
        Currently unused: they are only referenced by the disabled energy
        calibration step kept as a comment below.

    Returns
    -------
    None
    """
    print('Converting simulation h5 files to raw h5 format...')

    # Lookup tables: pad centre coordinates, and electronics address per pad row.
    with open(f'{automation_dir}../../.input/padxy.txt') as padxy_lookup_txt: # TODO: non-hardcoded path
        padxy_lookup = np.array([tuple(map(float, line.strip().split(','))) for line in padxy_lookup_txt])
    with open(f'{automation_dir}../../.input/flatlookup4cobos.csv') as flatlookup4cobos_csv:
        flat_padplane_lookup = {
            row: tuple(map(int, line.strip().split(',')))
            for row, line in enumerate(flatlookup4cobos_csv)
        }

    # Derive the temporary name from the extension only, so a '.h5' elsewhere in
    # the path is left alone and a different extension still gives a distinct file.
    base_name, extension = os.path.splitext(h5_file_name)
    new_file_name = f"{base_name}_new{extension}"

    # Accumulated in memory and written once at the end.
    meta = [np.inf, np.inf, -1, -1] # N0, T0, N1, T1

    # The context managers close both files even if the conversion fails part-way.
    with h5py.File(h5_file_name, 'r') as old_format_h5, \
            h5py.File(new_file_name, 'w') as new_format_h5:
        new_format_h5.create_group('get')
        new_format_h5.create_group('meta')
        new_format_h5.create_group('clouds')

        event_keys = [key for key in old_format_h5.keys() if "Event_[" in str(key)]

        for event in event_keys:
            event_number = int(event.split('[')[1].split(']')[0])

            # get/evt#_header
            sim_creation_time = 0 #float(h5_file_name.replace('.h5', '').split('/')[-1])
            event_data = [event_number, sim_creation_time, event_number, 0]
            new_format_h5.create_dataset(f"get/evt{event_number}_header", data=event_data, dtype='float64')

            # Read the hit table into memory once; indexing the h5py dataset
            # inside the pad loop would go back to the file on every access.
            hit_array = old_format_h5[event]['HitArray'][()]

            # unique pad coordinates
            pads_xy = np.unique(np.column_stack((hit_array['x'], hit_array['y'])), axis=0)

            # get/evt#data and clouds/evt#_cloud
            raw_event_data = np.zeros((len(pads_xy), 517), dtype='int16')
            pca_event_data = np.zeros((len(pads_xy), 5), dtype='float64')
            for pad_index, pad in enumerate(pads_xy):

                # locate the nearest pad based on the lookup table
                pad_number = ((padxy_lookup - pad)**2).sum(axis=1).argmin()
                raw_event_data[pad_index, :5] = flat_padplane_lookup[pad_number] # Cobo, ASAD, AGET, channel, pad

                pca_event_data[pad_index, 0:2] = pad # x, y
                pca_event_data[pad_index, 4] = pad_number

                # z is taken from the hit with the largest amplitude; hits with
                # A <= 0 never set it.
                largest_amplitude = 0
                pad_hits = hit_array[(hit_array['x'] == pad[0]) & (hit_array['y'] == pad[1])]
                for hit in pad_hits:
                    raw_event_data[pad_index, hit['t']+5] += hit['A'] #TODO check if T or Z is the correct value to use
                    pca_event_data[pad_index, 3] += hit['A']

                    if hit['A'] > largest_amplitude:
                        pca_event_data[pad_index, 2] = hit['z']
                        largest_amplitude = hit['A']

            # Energy Calibration - disabled for now
            # simulated_channel = np.sum(pca_event_data[:, 3])
            # target_channel = kev_to_channel(event_energy, energy_resolution)
            # while target_channel < 0: # ensure that the target channel is positive
            #     target_channel = kev_to_channel(event_energy, energy_resolution)
            #     #TODO: potential infinite loop if the energy is too low
            # raw_event_data[:, 5:] = raw_event_data[:, 5:] * (target_channel / simulated_channel)
            # pca_event_data[:, 3] = pca_event_data[:, 3] * (target_channel / simulated_channel)

            # save the data
            new_format_h5.create_dataset(f"get/evt{event_number}_data", data=raw_event_data, dtype='int16')
            new_format_h5.create_dataset(f"clouds/evt{event_number}_cloud", data=pca_event_data, dtype='float64')

            # update meta/meta
            meta[0] = min(meta[0], event_number)
            meta[1] = min(meta[1], sim_creation_time)
            meta[2] = max(meta[2], event_number)
            meta[3] = max(meta[3], sim_creation_time)

            #TODO: COBO/ASAD Metadata files
            #TODO: Simulation parameters stored directly inside h5 file

        #meta/meta
        new_format_h5.create_dataset("meta/meta", data=meta, dtype='float64')

    # Swap the converted file into place in one step, so there is never a moment
    # where neither the old nor the new file exists under h5_file_name.
    os.replace(new_file_name, h5_file_name)

In [None]:
# check for and complete any active simulations

# Values of the 'Status' column in param.csv:
# 0 = inactive
# 1 = active
# 2 = complete
indicator_file('PROCESSING H5')

active_sims = parameters[parameters['Status'] == 1]

# indicator_file('STOP') writes STOP.tmp and exits, so nothing below runs
# unless exactly one simulation is active.
if len(active_sims) > 1:
    print('More than one simulation marked as active')
    indicator_file('STOP')
elif len(active_sims) == 0:
    # Without this guard active_sims.index[0] raises IndexError, which the
    # FileNotFoundError handler below does not catch, so no STOP file is written.
    print('No simulation marked as active')
    indicator_file('STOP')

# Row label of the single active simulation. Values are read column-wise with
# .loc[row, column] so each keeps the dtype of its own column.
sim_index = active_sims.index[0]

# Convert output.h5 and store a copy named after the simulation
Complete = False
try:
    h5_name = f"{automation_dir}out/output.h5"

    event_energy = active_sims.loc[sim_index, 'E0'] + active_sims.loc[sim_index, 'E1']

    energy_resolution = active_sims.loc[sim_index, 'EnergyResolution'] if 'EnergyResolution' in active_sims.columns else 4.2

    convert_sim_to_raw_h5(h5_name, event_energy, energy_resolution)

    # copy h5 file to the correct directory
    # shutil.copy raises on failure, so the simulation is only marked complete
    # when the copy really exists; the target directory is created if missing.
    hdf5_dir = f"{automation_dir}out/hdf5/"
    os.makedirs(hdf5_dir, exist_ok=True)
    shutil.copy(h5_name, f"{hdf5_dir}{active_sims.loc[sim_index, 'Sim']}.h5")
    Complete = True

except FileNotFoundError as missing_file:
    # Handled through the Complete flag below; report which path was missing.
    print(f'File not found: {missing_file.filename}')

# Set Status in parameters
if Complete:
    parameters.loc[sim_index, 'Status'] = 2
    if 'Time' in parameters.columns:
        # Replace the stored value with the time elapsed since it, in seconds.
        parameters.loc[sim_index, 'Time'] = time.time() - parameters.loc[sim_index, 'Time']
else:
    print('No output.h5 found')
    indicator_file('STOP')

# Update parameters.csv to reflect complete h5 file
parameters.to_csv(automation_dir + 'param.csv', index=False)