In [1]:
import os
import numpy as np
import pandas as pd
import time
import numba
import glob

In [2]:
#This function is like the x%64 function but when x is equal to 64 che function will return 64 istead 0
@numba.vectorize([numba.u1(numba.u1)])
def minus64(x):
    if x <=64:
        return x
    else:
        return x - 64

#This function for each layer number X will return the correspond value Y such that (TDC_CANNEL - Y)%4 = 0
@numba.vectorize([numba.u1(numba.u1)])
def layer_norm(x):
    if x == 1:
        return 1
    elif x == 2:
        return 3
    elif x == 3:
        return 2
    elif x == 4:
        return 4
    else:
        return 0
    
#This function, given e list 'lista' and a integer number n, will return a list of index such that if we apply the 
#function np.split(lista, index) we will have a list of array where each array contains the values with maximum distant
#n
@numba.jit([numba.uint64[:](numba.int64[:], numba.uint16),numba.uint64[:](numba.uint32[:], numba.uint16),numba.uint64[:](numba.uint8[:], numba.uint16)])
def subset(lista, n):
    index = [0] #the first index
    for i in range(len(lista) - 1):
        if (lista[i+1] - lista[i]) > n:
            index.append(i+1)
        else:
            continue
    index.append(len(lista)) #the last index
    return np.array(index).astype(np.uint64)

#This function will group hits by closest time using the subset function. For each group the function will compute 
# the drift time and marks the event with a event number
@numba.jit(numba.types.Tuple((numba.int16[:],numba.uint32[:]))(numba.uint8[:], numba.int64[:]))
def Time_Max_Filer(detectors, t):
    index = subset(t, 415)
    drift = []
    event = []
    for i in range(1,len(index)):
        t_times = (detectors[index[i-1]:index[i]] == 0) #the true positions are 139 triggers 
        times = t[index[i-1]:index[i]]
        if t_times.sum() == 0: #no triggers 
            drift.extend([0]*len(t_times))
            event.extend([0]*len(t_times))
        else:
            trigger_time = times[t_times].min() #the first trigger activated 
            for ns in times - trigger_time:
                drift.append(ns)
                event.append(i+1) 
    return np.array(drift).astype(np.int16) , np.array(event).astype(np.uint32)   

#function that recognizes if 4 consicutive cells are alligned along the layers
@numba.jit(numba.boolean(numba.uint8, numba.uint8, numba.uint8, numba.uint8))
def close(x1,x2,x3,x4):
    if (x2 == x1) or (x2 == x1 + 1):
        if (x3 == x2) or (x3 == x2 - 1):
            if (x4 == x3) or (x4 == x3 + 1):
                if (x4 == x3) or (x4 == x3 - 1):
                    return True           
    return False

#this function group by events (index1) and detectors (index2) and will return a mask of the alligned over consecutive
#cells events
@numba.jit(numba.boolean[:](numba.uint32[:], numba.uint8[:], numba.uint8[:], numba.uint8[:]))
def allign4(events, detectors, layers, columns):
    index1 = subset(events, 1)
    index2 = subset(detectors, 1)
    index = np.unique(np.concatenate((index1,index2),0))
    l4 = []
    for i in range(1,len(index)):
        l = layers[index[i-1]:index[i]]
        c = columns[index[i-1]:index[i]]
        if l.size == np.unique(l).size == 4:
            if close(c[0],c[1],c[2],c[3]):
                l4.extend([True]*4)
            else:
                l4.extend([False]*4)
            
        else:
            l4.extend([False]*l.size) 
    return np.array(l4)

In [22]:
!ls ../../../data/Run000260

data_000000.txt  data_000002.txt  data_000004.txt  data_000006.txt
data_000001.txt  data_000003.txt  data_000005.txt  data_000007.txt


In [3]:
DataFrames = []
nfile = 0

In [7]:
for filename in sorted(glob.glob('../../../data/Run000260/*'))[0:1]:
    nfile += 1
    print('started', nfile, 'run')
    data = pd.read_csv(filename, engine = 'c', sep = ',').dropna().drop('HEAD', axis = 1)
    #Setting the data type will reduce the exectution time of the numba functions and will save memory
    data = data.astype({"FPGA": np.bool, "TDC_CHANNEL":np.uint8,"ORBIT_CNT":np.int64,"BX_COUNTER":np.uint16,"TDC_MEAS":np.uint8})
    
    #We need the orbit information since we are not gropung by orbit, however we can renormaliza the value of the orbit
    #for having not too large time data
    orbit_min = data.ORBIT_CNT.min()
    data.eval('ORBIT_CNT =  ORBIT_CNT - @orbit_min', inplace = True)
    
    #Adding time column
    data.eval('t =  ORBIT_CNT*3564 + BX_COUNTER*25 + TDC_MEAS*25/30', inplace = True)
    data = data.astype({"t": np.int64})    
    
    #Sorting the time is necessary for appling the functions of above cell
    data = data.sort_values(by = ['t'])
    
    #List of conditions to assign a hit to a detector
    fpga = data['FPGA'].values == True 
    channel = (data['TDC_CHANNEL'].values > 64) & (data['TDC_CHANNEL'].values <= 128) #condition on TDC_CHANNEL
    trigger = data['TDC_CHANNEL'].values >= 129 

    conditions = [trigger, ~fpga & ~channel, ~fpga & channel, fpga & ~channel, fpga & channel]
    values = [0, 1, 2, 3, 4]

    #Adding detector column
    data['DETECTOR'] = np.select(conditions, values).astype(np.uint8)


    #List of conditions to assign a hit to a layer
    layer_4 = data['TDC_CHANNEL'].values%4 == 0 
    layer_3 = (data['TDC_CHANNEL'].values + 2) % 4 == 0
    layer_2 = (data['TDC_CHANNEL'].values + 1 ) % 4 == 0
    layer_1 = (data['TDC_CHANNEL'].values + 3) % 4 == 0
    trigger = data['TDC_CHANNEL'].values >= 129 

    #Adding layer column
    conditions = [trigger, layer_4, layer_3, layer_2, layer_1]
    values = [0, 4, 3, 2, 1]

    data['LAYER'] = np.select(conditions, values).astype(np.uint8)

    #add column coordinate of the cell
    data['COLUMN'] = ((minus64(data['TDC_CHANNEL'].values)
                   - layer_norm(data['LAYER'].values)) / 4 + 1).astype(np.uint8)

    #set column value to 0 when trigger is activate
    data.loc[data['DETECTOR'] == 0, 'COLUMN'] = 0
    
    #Dropping other trigger information and useless columns
    data.query('TDC_CHANNEL != 129 & TDC_CHANNEL != 130 & TDC_CHANNEL != 137 & TDC_CHANNEL != 138', inplace = True)
    data = data.drop(['ORBIT_CNT','FPGA', 'TDC_CHANNEL', 'BX_COUNTER', 'TDC_MEAS'], axis = 1)
    
    #We compute the drift time using the function Time_Max_Filer
    data['DRIFT_TIME'], data['EVENT'] = Time_Max_Filer(data.DETECTOR.values, data.t.values) 
    data.query('EVENT != 0 & DETECTOR != 0 & -415 <= DRIFT_TIME < 415', inplace = True)

    #Computed the drift time we don't need more the absolute time data
    data = data.drop('t', axis = 1)

    #We sort the data now by event, detector and layer (necessary for the allign4 function) 
    #and we apply the allign4 function
    data = data.sort_values(['EVENT', 'DETECTOR','LAYER'])
    data = data[allign4(data.EVENT.values, data.DETECTOR.values, data.LAYER.values, data.COLUMN.values)]
    DataFrames.append(data)

started 9 run


In [6]:
pd.concat(DataFrames)
#data.to_csv('../../../data/group15/Run000333.txt', index = False)

Unnamed: 0,DETECTOR,LAYER,COLUMN,DRIFT_TIME,EVENT
500,2,1,7,-303,322
503,2,2,7,-35,322
502,2,3,7,-124,322
501,2,4,7,-242,322
517,3,1,16,-321,327
...,...,...,...,...,...
704391,1,4,7,-80,266447
704395,3,1,11,-28,266448
704392,3,2,11,-262,266448
704396,3,3,11,-85,266448


In [None]:
#problem to be fixed: not all the alligned pattern are taken