In [53]:
import os
import socket
import glob
import h5py
import numpy as np
import pandas as pd
from pandas import Series, concat, cut
from datetime import datetime

import sqlite3
from sqlite3 import Error

import dask.dataframe as dd
from dask.delayed import delayed
from dask.distributed import Client

from scipy.stats import pearsonr, spearmanr
import matplotlib.pyplot as plt
import yaml

In [54]:
def timing_series(h5file: h5py.File, flash:int) -> Series:
    """Return the train timestamps of a DAQ file as a Series of datetimes.

    Reads the ``index`` and ``time`` datasets of
    ``/FL<flash>/Timing/Bunch pattern/train index <flash>`` and converts every
    ``time`` entry with ``datetime.fromtimestamp`` (naive, local time zone).

    :param h5file: open HDF5 file
    :param flash: 1 or 2 (checked with an assertion)
    :return: Series named "Date/Time", indexed by the "Train ID" values
    """
    assert flash in [1, 2], "Precond.: FLASH 1 or 2"
    group_path = f"/FL{flash:1d}/Timing/Bunch pattern/train index {flash:1d}"
    timing = h5file[group_path]
    train_ids = Series(timing["index"], name="Train ID")
    stamps = Series(timing["time"], name="Date/Time", index=train_ids)
    return stamps.apply(datetime.fromtimestamp)

def dset_series(group: h5py.Group, pos=None) -> Series:
    """Return the ``value`` dataset of an HDF5 group as a Series per train.

    The Series is named after the last three components of the group path and
    indexed by the group's ``index`` dataset ("Train ID").

    :param group: HDF5 group providing ``index`` and ``value`` datasets
    :param pos: optional index applied to every row of ``value``; ``None``
        (default) keeps the whole row. ``pos=0`` selects element 0.
    :return: Series with one entry per row of ``index``
    """
    short_name = "/".join(group.name.split("/")[-3:])
    trainId = Series(group["index"], name="Train ID")
    # Resolve the dataset once instead of once per row.
    values = group["value"]
    # `is not None` rather than truthiness: position 0 is a valid selection.
    if pos is not None:
        rows = (values[i][pos] for i in trainId.index)
    else:
        rows = (values[i] for i in trainId.index)
    return Series(rows, name=short_name, index=trainId)

def h5_load(filename, dataset_dict):
    """Load the requested datasets of one DAQ file into a pandas DataFrame.

    :param filename: path of the HDF5 file to read
    :param dataset_dict: maps output column name to either an HDF5 group path
        (str) or a ``(group path, pos)`` pair, where ``pos`` is forwarded to
        ``dset_series``
    :return: DataFrame with a 'time' column (from ``timing_series(h5file, 1)``)
        followed by one column per key of ``dataset_dict``; rows containing
        any missing value are dropped
    """
    with h5py.File(filename, 'r') as h5file:
        time = timing_series(h5file, 1)
        dataset_list = []
        for value in dataset_dict.values():
            # isinstance also accepts str subclasses, which `type(value) == str`
            # would have wrongly treated as a (path, pos) pair.
            if isinstance(value, str):
                dataset_list.append(dset_series(h5file[value]))
            else:
                path, pos = value[0], value[1]
                dataset_list.append(dset_series(h5file[path], pos))
        df = concat([time] + dataset_list, axis=1).dropna()
        df.columns = ['time'] + list(dataset_dict)
        return df
    
def load_dataset(file_list, dataset_dict):
    """Build a dask DataFrame with one delayed ``h5_load`` call per file.

    :param file_list: iterable of HDF5 file paths
    :param dataset_dict: forwarded unchanged to ``h5_load``
    :return: dask DataFrame assembled with ``dd.from_delayed``
    """
    lazy_load = delayed(h5_load)
    partitions = [lazy_load(path, dataset_dict) for path in file_list]
    return dd.from_delayed(partitions)

def create_connection(db_file):
    """Open a connection to the SQLite database stored in ``db_file``.

    A ``sqlite3.Error`` raised while connecting is printed, not propagated.

    :param db_file: database file
    :return: Connection object, or None if connecting failed
    """
    try:
        return sqlite3.connect(db_file)
    except Error as exc:
        print(exc)
        return None

def get_daq_files(id_min, id_max, db_file=None,
                  raw_dir='/asap3/fs-flash-o/gpfs/camp/2020/data/11010494/raw/hdf/express-0'):
    """Return the DAQ files that contain train IDs in ``[id_min, id_max]``.

    :param id_min: first train ID (inclusive)
    :param id_max: last train ID (inclusive)
    :param db_file: SQLite file holding the ``files`` and ``trainIDs`` tables;
        defaults to the notebook-level ``database`` variable
    :param raw_dir: directory joined in front of every file name
    :return: list of file paths, or None when no file matches
    :raises sqlite3.Error: if the database cannot be opened or queried
    """
    conn = create_connection(database if db_file is None else db_file)
    if conn is None:
        # create_connection() has already printed the underlying reason.
        raise Error('could not open the train-ID database')
    # Bound parameters instead of string formatting: no SQL injection and no
    # quoting problems.
    sql = '''SELECT name FROM files WHERE id IN
            (SELECT file_id FROM trainIDs WHERE id >= ? AND id <= ?)'''
    try:
        # int(): sqlite3 cannot bind numpy integer scalars.
        daq_files = conn.execute(sql, (int(id_min), int(id_max))).fetchall()
    finally:
        # `with conn:` only manages the transaction; it never closes the
        # connection, so close it explicitly.
        conn.close()
    if len(daq_files) > 0:
        return [os.path.join(raw_dir, row[0]) for row in daq_files]
    return None

In [9]:
# Create a dask.distributed Client with default arguments and display its
# summary (scheduler address, dashboard, workers) as the cell output.
local_cluster = Client() 
local_cluster

0,1
Client  Scheduler: tcp://127.0.0.1:38626  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 10  Cores: 80  Memory: 540.77 GB


In [17]:
# get run-numbers
#with open('runs.yaml', 'r') as f:
#    runNrs = yaml.safe_load(f)
#timepix_run_number = 865
# TimePix HDF5 file passed to get_trainIDs() and read again for 'raw/trigger nr'.
tpxFile = '/asap3/fs-flash-o/gpfs/camp/2020/data/11010494/processed/hdf5/e-run_0007_20200903-1517.hdf5'
# SQLite database that get_daq_files() opens through the global name `database`.
database = '/asap3/fs-flash-o/gpfs/camp/2020/data/11010494/processed/hdf5/trainIDs.db'

# HDF5 group path of the GHz ADC channel (same path as the 'ToF' entry of dataset_dict).
ghz_adc_addr = '/FL1/Experiment/BL1/ADQ412 GHz ADC/CH00/TD'

#### TimePix

##### get trainIDs

In [18]:
def get_trainIDs(tpxFile):
    """Pair the TimePix trigger numbers of a file with facility train IDs.

    The facility entry whose timestamp is closest to the first TimePix
    timestamp is taken as the starting point (its index is printed). From
    there each following trigger is assigned the next train ID; whenever two
    consecutive trigger numbers differ by exactly 2, one extra train ID is
    skipped. Pairing stops once the facility train IDs are exhausted.

    NOTE(review): only a step of exactly 2 is compensated; larger gaps in the
    trigger numbers do not skip any train IDs - confirm this is intended.

    :param tpxFile: path of the HDF5 file providing the ``timing/facility``
        and ``timing/timepix`` datasets
    :return: tuple ``(trigger numbers, train IDs)`` of equally long arrays
    """
    with h5py.File(tpxFile, 'r') as h5:
        x2_trainIDs = h5['timing/facility/train id'][:]
        x2_timestamps = h5['timing/facility/timestamp'][:]
        tpx3_triggerNrs = h5['timing/timepix/trigger nr'][:]
        tpx3_timestamps = h5['timing/timepix/timestamp'][:]

    # Consistency checks: paired datasets have equal length, no repeated entries.
    assert len(x2_trainIDs) == len(x2_timestamps), 'unmatching length'
    assert len(tpx3_triggerNrs) == len(tpx3_timestamps), 'unmatching length'
    for column in (x2_trainIDs, x2_timestamps, tpx3_triggerNrs, tpx3_timestamps):
        assert len(set(column)) == len(column), 'found duplicates'

    # Facility entry closest in time to the first TimePix trigger.
    start_index = np.abs(x2_timestamps - tpx3_timestamps[0]).argmin()
    print(start_index)

    #assert not (missing_elements(x2_trainIDs[start_index:])), 'list of trainIDs is not continuous'
    trainIDs = [x2_trainIDs[start_index]]
    trigger_Nrs = [tpx3_triggerNrs[0]]
    n_trains = len(x2_trainIDs)
    skip = 1
    consecutive = zip(tpx3_triggerNrs[:-1], tpx3_triggerNrs[1:])
    for i, (current, following) in enumerate(consecutive):
        if (following - current) == 2:
            skip += 1
        position = start_index + i + skip
        if position >= n_trains:
            # The position only grows, so no later trigger can be paired either.
            break
        trainIDs.append(x2_trainIDs[position])
        trigger_Nrs.append(following)
    assert len(trainIDs) == len(trigger_Nrs), 'matching fails'

    return (np.array(trigger_Nrs), np.array(trainIDs))

In [19]:
# Pair TimePix trigger numbers with train IDs and print both array lengths
# (get_trainIDs asserts that they are equal).
triggers, trainIDs_tpx = get_trainIDs(tpxFile)
print(len(triggers), len(trainIDs_tpx))

0
3302 3302


In [20]:
# Read the complete 'raw/trigger nr' dataset of the TimePix file into memory.
# Presumably one entry per recorded event - TODO confirm.
with h5py.File(tpxFile, 'r') as f:
    #tof = f['raw/tof'][:]
    trigNr = f['raw/trigger nr'][:]

In [21]:
# Count how often each distinct trigger number occurs in trigNr; events[k]
# belongs to the k-th smallest trigger number (the unique values are discarded).
# NOTE(review): later cells index `events` with positions derived from
# trainIDs_tpx - this presumes every trigger in `triggers` occurs at least
# once in trigNr; confirm.
_ , events = np.unique(trigNr, return_counts=True)

### FLASH's ADC

In [22]:
def sum_tof(trace, offset=10):
    """Sum the part of a mean-subtracted trace that exceeds ``offset``.

    The mean of the trace is subtracted, then all samples strictly greater
    than ``offset`` are added up. The input array is left untouched, and
    integer traces are accepted (the subtraction produces a new float array).

    :param trace: array-like of samples
    :param offset: threshold applied after subtracting the mean
    :return: the sum of the samples above ``offset``, or -1 if the raw samples
        sum to exactly zero
    """
    trace = np.asarray(trace)
    if np.sum(trace) == 0:
        return -1
    # Out-of-place subtraction: `trace -= mean` would overwrite the caller's
    # data and fails for integer arrays (float result cannot be cast in place).
    centered = trace - np.mean(trace)
    return np.sum(centered[centered > offset])

In [71]:
%%time 
# First and last train ID matched to the TimePix run.
id_min = int(trainIDs_tpx[0])
id_max = int(trainIDs_tpx[-1])
# NOTE(review): only id_min .. id_min+10 is queried and id_max is unused in
# this cell - presumably a quick test range; confirm.
daq_files = get_daq_files(id_min, id_min+10)
#adc_traces = np.float64(daq.valuesOfInterval(ghz_adc_addr, (tt1, tt2)))
# Output column name -> HDF5 group to load (same path as ghz_adc_addr).
dataset_dict = {'ToF': '/FL1/Experiment/BL1/ADQ412 GHz ADC/CH00/TD'}

# dask DataFrame built from one delayed h5_load call per DAQ file.
df = load_dataset(daq_files, dataset_dict)

df

CPU times: user 7.47 s, sys: 371 ms, total: 7.84 s
Wall time: 7.77 s


Unnamed: 0_level_0,time,ToF
npartitions=1,Unnamed: 1_level_1,Unnamed: 2_level_1
,datetime64[ns],object
,...,...


In [72]:
# Debugging aid: list every attribute of the dask DataFrame.
dir(df)

['ToF',
 '_HTML_FMT',
 '__abs__',
 '__add__',
 '__and__',
 '__array__',
 '__array_ufunc__',
 '__array_wrap__',
 '__await__',
 '__bool__',
 '__class__',
 '__complex__',
 '__contains__',
 '__dask_graph__',
 '__dask_keys__',
 '__dask_layers__',
 '__dask_optimize__',
 '__dask_postcompute__',
 '__dask_postpersist__',
 '__dask_scheduler__',
 '__dask_tokenize__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__float__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__int__',
 '__invert__',
 '__iter__',
 '__le__',
 '__len__',
 '__long__',
 '__lt__',
 '__mod__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__or__',
 '__pow__',
 '__radd__',
 '__rand__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rfloordiv__',
 '__rmod__',
 '__rmul__',
 '__ror__',
 '__rpow__',
 '__rsub__',
 '__rtruediv__',
 '__

In [70]:
# FIXME: this call fails with "TypeError: set_index() missing 1 required
# positional argument: 'other'" - the column (or series) to index by is missing.
# NOTE(review): presumably meant `df = df.set_index(<column>, sorted=True)`;
# confirm which column is intended and that the partitions are really sorted.
df.set_index(inplace=True, sorted=True)

TypeError: set_index() missing 1 required positional argument: 'other'

In [None]:
# One train ID and one summed ToF value per ADC trace; the k-th trace is
# assigned the train ID id_min + k.
# NOTE(review): adc_traces is only assigned in a commented-out line of an
# earlier cell, so this cell cannot run on a fresh kernel - confirm its source.
n_traces = len(adc_traces)
trainIDs_daq = [id_min + k for k in range(n_traces)]
tof_sum = np.asarray([sum_tof(adc_traces[k]) for k in range(n_traces)])

In [None]:
# Show the dimensions of the ADC trace array.
adc_traces.shape

In [None]:
# Pair DAQ and TimePix trains after shifting the TimePix train IDs by -9;
# comm1 / comm2 are the positions of the common IDs in the first / second input.
# NOTE(review): the -9 is hard-coded; presumably the shift found by the scan
# cells further down - confirm.
val, comm1, comm2 = np.intersect1d(trainIDs_daq, trainIDs_tpx-9, assume_unique=True, return_indices=True)
# Summed ToF signal and TimePix counts of the paired trains.
tof_corr = tof_sum[comm1]
tpx_corr = events[comm2]

# Spearman rank correlation of the two quantities (the p-value is discarded).
corr2, _ = spearmanr(tof_corr, tpx_corr)
corr2

In [None]:
# Scatter plot of the summed ToF signal against the TimePix counts of the paired trains.
plt.plot(tof_corr, tpx_corr, 'o')

In [None]:
%%time

spear = []
pear = []
shift = []

for s in range(-25,5):
    val, comm1, comm2 = np.intersect1d(trainIDs_daq, trainIDs_tpx+s, assume_unique=True, return_indices=True)
    tof_corr = tof_sum[comm1]
    tpx_corr = events[comm2]
        
    tof_corr = np.array(tof_corr)
    tpx_corr = np.array(tpx_corr)
    corr, _  =  pearsonr(tof_corr, tpx_corr)
    corr2, _ = spearmanr(tof_corr, tpx_corr)
    pear.append(corr)
    spear.append(corr2)
    shift.append(s)
    print(s)

In [None]:
# Correlation coefficients as a function of the applied train-ID shift.
fig, ax = plt.subplots()
ax.plot(shift, pear, 'bo')
ax.plot(shift, spear, 'ro')
ax.set_title(f' Pearson (blue) vs Spearman (red) ')
ax.set_xlabel('shift')
ax.set_ylabel('corr coef')

# Trailing statement keeps the cell from echoing the last call's return value.
pass

In [None]:
%%time

spear = []
pear = []
shift = []

for s in range(-25,5):

    tof_corr = []
    tpx_corr = []
 
    for i in range(len(events)): # triggerNumbers TPX [0, 1, ..., 4332]
        for j in range(len(trainIDs_daq)): # 619006674...
            if (trainIDs_tpx[i]+s) == trainIDs_daq[j]:
                tof_corr.append(tof_sum[j])
                tpx_corr.append(events[i])

    tof_corr = np.array(tof_corr)
    tpx_corr = np.array(tpx_corr)
    corr, _  =  pearsonr(tof_corr, tpx_corr)
    corr2, _ = spearmanr(tof_corr, tpx_corr)
    pear.append(corr)
    spear.append(corr2)
    shift.append(s)
    print(s)

In [None]:
# Correlation coefficients as a function of the applied train-ID shift.
fig, ax = plt.subplots()
ax.plot(shift, pear, 'bo')
ax.plot(shift, spear, 'ro')
ax.set_title(f' Pearson (blue) vs Spearman (red) ')
ax.set_xlabel('shift')
ax.set_ylabel('corr coef')

# Trailing statement keeps the cell from echoing the last call's return value.
pass

### Check the generated HDF5 file for the stored correlation shift

In [None]:
# Generated HDF5 file (relative path) that the next cell inspects.
file = 'out/run_0865_20191219-0926.hdf5'

In [None]:
# Print the 'shift' attribute stored on the '/timing/facility' group.
# The file is opened read-only, so the commented-out assignment below would
# need a writable mode to be re-enabled.
with h5py.File(file, 'r') as f:
    #f['/timing/facility'].attrs['shift'] = -9
    print(f['/timing/facility'].attrs['shift'])