In [28]:
from glob import iglob
from datetime import datetime
from itertools import chain, repeat
from functools import lru_cache

import numpy as np
import pandas as pd
from cytoolz import compose, reduce
from h5py import File
from dask import delayed, compute
from dask.bag import from_sequence
from dask.diagnostics import ProgressBar
import matplotlib.pyplot as plt

In [22]:
def read_delay(filename):
    with File(filename, 'r') as f:
        key = '/user_laser/delay_line/position'
        dt = f[key][0]
        if all(dt == f[key][...]):
            return dt
        return np.nan


def read_scala(filename, tag_offset = None):
    if tag_offset is None:
        where = slice(None)
    else:
        where = slice(tag_offset, None)
    with File(filename, 'r') as f:
        try:
            for tag, iom, img, dt in zip(
                f['/bunches'][where],
                f['/photon_diagnostics/FEL01/I0_monitor/iom_sh_a_pc'][where],
                f['/vmi/andor'],
                f['/user_laser/delay_line/position'][where],
            ):
                yield {
                    'tag': tag,
                    'iom_intensity_pc': iom,
                    'img_intensity': img.sum(),
                    'delay_float': dt,
                }
        except KeyError:
            return


def read_img(filename, tag_offset = None):
    if tag_offset is None:
        where = slice(None)
    else:
        where = slice(tag_offset, None)
    with File(filename, 'r') as f:
        try:
            for tag, iom, img in zip(
                f['/bunches'][where],
                f['/photon_diagnostics/FEL01/I0_monitor/iom_sh_a_pc'][where],
                f['/vmi/andor'],
            ):
                yield {
                    'tag': tag,
                    'iom_intensity_pc': iom,
                    'img': img.astype('double'),
                }
        except KeyError:
            return

In [6]:
run = 212
tag_offset = 0  # 0 or 1
filenames = sorted(fn for fn in set(iglob(f"/data/*/Run_{run:03d}/rawdata/*.h5")))
filelist = pd.DataFrame(
    {'filename': fn, 'delay_float': read_delay(fn)}
    for fn in filenames
)
filelist

Unnamed: 0,delay_float,filename
0,0.000000,/data/Step501N2/Run_212/rawdata/Run_212_0.h5
1,,/data/Step501N2/Run_212/rawdata/Run_212_486595...
2,-6.999999,/data/Step501N2/Run_212/rawdata/Run_212_486596...
3,-6.999999,/data/Step501N2/Run_212/rawdata/Run_212_486596...
4,-6.999999,/data/Step501N2/Run_212/rawdata/Run_212_486596...
5,-6.999999,/data/Step501N2/Run_212/rawdata/Run_212_486596...
6,-6.999999,/data/Step501N2/Run_212/rawdata/Run_212_486596...
7,-6.999999,/data/Step501N2/Run_212/rawdata/Run_212_486596...
8,-6.999999,/data/Step501N2/Run_212/rawdata/Run_212_486596...
9,-6.999999,/data/Step501N2/Run_212/rawdata/Run_212_486596...


In [16]:
def binit(d):
    centers = np.arange(-10, 0, 0.03)  # Check delay step!
    bins = (centers[1:] + centers[:-1]) / 2
    return centers.item(np.digitize(d, bins=bins))


where = (filelist["delay"] != np.nan) & (filelist["delay"] < -1)
filelist["delay"] = filelist["delay_float"].apply(binit)
filelist = filelist[where]
filelist["delay"].unique()

array([-7.  , -6.97, -6.94, -6.91, -6.88, -6.85, -6.82, -6.79, -6.76,
       -6.73, -6.7 , -6.67, -6.64, -6.61, -6.58, -6.55, -6.52, -6.49,
       -6.46, -6.43, -6.4 , -6.37, -6.34, -6.31, -6.28, -6.25, -6.22,
       -6.19, -6.16])

In [60]:
bg_period, bg_mod = 3, 0  # /Background_Period


@lru_cache()
def sumupit(*filenames):
    ddf = (
        from_sequence(filenames)
        .map(read_img, tag_offset=tag_offset)
        .flatten()
        .to_dataframe()
    )
    ddf["is_bg"] = ddf["tag"] % bg_period == bg_mod
    where = (
        10 < ddf['iom_intensity_pc']  # Change me!
    )

    with ProgressBar():
        ret = compute([
            {
                'is_bg': bg, 'delay': dt,
                'count': delayed(ddf[where & (ddf['is_bg'] == bg)]['img'].to_bag().count()),
                'summed': delayed(ddf[where & (ddf['is_bg'] == bg)]['img'].to_bag().sum()),
            }
            for bg in [True, False]
        ])
    return list(chain(*ret))

In [62]:
grouppedlist = (
    filelist[filelist["delay"] == dt]["filename"].values
    for dt in filelist["delay"].unique()
)

summed = list(chain.from_iterable(sumupit(*fns) for fns in grouppedlist))

[########################################] | 100% Completed |  2min 14.2s
[########################################] | 100% Completed |  1min  4.1s
[########################################] | 100% Completed |  1min  4.3s
[########################################] | 100% Completed |  1min  6.1s
[########################################] | 100% Completed |  1min  3.5s
[########################################] | 100% Completed |  2min  6.9s
[########################################] | 100% Completed |  1min  2.2s
[########################################] | 100% Completed |  1min  1.8s
[########################################] | 100% Completed |  1min  6.3s
[########################################] | 100% Completed |  1min  6.7s
[########################################] | 100% Completed |  2min  7.5s
[########################################] | 100% Completed |  1min  3.4s
[########################################] | 100% Completed |  1min  1.8s
[#####################################

In [69]:
summed = pd.DataFrame(summed).summed.set_index(['delay', 'is_bg'])

Unnamed: 0_level_0,Unnamed: 1_level_0,count,summed
delay,is_bg,Unnamed: 2_level_1,Unnamed: 3_level_1
-6.16,True,1632,"[[177155.0, 176427.0, 176000.0, 178184.0, 1789..."
-6.16,False,3264,"[[362529.0, 362972.0, 359502.0, 364798.0, 3657..."
-6.16,True,1621,"[[175864.0, 175052.0, 174417.0, 177056.0, 1770..."
-6.16,False,3234,"[[358606.0, 358785.0, 355417.0, 360811.0, 3620..."
-6.16,True,1655,"[[179204.0, 178802.0, 177821.0, 180357.0, 1811..."
-6.16,False,3303,"[[366002.0, 366983.0, 362911.0, 368231.0, 3699..."
-6.16,True,3265,"[[354150.0, 353178.0, 351098.0, 355858.0, 3576..."
-6.16,False,6517,"[[724755.0, 724802.0, 718673.0, 728687.0, 7318..."
-6.16,True,1563,"[[169367.0, 168959.0, 168249.0, 170473.0, 1713..."
-6.16,False,3135,"[[349125.0, 350143.0, 345753.0, 350934.0, 3520..."


In [71]:
avg = {
    k: {'delay': k,
        'bg_n': summed.loc[(k, True), 'count'],
        'bg_img': summed.loc[(k, True), 'summed'] / summed.loc[(k, True), 'count'],
        'sg_n': summed.loc[(k, False), 'count'],
        'sg_img': summed.loc[(k, False), 'summed'] / summed.loc[(k, False), 'count'],
        'df_n': summed.loc[(k, False), 'count'],
        'df_img': (summed.loc[(k, False), 'summed'] / summed.loc[(k, False), 'count']
                   - summed.loc[(k, True), 'summed'] / summed.loc[(k, True), 'count'])}
    for k in filelist["delay"].unique()
} 

for k, d in avg.items():
    plt.figure(figsize=(15, 5))
    plt.subplot(131)
    plt.title("Background")
    plt.pcolormesh(d['bg_img'], cmap="Greys")
    plt.clim(0, None)
    plt.axis('equal')
    plt.subplot(132)
    plt.title("Signal")
    plt.pcolormesh(d['sg_img'], cmap="Greys")
    plt.clim(0, None)
    plt.axis('equal')
    plt.subplot(133)
    plt.title("Diff")
    plt.pcolormesh(d['df_img'], cmap="Greys")
    plt.clim(0, None)
    plt.axis('equal')
    plt.tight_layout()
    plt.show()
    break

plt.figure()
plt.plot([k for k in avg], [d['df_img'].sum() for d in avg.values()], 'o-')
plt.grid(True)
plt.show()

KeyError: (-7.000000000000064, True)

In [21]:
# print(filenames)
writein = f"/data/Step501N2/Run_{run:03d}/work"

for k, d in avg.items():
    with File(f"{writein}/reduced_dt={k:.3f}.h5", "w") as f:
        f['delay'] = d['delay']
        f['bg_n'] = d['bg_n']
        f['bg_img'] = d['bg_img']
        f['sg_n'] = d['sg_n']
        f['sg_img'] = d['sg_img']
        f['df_n'] = d['df_n']
        f['df_img'] = d['df_img']