In [10]:
from typing import List, Mapping
from itertools import zip_longest, chain
from glob import iglob
from pickle import load
from functools import partial, reduce

from numba import jit
from scipy.ndimage.filters import gaussian_filter
import pandas as pd
import numpy as np
from numpy.random import normal, choice, rand
from numpy.polynomial.legendre import Legendre
import matplotlib.pyplot as plt
from h5py import File

from vmitools import (
    abel_inverse, finite_legendre_transform_in_theta,
    interp, tohist, transpose_linearly, transpose_to_drdomega, transpose_to_drdth,
    mrot, mhorshear, msqueeze,
)

In [11]:
# HDF5 input files for this analysis. Every path listed here is loaded with
# read_file() and the resulting per-file tables are added together.
filenames = [
    # Alternative dataset, currently disabled:
#     "/data/Step501N2/combined/274-283/274-283_1_66.h5",
    "/data/Step601N2/combined/336/336_10_10.h5",
]

In [12]:
def read_file(filename):
    """Load one combined scan file into a table keyed by (delay, atmax).

    Four datasets are read from the HDF5 file: ``/vmi/data`` (image stack,
    cast to float), ``/signal_shots`` (shot counts), ``/IR_delays`` and
    ``/peak_wavelengths``.  Every image is multiplied by its shot count before
    being stored, so the ``sum`` column holds count-weighted images.
    NOTE(review): presumably the stored images are per-shot averages and the
    weighting turns them back into sums -- confirm against the file producer.

    Parameters
    ----------
    filename : str
        Path of the HDF5 file; it is opened read-only.

    Returns
    -------
    pandas.DataFrame or None
        One row per (delay, atmax) combination whose shot count is non-zero.
        The index is a MultiIndex named ``('delay_float', 'atmax_float')``
        built as the Cartesian product of the two coordinate datasets; the
        columns are ``sum`` (a 2-D image array per row) and ``count``.
        ``None`` is returned, after printing a message, when one of the
        expected datasets is missing from the file (``KeyError``).

    Raises
    ------
    ValueError
        If ``/vmi/data`` is not 4-dimensional.
    """
    try:
        with File(filename, "r") as f:
            # `[...]` copies each dataset into memory, so nothing below needs
            # the file to stay open.
            imgs = f['/vmi/data'][...].astype('float')
            n = f['/signal_shots'][...]
            delays = f['/IR_delays'][...]
            atmax = f['/peak_wavelengths'][...]
    except KeyError:
        print("Got an error at: {}".format(filename))
        return None

    # The stack is indexed as (scan axis 0, scan axis 1, image x, image y);
    # only the image dimensions are needed to flatten the two scan axes.
    _, _, ix, iy = imgs.shape

    # Broadcast the 2-D shot counts over the image axes, then flatten the scan
    # axes so there is one image per table row.
    weighted = (imgs * n[:, :, None, None]).reshape(-1, ix, iy)

    ret = pd.DataFrame({
        # Must stay a list of 2-D arrays so pandas stores one array per cell.
        'sum': list(weighted),
        'count': n.reshape(-1),
    }, index=pd.MultiIndex.from_product(
        [delays.reshape(-1), atmax.reshape(-1)],
        names=['delay_float', 'atmax_float'],
    ))

    # Scan points that recorded no shots carry no information; drop them.
    return ret[ret['count'] != 0]


# Adds any number of frames together. Labels that exist in only some of the
# frames are treated as 0 in the others instead of turning the result into NaN.
sumup = partial(reduce, partial(pd.DataFrame.add, fill_value=0))

# read_file() returns None for a file with missing datasets. Those entries
# must be dropped before reducing, otherwise DataFrame.add is called with None.
loaded_frames = [
    frame for frame in map(read_file, filenames) if frame is not None
]
if not loaded_frames:
    raise RuntimeError(
        "None of the input files could be read: {}".format(filenames)
    )

# Turn the (delay_float, atmax_float) index levels into ordinary columns so
# they can be binned in the next step.
summed_flatten = sumup(loaded_frames).reset_index()

In [13]:
def _bin_grid(start, stop, step):
    """Return ``(centers, edges)`` for a regular grid of bin centers.

    ``centers`` is ``np.arange(start, stop, step)``; ``edges`` holds the
    midpoints between neighbouring centers and is what ``np.digitize`` needs.
    """
    centers = np.arange(start, stop, step)
    edges = (centers[1:] + centers[:-1]) / 2
    return centers, edges


def _snap_to_center(d, centers, edges):
    """Replace ``d`` by the center of the bin it falls into.

    Values below the first edge map to the first center and values at or above
    the last edge map to the last center, so out-of-range input is clamped to
    the ends of the grid rather than rejected.
    """
    idx = np.digitize(d, bins=edges)
    if np.ndim(idx) == 0:
        # Scalar input: hand back a plain Python float.
        return centers.item(idx)
    return centers[idx]


# The grids are built once here instead of on every call; the binning
# functions are applied row by row, so rebuilding them each time is wasted work.
_DELAY_CENTERS, _DELAY_EDGES = _bin_grid(-10, 0, 0.02)  # Check delay step!
_ATMAX_CENTERS, _ATMAX_EDGES = _bin_grid(75, 85, 0.02)  # Check atmax step!


# Both functions are plain Python on purpose. They index with
# `ndarray.item(index)`, which Numba's nopython mode does not support, so a
# bare `@jit` gives no speed-up and can fail to compile on recent Numba.
def bin_delays(d):
    """Snap a delay value onto the regular delay grid.

    The grid has centers ``np.arange(-10, 0, 0.02)``.

    Parameters
    ----------
    d : float or array-like of float
        Delay value(s) to bin.

    Returns
    -------
    float or numpy.ndarray
        The grid center of the bin containing ``d``: a Python float for scalar
        input, an array of the same shape for array input.
    """
    return _snap_to_center(d, _DELAY_CENTERS, _DELAY_EDGES)


def bin_atmax(d):
    """Snap an ``atmax`` value onto the regular atmax grid.

    The grid has centers ``np.arange(75, 85, 0.02)``.

    Parameters
    ----------
    d : float or array-like of float
        ``atmax`` value(s) to bin.

    Returns
    -------
    float or numpy.ndarray
        The grid center of the bin containing ``d``: a Python float for scalar
        input, an array of the same shape for array input.
    """
    return _snap_to_center(d, _ATMAX_CENTERS, _ATMAX_EDGES)


# Snap the raw scan coordinates onto the regular grids defined by bin_delays
# and bin_atmax, so that nearby readings end up in the same group.
summed_flatten['delay'] = summed_flatten['delay_float'].apply(bin_delays)
summed_flatten['atmax'] = summed_flatten['atmax_float'].apply(bin_atmax)

# Add up the images and the shot counts inside every (delay, atmax) bin.
# Each group's own DataFrame.sum is called explicitly: handing the Python
# builtin `sum` to apply() only worked because pandas silently replaced it with
# its own reduction, and that substitution is deprecated.
summed_all = (
    summed_flatten
    .groupby(['delay', 'atmax'])[['sum', 'count']]
    .apply(lambda group: group.sum())
)
summed_all[['count']]

Unnamed: 0_level_0,Unnamed: 1_level_0,count
delay,atmax,Unnamed: 2_level_1
-7.0,80.58,11.0
-7.0,80.6,418.0
-7.0,80.62,505.0
-7.0,80.64,66.0
-6.88,80.58,8.0
-6.88,80.6,306.0
-6.88,80.62,510.0
-6.88,80.64,147.0
-6.78,80.58,1.0
-6.78,80.6,111.0
