# Binning example time-resolved ARPES data stored on Zenodo
In this example, we pull some time-resolved ARPES data from Zenodo, and generate a dask dataframe using the methods of the mpes package. It requires the mpes package to be installed, in addition to the sed package.
For performance reasons, it is best to store the data on locally attached storage (not a network drive).

In [None]:
import sys

import numpy as np

import matplotlib.pyplot as plt
from mpes import fprocessing as fp
import os
import shutil

sys.path.append("../")
from sed.binning import bin_dataframe

# Load Data

In [None]:
dataPath = '/path/to/folder' # Put in Path to a storage of at least 20 Gbyte free space.
! curl --output {dataPath}/WSe2.zip https://zenodo.org/record/6369728/files/WSe2.zip
shutil.unpack_archive(dataPath + '/WSe2.zip', extract_dir=dataPath)

In [None]:
# Folder of the scan inside the download location
fdir = f"{dataPath}/Scan049_1"
# Point the mpes dataframe processor at that folder and read its h5 files
dfp = fp.dataframeProcessor(datafolder=fdir)
dfp.read(ftype='h5', source='folder')
# Dataframe held by the processor (presumably the dask dataframe binned below)
ddf = dfp.edf

# Define the binning range

In [None]:
# Axes to bin along, with the number of bins and the (lower, upper) range of each
binAxes = ["X", "Y", "t"]
nBins = [120, 120, 120]
binRanges = [(0, 1500), (0, 1500), (65000, 67000)]
# One evenly spaced coordinate vector per axis: it spans the axis range and
# has as many points as that axis has bins.
coords = {
    axis: np.linspace(lower, upper, count)
    for axis, (lower, upper), count in zip(binAxes, binRanges, nBins)
}

In [None]:
# Preview the first 10 rows of the loaded dataframe
ddf.head(10)

# Compute distributed binning on the partitioned dask dataframe
We generated 100 dataframe partitions from the 100 files in the dataset, which we will bin in parallel with the dataframe binning function into a 3D grid.

In [None]:
%%time
res = bin_dataframe(
    df=ddf,
    bins=nBins,
    axes=binAxes,
    ranges=binRanges,
)

In [None]:
# One panel per binned axis; each panel plots the result summed over that axis
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(8, 2.5), constrained_layout=True)
for ax, dim in zip(axs, binAxes):
    res.sum(dim).plot(ax=ax)

# Compare to MPES binning

In [None]:
%%time
dfp.distributedBinning(axes=binAxes, nbins=nBins, ranges=binRanges, scheduler='threads', ret=False, jittered=False)

In [None]:
# Evaluates to True only if every element of the sed result equals the
# corresponding element of the mpes result stored under 'binned'
(res.data==dfp.histdict['binned']).all()