## This notebook shows all the ways to create and configure a `DiagramDataset`
* A `DiagramDataset` allows you to exploit information on the spatial position of different shots.
* A `DiagramDataset` allows you to manage data persistence in real time as well.

In [None]:
import os
import pathlib
import shutil
import tempfile
import time

from laueimproc.io.download import get_samples  # gives access to the dataset
from laueimproc import Diagram, DiagramDataset

### How to add diagrams to the dataset ?
* All the means to init a `Diagram` are described in the `api_init_diagram` notebook.
* You can append diagrams before or after having defined a function chain.

In [None]:
# List the sample images once, in sorted order, and reuse the list below.
folder = get_samples()
jp2_files = sorted(folder.glob("*.jp2"))

# from a pathlike
dataset_1 = DiagramDataset(folder)  # a directory, given as a pathlib.Path
dataset_2 = DiagramDataset(min(jp2_files))  # a single file, given as a pathlib.Path

# from `Diagram` instances
diagrams = [Diagram(path) for path in jp2_files]
dataset_3 = DiagramDataset(diagrams)  # an iterable of diagrams
dataset_4 = DiagramDataset(diagrams[0], diagrams[-1])  # a few hand-picked diagrams

# big mix: diagrams, a list of diagrams and a file path in the same call
dataset_5 = DiagramDataset(
    diagrams[0],
    diagrams[1],
    [diagrams[2], diagrams[3]],
    diagrams[4].file,
)

In [None]:
# from an incomplete directory that is filled in real time
# NOTE(review): the temporary directory created here is not removed in any of
# the visible cells -- consider a final `shutil.rmtree(unfilled_folder)`.
unfilled_folder = pathlib.Path(tempfile.mkdtemp())
dataset = DiagramDataset(unfilled_folder)
for diagram in diagrams[:20]:  # copy a slice of the dataset
    shutil.copy(diagram.file, unfilled_folder)
    # presumably paced so that the printed size can be seen growing -- TODO confirm
    time.sleep(1)
    print(f"the dataset contains {len(dataset)} diagrams")
# presumably leaves the dataset time to pick up the last copied files -- TODO confirm
time.sleep(10)
print(f"the dataset contains {len(dataset)} diagrams")

print(dataset[0])

### How to apply a function to the diagrams of the dataset ?

In [None]:
def peaks_search(diagram: Diagram, density: float) -> int:
    """Run the peak search on a diagram and return the number of peaks found.

    Parameters
    ----------
    diagram : Diagram
        The diagram to process; ``find_spots`` is called on it.
    density : float
        Forwarded as the ``density`` keyword of ``diagram.find_spots``.

    Returns
    -------
    int
        ``len(diagram)`` once the search is done.
    """
    diagram.find_spots(density=density)
    nbr_found = len(diagram)
    return nbr_found

def sorted_pxl_max(diagram: Diagram):
    """Reorder the spots of a diagram by decreasing maximum intensity.

    The maximum of ``diagram.rois`` is taken over dimensions 1 and 2, the
    resulting values are ranked in descending order, and that ranking is
    handed to ``diagram.filter_spots`` with ``inplace=True``.
    Nothing is returned.
    """
    peak_values = diagram.rois.amax(dim=(1, 2))
    order = peak_values.argsort(descending=True)
    diagram.filter_spots(order, msg="sorted by decreasing intensities", inplace=True)

In [None]:
# apply both functions to the diagrams already in the dataset
spot_density = 0.6  # value passed to `peaks_search` as its `density` argument
nbr_spots = dataset.apply(peaks_search, args=(spot_density,))
print(nbr_spots)

_ = dataset.apply(sorted_pxl_max)  # the returned value is not needed here
print(dataset[0])

In [None]:
# apply in real time to scanned diagrams as they are scanned
for diagram in diagrams[20:]:
    shutil.copy(diagram.file, unfilled_folder)

# Poll until the diagram of index 99 can be read from the dataset.
# The wait is bounded so that "Restart & Run All" cannot hang forever if that
# diagram never shows up (failed scan, fewer sample files than expected, ...).
timeout = 300.0  # seconds, meant as a generous upper bound
deadline = time.monotonic() + timeout
while True:
    try:
        print(dataset[99])
    except IndexError as err:
        # not available yet: retry every second until the deadline
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"diagram 99 is still missing after {timeout:.0f} s "
                f"(the dataset contains {len(dataset)} diagrams)"
            ) from err
        time.sleep(1)
    else:
        break

In [None]:
# apply only on a subset
every_fifth = dataset[::5]  # slice of the dataset with a step of 5
indexs = every_fifth.apply(sorted_pxl_max)
print(sorted(indexs))