## This notebook shows the different ways to create and configure a `DiagramDataset`
* A `DiagramDataset` allows you to exploit information on the spatial position of different shots.
* A `DiagramDataset` allows you to manage data persistence in real time as well.

In [1]:
import pathlib
import shutil
import tempfile
import time

from laueimproc.io.download import get_samples  # gives access to the dataset
from laueimproc import Diagram, DiagramDataset

### How to add diagrams to the dataset ?
* All the means to init a `Diagram` are described in the `api_init_diagram` notebook.
* You can append diagrams before or after having defined a function chain.

In [2]:
# From path-like objects.
folder = get_samples()
dataset_1 = DiagramDataset(folder)  # a pathlib.Path pointing to a directory
jp2_files = sorted(folder.glob("*.jp2"))  # all the sample images, in sorted order
dataset_2 = DiagramDataset(min(jp2_files))  # a pathlib.Path pointing to a single file

# From `Diagram` instances.
diagrams = [Diagram(jp2_file) for jp2_file in jp2_files]
dataset_3 = DiagramDataset(diagrams)  # an iterable of diagrams
dataset_4 = DiagramDataset(diagrams[0], diagrams[-1])  # a few hand-picked diagrams

# Everything at once: the different kinds of arguments can be mixed in a single call.
dataset_5 = DiagramDataset(
    diagrams[0],  # a diagram
    diagrams[1],  # another diagram
    [diagrams[2], diagrams[3]],  # a list of diagrams
    diagrams[4].file,  # the `file` attribute of a diagram
)

In [3]:
# From an incomplete directory that is filled in real time:
# the dataset is created on an empty temporary folder, then files are copied into it.
# NOTE(review): the folder created by `tempfile.mkdtemp()` is not removed in the visible cells.
unfilled_folder = pathlib.Path(tempfile.mkdtemp())
dataset = DiagramDataset(unfilled_folder)
for diagram in diagrams[:10]:  # copy the first 10 diagrams, pausing 2 seconds after each copy
    shutil.copy(diagram.file, unfilled_folder)
    time.sleep(2)
    print(f"the dataset contains {len(dataset)} diagrams")
# Wait before the last check: in the recorded output the count lags behind the copies.
time.sleep(10)
print(f"the dataset contains {len(dataset)} diagrams")

print(dataset[0])

the dataset contains 1 diagrams
the dataset contains 1 diagrams
the dataset contains 1 diagrams
the dataset contains 1 diagrams
the dataset contains 5 diagrams
the dataset contains 5 diagrams
the dataset contains 5 diagrams
the dataset contains 5 diagrams
the dataset contains 5 diagrams
the dataset contains 10 diagrams
the dataset contains 10 diagrams
Diagram from img_00.jp2:
    History empty, please initialize the spots `self.find_spots()`.
    Current state:
        * id, state: 137905629015600, 3dd438383ae07061c7b4b326616f6b29
        * total mem: 400.0B


### How to apply a function ?

In [4]:
def peaks_search(diagram: Diagram, density: float) -> int:
    """Run the peak search on a diagram and return its length afterwards.

    Parameters
    ----------
    diagram : Diagram
        The diagram on which `find_spots` is called.
    density : float
        Forwarded as the `density` keyword argument of `find_spots`.

    Returns
    -------
    int
        The value of `len(diagram)` once the search is done
        (the recorded notebook output reports it as the number of spots).
    """
    diagram.find_spots(density=density)
    nbr_found = len(diagram)
    return nbr_found

def sorted_pxl_max(diagram: Diagram):
    """Reorder the spots of a diagram in place, by decreasing maximum intensity.

    The sorting key is `diagram.rois.amax(dim=(1, 2))`, the maximum of `rois` over
    its dimensions 1 and 2 (presumably one value per spot, TODO confirm the layout of `rois`).
    The resulting order is handed to `filter_spots` with `inplace=True`; nothing is returned.
    """
    brightest = diagram.rois.amax(dim=(1, 2))
    order = brightest.argsort(descending=True)
    diagram.filter_spots(order, msg="sorted by decreasing intensities", inplace=True)

In [5]:
# Apply the peak search to the diagrams already in the dataset.
# Extra positional arguments of the function are passed through `args`.
search_density = 0.6
nbr_spots = dataset.apply(peaks_search, args=(search_density,))
print(nbr_spots)

# Sort the spots of each diagram; the value returned by `apply` is not needed here.
_ = dataset.apply(sorted_pxl_max)
print(dataset[0])

100%|███████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 31230.86diag/s]


{0: 1217, 4: 505, 3: 2330, 2: 3387, 1: 534, 9: 239, 8: 334, 7: 977, 6: 529, 5: 297}


100%|███████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 22322.00diag/s]

Diagram from img_00.jp2:
    History:
        1. 1217 spots from self.find_spots(density=0.6)
        2. 1217 to 1217 spots: sorted by decreasing intensities
    Current state:
        * id, state: 137905629015600, bc847205c859e6124b1a34fa2741448f
        * nbr spots: 1217
        * total mem: 16.5MB





In [6]:
# Apply in real time to scanned diagrams as they are scanned: copy the remaining
# diagrams into the folder on which the dataset was created.
for diagram in diagrams[10:]:
    shutil.copy(diagram.file, unfilled_folder)

# Poll once per second until the diagram of index 99 is available.
# A deadline bounds the wait, so that the cell fails with a clear error
# instead of hanging forever if the files are never picked up.
deadline = time.monotonic() + 600.0  # seconds, deliberately generous
while True:
    try:
        print(dataset[99])
    except IndexError:
        if time.monotonic() >= deadline:
            raise TimeoutError("the diagram of index 99 never appeared in the dataset")
        time.sleep(1)
    else:
        break

Diagram from img_99.jp2:
    History:
        1. 1199 spots from self.find_spots(density=0.6)
        2. 1199 to 1199 spots: sorted by decreasing intensities
    Current state:
        * id, state: 137905629098432, a745bd187ee0c48269aad677c17e427f
        * nbr spots: 1199
        * total mem: 16.7MB


In [11]:
# Apply only on a subset: the dataset is sliced (here with a step of 5)
# and the function is applied to the result of the slicing.
# `sorted(indexs)` iterates over the value returned by `apply`;
# the recorded output lists the indices 0, 5, ..., 95.
indexs = dataset[::5].apply(sorted_pxl_max)
print(sorted(indexs))

100%|███████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 49200.05diag/s]

[0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95]



