# S3DXRD processing first step: dataset creation, segmentation and peak labelling

Written by James Ball, Haixing Fang and Jon Wright

Last updated: 23/07/2024

Outside ESRF: download [install_ImageD11_from_git.py](https://github.com/FABLE-3DXRD/ImageD11/tree/master/ImageD11/nbGui/install_ImageD11_from_git.py), and update the path in the next cell:

In [None]:
# Path to the ImageD11 installer helper script. Outside ESRF, point this at your downloaded copy.
install_script = '/data/id11/nanoscope/install_ImageD11_from_git.py'
# NOTE(review): exec runs the whole script inside this notebook's namespace - only use a trusted copy.
# The script is expected to define setup_ImageD11_from_git, which is called on the next line.
with open(install_script) as _install_fh:  # 'with' closes the file handle as soon as it has been read
    exec(_install_fh.read())
PYTHONPATH = setup_ImageD11_from_git( ) # ( os.path.join( os.environ['HOME'],'Code'), 'ImageD11_git' )

In [None]:
# Import needed packages
%matplotlib ipympl
import os
import pprint

import numpy as np

import ImageD11.sinograms.dataset
import ImageD11.sinograms.lima_segmenter
import ImageD11.sinograms.assemble_label
import ImageD11.sinograms.properties
import ImageD11.nbGui.nb_utils as utils
from ImageD11.nbGui import segmenter_gui


# Experts : update these files for your detector if you need to
# These values are attached to every DataSet created further down in this notebook.
detector = 'eiger'
omegamotor = 'rot_center'
dtymotor = 'dty'
maskfile = "/data/id11/nanoscope/Eiger/eiger_mask_E-08-0144_20240205.edf"
e2dxfile = "/data/id11/nanoscope/Eiger/e2dx_E-08-0144_20240205.edf"
e2dyfile = "/data/id11/nanoscope/Eiger/e2dy_E-08-0144_20240205.edf"

# Default segmentation options: the starting values handed to the segmenter GUI
options = dict(cut=1, pixels_in_spot=3, howmany=100000)

In [None]:
# Set up the file paths. Edit this if you are not at ESRF or not using the latest data policy.
dataroot, analysisroot = segmenter_gui.guess_ESRF_paths()

# An empty dataroot triggers a reminder to set both folders by hand
# (presumably it means the guess failed - confirm against guess_ESRF_paths).
if len(dataroot) == 0:
    print("Please fix in the dataroot and analysisroot folder names above!!")

# Echo both paths with repr so that empty strings and stray whitespace are visible.
print(f"dataroot = {dataroot!r}")
print(f"analysisroot = {analysisroot!r}")

In [None]:
# List the samples available under dataroot; pick one of them in the next cell:
segmenter_gui.printsamples(dataroot)

In [None]:
# USER: Decide which sample to work on (used to list its datasets and to build the DataSet below)
sample = 'WAu'

In [None]:
# List the datasets for that sample; pick one of them in the next cell:
segmenter_gui.printdatasets( dataroot, sample )

In [None]:
# USER: Decide which dataset of the chosen sample to use for the single-dataset demo below
dataset = "siliconAttrz25"

## Example demo to see how a frame is segmented for one dataset:

In [None]:
# create ImageD11 dataset object for the sample / dataset chosen above
ds = ImageD11.sinograms.dataset.DataSet(
    dataroot=dataroot,
    analysisroot=analysisroot,
    sample=sample,
    dset=dataset,
    detector=detector,
    omegamotor=omegamotor,
    dtymotor=dtymotor,
)

# Import everything for this dataset.
# Can use scans = [f'{scan}.1' for scan in range(1,102)] )
ds.import_all()

# Record the detector mask and e2dx / e2dy files on the dataset, then save it.
ds.maskfile = maskfile
ds.e2dxfile = e2dxfile
ds.e2dyfile = e2dyfile
ds.save()

In [None]:
# Build the segmenter GUI for this dataset, seeded with the current segmentation options.
# The values chosen in the GUI are read back with ui.getopts() in the following cells.
ui = segmenter_gui.SegmenterGui(ds, **options )

In [None]:
# Keep the options currently held by the GUI; the batch-processing loop at the end of
# the notebook passes this dict to lima_segmenter.setup for every dataset.
options = ui.getopts()

In [None]:
# create batch file to send to SLURM cluster, using the options currently set in the GUI
sbat = ImageD11.sinograms.lima_segmenter.setup(
    ds.dsfile,
    pythonpath=PYTHONPATH,
    **ui.getopts()
)

# setup() handing back None is treated as "nothing left to do" for this dataset
if sbat is None:
    raise ValueError("This scan has already been segmented!")

print(sbat)

In [None]:
# Submit the batch file to SLURM and block until it has finished.
# NOTE(review): 60 is presumably the wait/poll time in seconds (the batch cell at the end
# of the notebook passes wait_time_sec=60 to the "many" variant) - confirm.
utils.slurm_submit_and_wait(sbat, 60)

In [None]:
# label sparse peaks: run the assemble_label step on the saved dataset file

ImageD11.sinograms.assemble_label.main(ds.dsfile)

In [None]:
# generate peaks table from the labelled sparse peaks of this dataset
# NOTE(review): the meaning of wtmax=70000 is not visible here - confirm before changing it.
ImageD11.sinograms.properties.main(
    ds.dsfile,
    options={
        'algorithm': 'lmlabel',
        'wtmax': 70000,
        'save_overlaps': False,
    },
)

In [None]:
# Safety stop: while the condition is 1 this cell always raises, so "Run all cells"
# halts here before the batch-processing cells that follow. Change the 1 to 0 to continue.
if 1:
    raise ValueError("Change the 1 above to 0 to allow 'Run all cells' in the notebook")

In [None]:
# Inputs for the dataset search.
# skips_dict maps sample name -> dataset names (presumably the ones to leave out - confirm).
skips_dict = {"FeAu_0p5_tR_nscope": ["top_-50um", "top_-100um"]}

# some common string in the names of the datasets (*?)
dset_prefix = "m"

# samples to search
sample_list = ["Klegs"]

# Result is iterated as {sample: [dataset, ...]} by the batch-processing cell below.
samples_dict = utils.find_datasets_to_process(
    dataroot,
    skips_dict,
    dset_prefix,
    sample_list,
)

pprint.pprint(samples_dict)

In [None]:
# manual override:
# samples_dict = {"FeAu_0p5_tR_nscope": ["top_100um", "top_200um"]}

# Batch-process every dataset listed in samples_dict:
#   1. load (or import and save) a DataSet for each one,
#   2. prepare a segmentation batch file for those without a sparse file yet,
#   3. submit all batch files and wait,
#   4. label the sparse peaks and generate the peaks table for each DataSet.
# The segmentation uses the parameters we selected with the widget (`options`).

sbats = []            # batch files for the datasets that still need segmenting
dataset_objects = []  # every DataSet that was loaded or imported successfully

for sample, datasets in samples_dict.items():
    for dataset in datasets:
        print(f"Processing dataset {dataset} in sample {sample}")
        ds = ImageD11.sinograms.dataset.DataSet(dataroot=dataroot,
                                                analysisroot=analysisroot,
                                                sample=sample,
                                                dset=dataset,
                                                detector=detector,
                                                omegamotor=omegamotor,
                                                dtymotor=dtymotor
                                               )
        if os.path.exists(ds.dsfile):
            # A dataset file was saved on an earlier run: re-use it instead of re-importing.
            print("Load existing", ds.dsfile)
            ds = ImageD11.sinograms.dataset.load( ds.dsfile )
        else:
            print("Importing DataSet object")
            try:
                ds.import_all()
            except Exception as e:
                # Deliberately best-effort: one bad dataset must not stop the whole batch.
                print(f"Problem? Skipping {sample} {dataset}")
                print(e)
                continue
            ds.maskfile = maskfile
            ds.e2dxfile = e2dxfile
            ds.e2dyfile = e2dyfile
            ds.save()
        print(f"I have a DataSet {ds.dset} in sample {ds.sample}")
        # The first sparse file is used as the marker that segmentation already ran.
        if os.path.exists(os.path.join(ds.analysispath, ds.sparsefiles[0])):
            print(f"Found existing Sparse file for {dataset} in sample {sample}, skipping")
        else:
            print("Segmenting")
            # Pass pythonpath exactly as the single-dataset cell does, so the batch
            # jobs run against the same ImageD11 installation.
            sbat = ImageD11.sinograms.lima_segmenter.setup(ds.dsfile, pythonpath=PYTHONPATH, **options)
            if sbat is None:
                print(f"{dataset} in sample {sample} already lima_segmented, skipping")
            else:
                # Only real batch files are queued; a None must never reach the submitter.
                sbats.append(sbat)
        # Queued for labelling whether or not segmentation was needed.
        dataset_objects.append(ds)

utils.slurm_submit_many_and_wait(sbats, wait_time_sec=60)

for ds in dataset_objects:
    print("Labelling sparse peaks")
    if os.path.exists(ds.sparsefile):
        print("Already assembled", ds.sparsefile)
    else:
        ImageD11.sinograms.assemble_label.main(ds.dsfile)
    if os.path.exists(ds.pksfile):
        print('Already labelled',ds.pksfile)
    else:
        print("Generating peaks table")
        ImageD11.sinograms.properties.main(ds.dsfile, options={'algorithm': 'lmlabel', 'wtmax': 70000, 'save_overlaps': False})

print("Done!")