# S3DXRD processing first step: dataset creation, segmentation and peak labelling

Written by James Ball, Haixing Fang and Jon Wright

Last updated: 23/07/2024

In [None]:
# Scroll down past this code for setting up a project

import os, sys

def guess_ESRF_paths():  # This should be in silx somewhere?
    """Guess the ESRF data folders from the current working directory.

    The working directory is split on ``/`` and searched for a ``visitor``
    component. The experiment is the component right after ``visitor`` and
    the session is the third component after it; the component in between
    is ignored and the returned paths always use ``id11``.

    Returns:
        tuple: ``(dataroot, proc, scripts)`` - the ``RAW_DATA``,
        ``PROCESSED_DATA`` and ``SCRIPTS`` folders under
        ``/data/visitor/{experiment}/id11/{session}``.

    Raises:
        ValueError: if the working directory has no ``visitor`` component,
            or is too shallow to contain both an experiment and a session.
    """
    path_items = os.getcwd().split('/')
    try:
        idx = path_items.index('visitor')
        experiment, session = path_items[idx + 1], path_items[idx + 3]
    except (ValueError, IndexError):
        # Either 'visitor' is absent or the path stops before the session
        # folder: in both cases the user has to supply the folders by hand.
        raise ValueError(
            "You need to manually give dataroot, analysisroot and scripts folders"
        ) from None
    base = f"/data/visitor/{experiment}/id11/{session}"
    dataroot = f"{base}/RAW_DATA"
    proc = f"{base}/PROCESSED_DATA"
    scripts = f"{base}/SCRIPTS"
    return dataroot, proc, scripts

def run_from_git( path ):
    """Import ImageD11 from a git checkout in ``path/ImageD11``.

    If ``path/ImageD11`` does not exist the repository is cloned there; if
    it has no ``build`` folder the extensions are compiled in place. The
    checkout is then put first on ``sys.path`` and ImageD11 is imported.

    Args:
        path: folder that holds (or will hold) the ``ImageD11`` checkout.

    Raises:
        AssertionError: if the checkout or the ``build`` folder is still
            missing after the clone / build step.
        OSError: if the ``git`` executable cannot be started.
    """
    import subprocess  # only needed here; keeps the file-level imports untouched

    code_path = os.path.join(path, "ImageD11")
    if not os.path.exists(code_path):
        # Clone into code_path, not path: everything below expects the
        # repository root (setup.py, build/) to live in path/ImageD11.
        subprocess.call(
            ['git', 'clone', 'https://github.com/FABLE-3DXRD/ImageD11', code_path]
        )
        assert os.path.exists(code_path),'failed to checkout from git'
    bld = os.path.join(code_path, "build")
    if not os.path.exists(bld):
        # Build with the interpreter that is running this code so the
        # compiled extension matches it.
        subprocess.call(
            [sys.executable, 'setup.py', 'build_ext', '--inplace'],
            cwd=code_path,
        )
        assert os.path.exists(bld), 'failed to compile'
    sys.path.insert(0, code_path)
    import ImageD11, ImageD11.cImageD11
    print("Running from:", ImageD11.__file__)
    

def printsamples( dataroot ):
    """Print the names of the sub-folders of ``dataroot``, sorted, one per line."""
    folders = []
    for entry in os.listdir(dataroot):
        # only directories are listed; plain files are ignored
        if os.path.isdir(os.path.join(dataroot, entry)):
            folders.append(entry)
    folders.sort()
    print("Samples:\n\t" + "\n\t".join(folders))
    
def printdatasets( dataroot, sample):
    """Print the dataset names found for ``sample``, sorted, one per line.

    Dataset folders are the sub-folders of ``dataroot/sample`` named
    ``{sample}_{dataset}``; only the ``{dataset}`` part is printed.

    Args:
        dataroot: folder holding one sub-folder per sample.
        sample: name of the sample folder to look into.
    """
    sroot = os.path.join(dataroot, sample)
    # Match on "sample_" rather than "sample": otherwise a folder named
    # exactly like the sample, or one belonging to another sample that
    # shares the same leading characters, is listed with a mangled name.
    prefix = sample + "_"
    names = sorted(
        name[len(prefix):] for name in os.listdir(sroot)
        if name.startswith(prefix) and os.path.isdir(os.path.join(sroot, name))
    )
    print("Datasets:\n\t" + "\n\t".join(names))

In [None]:
# Set up the file paths. Edit this if you are not at ESRF or not using the latest data policy.
# guess_ESRF_paths() derives the three folders from the current working directory and
# raises ValueError if it cannot; in that case assign the three names by hand instead.
dataroot, analysisroot, scripts = guess_ESRF_paths()

# Stop early if the raw data folder is not there (only dataroot is checked here).
assert os.path.exists(dataroot), "Please fill in the dataroot and analysisroot folder names"
# dataroot     holds raw data       in folders dataroot     + {sample}/{sample}_{dataset}
# analysisroot holds output results in folders analysisroot + {sample}/{sample}_{dataset}
# scripts can hold a local installation of ImageD11 if you need one
print("\n".join((dataroot, analysisroot, scripts)))

In [None]:
# Optional : run the latest code from github
# run_from_git puts the checkout in the scripts folder first on sys.path, so it has to
# run before the ImageD11 imports below for them to pick up that copy.
# Comment it out to use the ImageD11 that is already installed.
run_from_git( scripts )
import pprint
import numpy as np
import ImageD11.sinograms.dataset
import ImageD11.sinograms.lima_segmenter
import ImageD11.sinograms.assemble_label
import ImageD11.sinograms.properties
import ImageD11.nbGui.nb_utils as utils
from ImageD11.nbGui import segmenter_gui

In [None]:
# List the samples available (the sub-folders of dataroot):
printsamples(dataroot)

In [None]:
# USER: Decide which sample (one of the folder names printed by the cell above)
sample = 'WAu'

In [None]:
# List the datasets for that sample (folder names are shown without the leading sample name):
printdatasets( dataroot, sample )

In [None]:
# USER: Decide which dataset (as printed by the cell above, i.e. without the sample name in front)
dataset = "fs2d_2"

In [None]:
# create ImageD11 dataset object for the chosen sample / dataset
# (dataroot and analysisroot come from the path set-up cell at the top)
ds = ImageD11.sinograms.dataset.DataSet(dataroot=dataroot,
                                        analysisroot=analysisroot,
                                        sample=sample,
                                        dset=dataset)
# NOTE(review): presumably reads the scan information from the raw data - confirm in ImageD11
ds.import_all()

In [None]:
# Experts : update these files for your detector if you need to
# Each path is stored on the dataset and also kept in a notebook variable:
# the batch-processing cell at the end of the notebook applies the same three
# files to every dataset it imports.
maskfile = ds.maskfile = "/data/id11/nanoscope/Eiger/eiger_mask_E-08-0144_20240205.edf"
e2dxfile = ds.e2dxfile = "/data/id11/nanoscope/Eiger/e2dx_E-08-0144_20240205.edf"
e2dyfile = ds.e2dyfile = "/data/id11/nanoscope/Eiger/e2dy_E-08-0144_20240205.edf"

In [None]:
# Save the dataset, including the detector files set above
# (presumably written to ds.dsfile, which the later steps are given - TODO confirm)
ds.save()

In [None]:
# Open the segmenter widget for this dataset; choose the segmentation
# parameters there before running the cells below.
ui = segmenter_gui.SegmenterGui(ds)

In [None]:
# Read the segmentation options currently set in the widget
options = ui.getopts()

In [None]:
# create batch file to send to SLURM cluster
# The widget options are read again here and passed to setup() as keyword arguments.

sbat = ImageD11.sinograms.lima_segmenter.setup(ds.dsfile, **ui.getopts())
# setup() returning None is treated as "nothing left to segment" for this scan
if sbat is None:
    raise ValueError("This scan has already been segmented!")
print(sbat)

In [None]:
# Submit the batch file and wait for the job
# (the 60 is presumably a wait time in seconds, cf. wait_time_sec=60 in the batch cell - TODO confirm)
utils.slurm_submit_and_wait(sbat, 60)

In [None]:
# label sparse peaks for the dataset saved in ds.dsfile
# (the batch cell at the end skips this step when ds.sparsefile already exists)

ImageD11.sinograms.assemble_label.main(ds.dsfile)

In [None]:
# generate peaks table for the dataset saved in ds.dsfile
# (the batch cell at the end uses the same options and skips this step when ds.pksfile already exists)

ImageD11.sinograms.properties.main(ds.dsfile, options={'algorithm': 'lmlabel', 'wtmax': 70000, 'save_overlaps': False})

In [None]:
# Guard against "Run all cells": execution stops here so that the batch-processing
# cells below only run when asked for. The error message refers to the literal 1
# in the condition, so keep it as a 1 / 0 switch.
if 1:
    raise ValueError("Change the 1 above to 0 to allow 'Run all cells' in the notebook")

In [None]:
# Choose what the batch run will process.
# skips_dict: datasets to leave out, keyed by sample name
# (presumably {sample: [dataset, ...]} - confirm against utils.find_datasets_to_process)
skips_dict = {
    "FeAu_0p5_tR_nscope": ["top_-50um", "top_-100um"]
}

dset_prefix = "m"  # some common string in the names of the datasets (presumably used to select them - TODO confirm)

# samples to look through
sample_list = ["Klegs"]
    
samples_dict = utils.find_datasets_to_process(dataroot, skips_dict, dset_prefix, sample_list)

# Check this before going on: the batch cell below loops over it as {sample: [datasets]}
pprint.pprint( samples_dict )

In [None]:
# Batch-process every dataset listed in samples_dict:
#   1. create (or reload) the DataSet for each one,
#   2. set up a segmentation batch file for those without a sparse file yet,
#   3. submit all the batch files together and wait,
#   4. label the sparse peaks and generate the peaks tables.
# Note: the loops below rebind the notebook variables sample, dataset and ds.

# manual override:
# samples_dict = {"FeAu_0p5_tR_nscope": ["top_100um", "top_200um"]}

# use the parameters we selected with the widget
seg_pars = options

sbats = []            # batch files that still have to be submitted
dataset_objects = []  # every dataset that goes on to the labelling stage

for sample, datasets in samples_dict.items():
    for dataset in datasets:
        print(f"Processing dataset {dataset} in sample {sample}")
        ds = ImageD11.sinograms.dataset.DataSet(dataroot=dataroot,
                                                analysisroot=analysisroot,
                                                sample=sample,
                                                dset=dataset)
        if os.path.exists(ds.dsfile):
            print("Load existing", ds.dsfile)
            ds = ImageD11.sinograms.dataset.load( ds.dsfile )
        else:
            print("Importing DataSet object")
            try:
                ds.import_all()
            except Exception as e:
                # Best effort: one dataset that cannot be imported must not
                # stop the whole batch, so report it and move on.
                print("Problem? Skipping", sample, dataset)
                print(e)
                continue
            # New datasets get the detector files chosen earlier in the notebook
            ds.maskfile = maskfile
            ds.e2dxfile = e2dxfile
            ds.e2dyfile = e2dyfile
            ds.save()
        print(f"I have a DataSet {ds.dset} in sample {ds.sample}")
        if os.path.exists(os.path.join(ds.analysispath, ds.sparsefiles[0])):
            print(f"Found existing Sparse file for {dataset} in sample {sample}, skipping")
        else:
            print("Segmenting")
            sbat = ImageD11.sinograms.lima_segmenter.setup(ds.dsfile, **seg_pars)
            if sbat is None:
                print(f"{dataset} in sample {sample} already lima_segmented, skipping")
            else:
                # Only real batch files are queued; a None must not reach slurm
                sbats.append(sbat)
        dataset_objects.append(ds)

utils.slurm_submit_many_and_wait(sbats, wait_time_sec=60)

for ds in dataset_objects:
    print("Labelling sparse peaks")
    if os.path.exists(ds.sparsefile):
        print("Already assembled", ds.sparsefile)
    else:
        ImageD11.sinograms.assemble_label.main(ds.dsfile)
    if os.path.exists(ds.pksfile):
        print('Already labelled',ds.pksfile)
    else:
        print("Generating peaks table")
        ImageD11.sinograms.properties.main(ds.dsfile, options={'algorithm': 'lmlabel', 'wtmax': 70000, 'save_overlaps': False})

print("Done!")