# Jupyter notebook based on ImageD11 to process scanning 3DXRD data
# Written by Haixing Fang, Jon Wright and James Ball
## Date: 16/02/2024

In [1]:
# There is a bug with the current version of ImageD11 in the site-wide Jupyter env.
# This has been fixed here: https://github.com/FABLE-3DXRD/ImageD11/commit/4af88b886b1775585e868f2339a0eb975401468f
# Until a new release has been made and added to the env, we need to get the latest version of ImageD11 from GitHub
# Put it in your home directory
# USER: Change the path below to point to your local copy of ImageD11:

import getpass
import os
import sys

# Prefer $USER (the original behaviour); fall back to the login name reported
# by the OS so that an unset $USER cannot silently yield ".../None/Code/ImageD11".
username = os.environ.get("USER") or getpass.getuser()

id11_code_path = f"/home/esrf/{username}/Code/ImageD11"

# If the checkout is missing, Python would quietly import whichever other
# ImageD11 is installed (e.g. the site-wide one this cell is meant to bypass),
# so say so explicitly instead of failing later in a confusing way.
if not os.path.isdir(id11_code_path):
    print(f"WARNING: {id11_code_path} does not exist - a different ImageD11 installation will be imported")

# Put the local checkout first on the import path so it shadows other installations.
sys.path.insert(0, id11_code_path)

In [21]:
# import functions we need

import glob, pprint
import fabio
import time
import shutil

import ImageD11.sinograms.dataset
import ImageD11.sinograms.lima_segmenter
import ImageD11.sinograms.assemble_label
import ImageD11.sinograms.properties

import numpy as np
import fabio
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
from skimage import filters, measure, morphology
import ipywidgets as widgets
import h5py
from IPython.display import display
%matplotlib widget

In [3]:
# Check that ImageD11 is imported from the local checkout (id11_code_path)
# rather than from the version installed in the Jupyter environment.
# The path displayed below should start with id11_code_path.

ImageD11.sinograms.dataset.__file__

[0;31mType:[0m        module
[0;31mString form:[0m <module 'ImageD11.sinograms.dataset' from '/home/esrf/james1997a/Code/ImageD11/ImageD11/sinograms/dataset.py'>
[0;31mFile:[0m        ~/Code/ImageD11/ImageD11/sinograms/dataset.py
[0;31mDocstring:[0m   <no docstring>

In [4]:
# TODO: Write a function to detect whether the dataset uses the old-style or the new-style data format

In [5]:
# OLD DATASETS

# NOTE: Old datasets (recorded before the new directory layout) do not distinguish between RAW_DATA and PROCESSED_DATA.
# In that case, use this cell to specify where your experimental folder is,
# e.g. /data/visitor/4752/id11/20210513
# NOTE(review): the original note said "do not run the cell below". That presumably refers to a
# new-layout path cell that is not part of this notebook; the sample/dataset cell that follows is still required.

### USER: specify your experimental directory

rawdata_path = "/home/esrf/james1997a/Data/ma4752/id11/20210618"

# list the experimental directory (oldest entries first) to help pick a sample
!ls -lrt {rawdata_path}

### USER: specify where you want your processed data to go

processed_data_root_dir = "/home/esrf/james1997a/Data/ma4752/id11/20240118/James"

total 5249
-rwxr-x---  1 backup backup 5372439 Jun 25  2021 ma4752_id11.h5
drwxr-x--- 25 backup backup    4096 Jan 18 06:08 MA4752_S4_2_XRD


In [6]:
# USER: pick a sample and a dataset you want to segment
# (both are passed to the DataSet constructor together with the two root directories)

sample = "MA4752_S4_2_XRD"
dataset = "DTL1z90"

# USER: specify path to detector mask
# (opened with fabio for the interactive preview and handed to the segmenter as "maskfile")

mask_path = "/data/id11/nanoscope/Eiger/mask_20210428.edf"

In [7]:
# create ImageD11 dataset object
# dataroot is the raw-data directory and analysisroot the processed-data directory chosen above

ds = ImageD11.sinograms.dataset.DataSet(dataroot=rawdata_path,
                                        analysisroot=processed_data_root_dir,
                                        sample=sample,
                                        dset=dataset)
# NOTE(review): import_all() presumably reads the scan/frame layout from the raw data into ds - confirm in ImageD11
ds.import_all()
# NOTE(review): save() presumably writes the dataset description to ds.dsfile, which later cells pass on - confirm
ds.save()

In [8]:
# TODO: autodetect eiger/frelon (get from ds object)

In [9]:
# Define the initial segmentation parameters shown in the interactive preview
start_pars = {#"bgfile": bg_path,
              "maskfile": mask_path,
              "cut": 1,
              "pixels_in_spot": 3}

# USER: scan, detector and frame index of the image used for the preview
# (previously hard-coded inside the HDF5 lookup below)
preview_scan = "300.1"
preview_detector = "eiger"
preview_frame = 150

mask = fabio.open(start_pars["maskfile"]).data
#bgimage = fabio.open(start_pars["bgfile"]).data

# no background: an all-zero image with the same shape and dtype as the mask
bgimage = np.zeros_like(mask)

with h5py.File(ds.masterfile, 'r') as h5In:
    # NOTE(review): the uint16 cast would wrap counts above 65535 if the stored data is wider -
    # presumably acceptable for a preview; confirm
    image = h5In[f"{preview_scan}/measurement/{preview_detector}"][preview_frame].astype('uint16')

def segment_image(image, cut, pixels_in_spot, bgimage):
    """Keep only the connected blobs of above-threshold pixels that are large enough.

    Reads the module-level ``mask`` array; the intensity is multiplied by
    ``1 - mask`` before thresholding, so pixels where ``mask`` is 1 never pass.

    Parameters
    ----------
    image : ndarray
        Detector frame to segment.
    cut : number
        Threshold; a pixel is kept only if its background-subtracted, masked
        intensity is strictly greater than ``cut``.
    pixels_in_spot : int
        Minimum number of pixels a connected blob must contain to be kept.
    bgimage : ndarray
        Background subtracted from ``image`` (must broadcast against it).

    Returns
    -------
    ndarray
        Background-subtracted image, set to zero outside the kept blobs.
        The dtype is signed (or floating) rather than that of ``image``.
    """
    # Subtract in a signed/floating dtype: with unsigned detector data a plain
    # ``image - bgimage`` wraps around to huge values wherever the background
    # exceeds the signal, which would then pass the threshold as fake peaks.
    work_dtype = np.result_type(image, bgimage, np.int64)
    signal = np.subtract(image, bgimage, dtype=work_dtype)

    cut_image = signal * (1 - mask) > cut
    labeled_image = measure.label(cut_image)

    # Pixel count per label in a single pass (index 0 is the background label),
    # instead of comparing the whole image against every blob label in turn.
    blob_sizes = np.bincount(labeled_image.ravel(), minlength=1)
    keep_blob = blob_sizes >= pixels_in_spot
    keep_blob[0] = False
    blob_mask = keep_blob[labeled_image]

    return signal * blob_mask


def update_image(cut, pixels_in_spot):
    """Redraw the segmentation preview for the given slider values.

    Segments the module-level ``image`` (with background ``bgimage``) using
    ``segment_image`` and shows the result on the current matplotlib axes
    with a logarithmic colour scale from 1 to 1000.

    Parameters
    ----------
    cut : int
        Intensity threshold passed to ``segment_image``.
    pixels_in_spot : int
        Minimum blob size passed to ``segment_image``.
    """
    filtered_image = segment_image(image, cut, pixels_in_spot, bgimage)
    ax = plt.gca()
    # Each call draws onto the same axes; drop the previously drawn images so
    # they do not pile up (and slow down redraws) every time a slider moves.
    for stale_image in list(ax.images):
        stale_image.remove()
    ax.imshow(filtered_image, cmap='viridis', norm=LogNorm(vmin=1, vmax=1000), interpolation="nearest")
    ax.set_title(f"cut={cut}, pixels_in_spot={pixels_in_spot}")
    plt.show()
    

# Sliders start at the values in start_pars; their positions are read back later
# (cut_slider.value / pixels_in_spot_slider.value) as the final segmentation parameters.
cut_slider = widgets.IntSlider(value=start_pars["cut"], min=1, max=2000, step=1, description='Cut:')
pixels_in_spot_slider = widgets.IntSlider(value=start_pars["pixels_in_spot"], min=1, max=20, step=1, description='Pixels in Spot:')
# Create the figure before wiring up the widget so update_image has a current figure to draw into.
plt.figure(figsize=(10, 10))
# interactive() calls update_image with the current slider values whenever a slider changes.
interactive_plot = widgets.interactive(update_image, cut=cut_slider, pixels_in_spot=pixels_in_spot_slider)
display(interactive_plot)

interactive(children=(IntSlider(value=1, description='Cut:', max=2000, min=1), IntSlider(value=3, description=…

In [10]:
# Final segmentation parameters: the same mask file as the preview, with
# "cut" and "pixels_in_spot" read from the current slider positions.
end_pars = dict(
    # bgfile=bg_path,
    maskfile=mask_path,
    cut=cut_slider.value,
    pixels_in_spot=pixels_in_spot_slider.value,
)

In [11]:
# create batch file to send to SLURM cluster
# setup() is given the dataset file and the chosen parameters; its return value
# (displayed below) is the path of the SLURM script that is submitted with sbatch

sbat = ImageD11.sinograms.lima_segmenter.setup(ds.dsfile, **end_pars)
sbat

cut 1
howmany 100000
pixels_in_spot 3
maskfile /data/id11/nanoscope/Eiger/mask_20210428.edf
bgfile 
cores_per_job 8
files_per_core 8
total files to process 1082 done 0
# Opened mask /data/id11/nanoscope/Eiger/mask_20210428.edf  6.02 % pixels are active


'/home/esrf/james1997a/Data/ma4752/id11/20240118/James/MA4752_S4_2_XRD/MA4752_S4_2_XRD_DTL1z90/slurm/lima_segmenter_slurm.sh'

In [12]:
# send batch file to SLURM cluster

sbatch_submit_result = !sbatch {sbat}

slurm_job_number = None

sbatch_submit_result = str(sbatch_submit_result[0])

if sbatch_submit_result.startswith("Submitted"):
    slurm_job_number = sbatch_submit_result.split("job ")[1]

print(slurm_job_number)

12089461


In [13]:
assert slurm_job_number is not None

slurm_job_finished = False

while not slurm_job_finished:
    squeue_results = !squeue -u $USER
    squeue_results = str(squeue_results)
    if slurm_job_number not in squeue_results:
        print("Slurm job finished!")
        slurm_job_finished = True
    else:
        print("Slurm job not finished! Waiting 10 seconds...")
        time.sleep(10)

Slurm job not finished! Waiting 10 seconds...
Slurm job not finished! Waiting 10 seconds...
Slurm job not finished! Waiting 10 seconds...
Slurm job not finished! Waiting 10 seconds...
Slurm job not finished! Waiting 10 seconds...
Slurm job not finished! Waiting 10 seconds...
Slurm job not finished! Waiting 10 seconds...
Slurm job not finished! Waiting 10 seconds...
Slurm job not finished! Waiting 10 seconds...
Slurm job not finished! Waiting 10 seconds...
Slurm job finished!


In [15]:
# label sparse peaks
# NOTE(review): presumably this gathers the per-scan sparse files written by the
# segmenter jobs into ds.sparsefile - confirm against ImageD11.sinograms.assemble_label

ImageD11.sinograms.assemble_label.main(ds.dsfile, ds.sparsefile)

Harvesting /home/esrf/james1997a/Data/ma4752/id11/20210618/MA4752_S4_2_XRD/MA4752_S4_2_XRD_DTL1z90/MA4752_S4_2_XRD_DTL1z90.h5: 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1, 9.1, 10.1, 11.1, 12.1, 13.1, 14.1, 15.1, 16.1, 17.1, 18.1, 19.1, 20.1, 21.1, 22.1, 23.1, 24.1, 25.1, 26.1, 27.1, 28.1, 29.1, 30.1, 31.1, 32.1, 33.1, 34.1, 35.1, 36.1, 37.1, 38.1, 39.1, 40.1, 41.1, 42.1, 43.1, 44.1, 45.1, 46.1, 47.1, 48.1, 49.1, 50.1, 51.1, 52.1, 53.1, 54.1, 55.1, 56.1, 57.1, 58.1, 59.1, 60.1, 61.1, 62.1, 63.1, 64.1, 65.1, 66.1, 67.1, 68.1, 69.1, 70.1, 71.1, 72.1, 73.1, 74.1, 75.1, 76.1, 77.1, 78.1, 79.1, 80.1, 81.1, 82.1, 83.1, 84.1, 85.1, 86.1, 87.1, 88.1, 89.1, 90.1, 91.1, 92.1, 93.1, 94.1, 95.1, 96.1, 97.1, 98.1, 99.1, 100.1, 101.1, 102.1, 103.1, 104.1, 105.1, 106.1, 107.1, 108.1, 109.1, 110.1, 111.1, 112.1, 113.1, 114.1, 115.1, 116.1, 117.1, 118.1, 119.1, 120.1, 121.1, 122.1, 123.1, 124.1, 125.1, 126.1, 127.1, 128.1, 129.1, 130.1, 131.1, 132.1, 133.1, 134.1, 135.1, 136.1, 137.1, 138.1, 139.1, 140.1, 1

In [16]:
# generate peaks table
# The options are named separately so they are easy to spot and adjust.

peak_table_options = {
    'algorithm': 'lmlabel',
    'wtmax': 70000,
    'save_overlaps': False,
}

ImageD11.sinograms.properties.main(
    ds.dsfile,
    ds.sparsefile,
    ds.pksfile,
    options=peak_table_options,
)

read ds /home/esrf/james1997a/Data/ma4752/id11/20240118/James/MA4752_S4_2_XRD/MA4752_S4_2_XRD_DTL1z90/MA4752_S4_2_XRD_DTL1z90_dataset.h5 : 0.081660 /s
Nscans 541
Options {'algorithm': 'lmlabel', 'wtmax': 70000, 'save_overlaps': False, 'nproc': 39}


100%|██████████| 579/579 [00:25<00:00, 23.16it/s]
 97%|█████████▋| 38/39 [00:01<00:00, 20.34it/s]


146461014 label and pair : 27.308522 /s
.............................................................................................................................................................................................................................................................................................................................................................11959372 connected components : 21.028050 /s
write hdf5 : 2.332894 /s
Trying to clean up shared memory


In [23]:
# make a new subfolder called "sparse" that holds all the individual "scan______sparse.h5" files

sparse_folder_path = os.path.join(ds.analysispath, "sparse")

# exist_ok avoids the check-then-create race and keeps the cell safe to re-run
os.makedirs(sparse_folder_path, exist_ok=True)

scan_sparse_files = glob.glob(os.path.join(ds.analysispath, "scan*_sparse.h5"))

for scan_sparse_file in scan_sparse_files:
    # Pass the full destination file name: when given only the folder,
    # shutil.move raises shutil.Error if a file of the same name is already
    # there (e.g. left over from an earlier segmentation run). With an explicit
    # file path the stale copy is replaced by the fresh one instead.
    destination = os.path.join(sparse_folder_path, os.path.basename(scan_sparse_file))
    shutil.move(scan_sparse_file, destination)

In [17]:
# TODO: incorporate DATA/visitor/ma5839/id11/20240118/SCRIPTS/0_S3DXRD_segment_and_label_single_dset.ipynb