# Jupyter notebook based on ImageD11 to process scanning 3DXRD data
# Written by Haixing Fang, Jon Wright and James Ball
## Date: 16/01/2024

In [None]:
# There is a bug with the current version of ImageD11 in the site-wide Jupyter env.
# This has been fixed here: https://github.com/FABLE-3DXRD/ImageD11/commit/4af88b886b1775585e868f2339a0eb975401468f
# Until a new release has been made and added to the env, we need to get the latest version of ImageD11 from GitHub.
# Put it somewhere in your home directory.
# USER: Change the path below to point to your local copy of ImageD11:

id11_code_path = "/home/esrf/james1997a/Code/ImageD11"

import sys

# Insert the local checkout at the front of the module search path so that the
# ImageD11 imports in the following cell pick it up before any other copy on sys.path.
sys.path.insert(0, id11_code_path)

In [None]:
# import functions we need

import os, glob, pprint
import numpy as np
import matplotlib
import h5py

%matplotlib widget
from matplotlib import pyplot as plt

import ImageD11.grain
import ImageD11.unitcell
import ImageD11.indexing
import ImageD11.columnfile
import ImageD11.refinegrains
import ImageD11.sinograms.dataset
import ImageD11.sinograms.properties
import ImageD11.sinograms.lima_segmenter
import ImageD11.sinograms.assemble_label

from ImageD11.blobcorrector import eiger_spatial

In [None]:
# USER: specify your experimental directory

base_dir = "/home/esrf/james1997a/Data/ihma439/id11/20231211"

# The raw data are expected in the RAW_DATA sub-folder of the experiment directory
rawdata_path = os.path.join(base_dir, 'RAW_DATA')

# IPython shell escape: list the contents of the raw data folder
# (ls -lrt sorts by modification time, most recent last)
!ls -lrt {rawdata_path}

In [None]:
# USER: pick the sample and the dataset you want to process

sample = "FeAu_0p5_tR_nscope"
dataset = "top_700um"

# USER: specify the folders holding the processed data

processed_data_root_dir = os.path.join(base_dir, 'PROCESSED_DATA/James')  # USER: modify this to change the destination folder if desired
sparse_pixels_dir = os.path.join(processed_data_root_dir, "SparsePixels_NewMask")  # USER: modify this to change the name of the SparsePixels folder inside processed_data_root_dir

# paths of the H5 files for this sample/dataset, all inside sparse_pixels_dir
# (in this notebook dset_path and pks_path are read, cf_path is written)

dset_path = os.path.join(sparse_pixels_dir, f"ds_{sample}_{dataset}.h5" )
sparse_path = os.path.join(sparse_pixels_dir, f'{sample}_{dataset}_sparse.h5')
pks_path = os.path.join(sparse_pixels_dir, f'pks_{sample}_{dataset}.h5')
cf_path = os.path.join(sparse_pixels_dir, f'cf_{sample}_{dataset}.h5')

In [None]:
# load the dataset object from the H5 file at dset_path

ds = ImageD11.sinograms.dataset.load(dset_path)

# print a summary of the dataset and its shape as a quick sanity check
print(ds)
print(ds.shape)

In [None]:
# load the table of segmented peaks from pks_path, then merge the peaks in 2D and 4D
# pk2dmerge is given the omega and dty positions of the dataset; the merged result
# is what the rest of the notebook calls the "4D peaks"

peaks_table = ImageD11.sinograms.properties.pks_table.load(pks_path)
peaks_4d = peaks_table.pk2dmerge(ds.omega, ds.dty)

In [None]:
# Boolean mask keeping only the 4D peaks that cover more than 25 pixels

m = peaks_4d['Number_of_pixels'] > 25

# 2D histogram of omega against dty for the selected peaks, weighted by the
# square root of their intensity - the traces should look sinusoidal

fig, ax = plt.subplots()
counts, xedges, yedges, im = ax.hist2d(
    peaks_4d['omega'][m],
    peaks_4d['dty'][m],
    weights=np.sqrt(peaks_4d['sum_intensity'][m]),
    bins=(ds.obinedges, ds.ybinedges),
    norm=matplotlib.colors.LogNorm(),
)
ax.set(xlabel="Omega angle", ylabel="dty")
fig.colorbar(im, ax=ax)

plt.show()

In [None]:
# We will now generate a cf (columnfile) object for the 4D peaks.
# The peak positions are corrected for detector spatial distortion before the columnfile is built.
# USER: specify the paths to the dxfile and dyfile

e2dx_path = os.path.join(processed_data_root_dir, '../CeO2/e2dx_E-08-0173_20231127.edf')
e2dy_path = os.path.join(processed_data_root_dir, '../CeO2/e2dy_E-08-0173_20231127.edf')

# USER: specify the path to the parameter file
# (a bare file name, so it is looked up in the current working directory)

par_path = 'Fe_refined.par'

# build the correction function from the dx/dy files and apply it to the merged peaks

spatial_correction_function = eiger_spatial(dxfile=e2dx_path, dyfile=e2dy_path)

spatial_correction_dict_4d = spatial_correction_function(peaks_4d)

# turn the corrected peaks dictionary into a columnfile

cf_4d = ImageD11.columnfile.colfile_from_dict(spatial_correction_dict_4d)

# Filter the columnfile to select only peaks greater than 5 pixels in size

print(f"{cf_4d.nrows} peaks before filtration")
cf_4d.filter(cf_4d.Number_of_pixels > 5)
print(f"{cf_4d.nrows} peaks after filtration")

# calculates the scattering vector (g-vector) geometries using parameters from the file
# (the later cells read the tth, eta, ds, gx, gy and gz columns of cf_4d)

cf_4d.parameters.loadparameters(par_path)

cf_4d.updateGeometry()

In [None]:
# Cake plot of the 4D peaks: two-theta on x, eta on y.
# With good parameters in the par file each ring shows up as a straight line.

fig, ax = plt.subplots()
ax.scatter(cf_4d.tth, cf_4d.eta, s=1)
ax.set(xlabel="Two-theta", ylabel="eta")

plt.show()

In [None]:
# OPTIONAL: export CF to an flt so we can play with it with ImageD11_gui
# uncomment the below line

# cf_4d.writefile(f'{sample}_{dataset}_4d_peaks.flt')

In [None]:
def strongest_peaks(colf, uself=True, frac=0.995, B=0.2, doplot=None):
    """Return a boolean mask selecting the strongest peaks in a columnfile.

    The peak intensities are first corrected, then sorted from strongest to
    weakest. The cutoff is the corrected intensity of the first peak at which
    the cumulative (normalised) intensity reaches ``frac``; only peaks
    strictly brighter than that cutoff are selected.

    Parameters
    ----------
    colf : columnfile-like object
        Must provide ``sum_intensity`` and ``ds`` arrays, plus ``tth`` and
        ``eta`` when ``uself`` is true.
    uself : bool
        If true, also multiply the intensities by
        ``ImageD11.refinegrains.lf(colf.tth, colf.eta)``.
    frac : float
        Fraction of the total corrected intensity at which to place the
        cutoff. Values above 1 are treated like 1 (cutoff at the weakest peak).
    B : float
        Factor in the ``exp(B * ds**2)`` intensity correction.
    doplot : float or None
        If not None, plot the cumulative intensity curve; the value is used as
        the lower y-limit of the log-scale plot.

    Returns
    -------
    numpy.ndarray of bool
        Mask with one entry per peak of ``colf``; empty if ``colf`` is empty.
    """
    # correct intensities for structure factor (decreases with 2theta)
    cor_intensity = colf.sum_intensity * np.exp(colf.ds * colf.ds * B)
    if uself:
        cor_intensity = cor_intensity * ImageD11.refinegrains.lf(colf.tth, colf.eta)
    if cor_intensity.size == 0:
        # nothing to rank: avoid indexing into an empty cumulative sum
        return np.zeros(cor_intensity.shape, dtype=bool)
    order = np.argsort(cor_intensity)[::-1]  # sort the peaks by intensity
    sortedpks = cor_intensity[order]
    cums = np.cumsum(sortedpks)
    cums /= cums[-1]
    # searchsorted returns len(cums) when frac is larger than every entry
    # (frac > 1), so clamp it to the last valid index
    enough = min(np.searchsorted(cums, frac), len(cums) - 1)
    # Aim is to select the strongest peaks for indexing.
    cutoff = sortedpks[enough]
    mask = cor_intensity > cutoff
    if doplot is not None:
        # cums is already normalised so that its last entry is 1
        fig, axs = plt.subplots(1, 2, figsize=(10, 5))
        axs[0].plot(cums, ',')
        axs[0].set(xlabel='npks', ylabel='fractional intensity')
        axs[0].plot([mask.sum(),], [frac,], "o")
        axs[1].plot(cums, ',')
        axs[1].set(xlabel='npks logscale', ylabel='fractional intensity', xscale='log', ylim=(doplot, 1.),
                   xlim=(np.searchsorted(cums, doplot), len(cums)))
        axs[1].plot([mask.sum(),], [frac,], "o")
        plt.show()
    return mask

In [None]:
# here we compute a mask over our peaks (cf_4d) that selects only the strongest ones for indexing

# USER: modify the "frac" parameter below and re-run the cell until the orange dot sits nicely on the "elbow" of the blue line
# this indicates the fractional intensity cutoff we will select
# if the blue line does not look elbow-shaped in the logscale plot, try changing the "doplot" parameter (the lower y limit of the logscale plot) until it does

ms = strongest_peaks(cf_4d, frac=0.99, doplot=0.8)

In [None]:
# strongest_peaks returns a mask for cf_4d, so now we filter the peaks by the mask to keep only the brightest ones
# NOTE: the mask was computed for the current rows of cf_4d, so run this cell only once per mask

cf_4d.filter(ms)

In [None]:
# Intensity of the remaining peaks against two-theta, on a logarithmic intensity axis

fig, ax = plt.subplots()
ax.plot(cf_4d.tth, cf_4d.sum_intensity, ',')
ax.set(xlabel="Two-theta", ylabel="Intensity", yscale="log")

plt.show()

In [None]:
# now we can define a unit cell from the parameters attached to the columnfile

Fe = ImageD11.unitcell.unitcell_from_parameters(cf_4d.parameters)
# compute the powder rings of this cell (Fe.ringds is plotted in the next cell)
# NOTE(review): the argument 2 is presumably the upper limit in d-star - confirm
Fe.makerings(2)

In [None]:
# now let's plot our peaks again, with the rings from the unitcell included, to check our lattice parameters are good

fig, ax = plt.subplots()

# USER: increase skip to plot only every skip-th peak if the plot is too slow
skip=1
ax.plot( cf_4d.ds[::skip], cf_4d.eta[::skip],',',alpha=0.5)
# mark each computed ring position with a tall vertical tick at eta = 0
ax.plot( Fe.ringds, [0,]*len(Fe.ringds), '|', ms=90 )
# raw string: "\A" is not a valid escape sequence in a normal string literal;
# 1/d is a reciprocal length, so the unit is inverse Angstrom
ax.set_xlabel(r'1 / d ($\AA^{-1}$)')
ax.set_ylabel('$\\eta$ (deg)')

plt.show()

In [None]:
# Let's copy our 4D peaks to a new object, so we can filter them for indexing
# (cf_4d itself is left untouched and is used again later for the peak assignment)

cf_4d_to_index = cf_4d.copy()

# remove peaks with two-theta > 25 (edge of detector)

cf_4d_to_index.filter(cf_4d_to_index.tth < 25)

# specify our ImageD11 indexer with these peaks

indexer = ImageD11.indexing.indexer_from_colfile(cf_4d_to_index)

print(f"Indexing {cf_4d_to_index.nrows} peaks")

In [None]:
# USER: set a tolerance in d-space (for assigning peaks to powder rings)

indexer.ds_tol = 0.01

# change the log level so we can see what the ring assignments look like

ImageD11.indexing.loglevel = 1

# assign peaks to powder rings; the log level is restored even if the
# assignment raises, so a failed run does not leave verbose logging switched on

try:
    indexer.assigntorings()
finally:
    # change log level back again
    ImageD11.indexing.loglevel = 3

In [None]:
# Plot d-star against eta for the peaks used for indexing,
# coloured by ring assignment (indexer.ra holds the ring assignments)

fig, ax = plt.subplots()
ax.scatter(
    cf_4d_to_index.ds,
    cf_4d_to_index.eta,
    c=indexer.ra,
    cmap='tab20',
    s=1,
)
ax.set(xlabel="d-star", ylabel="eta")

plt.show()

In [None]:
# Maximum expected number of peaks: the number of hkls summed over every ring of the unit cell
allpks = np.sum(
    [len(indexer.unitcell.ringhkls[ring_ds]) for ring_ds in indexer.unitcell.ringds]
)
allpks

In [None]:
# now we are indexing!
# USER: specify the rings you want to use for indexing
rings = 2, 4, 6, 1

# USER: specify the HKL tolerances you want to use for indexing

# hkl_tols_seq = [0.050, 0.025, 0.010]
hkl_tols_seq = [0.04]

# USER: specify the fraction of the total expected peaks
# (a candidate needs at least allpks*frac peaks, see indexer.minpks below)

# fracs = (0.75, 0.5)
fracs = [0.75]

# run the compiled ImageD11 routines on a single thread and keep logging quiet
ImageD11.cImageD11.cimaged11_omp_set_num_threads(1)
ImageD11.indexing.loglevel=3

# iterate over HKL tolerances
for tol in hkl_tols_seq:
    # iterate over minpks fractions
    for frac in fracs:
        # the loop targets are attributes of the indexer, so each iteration sets
        # indexer.ring_1 / indexer.ring_2 directly; every ordered pair of rings
        # is tried, including ring_1 == ring_2
        for indexer.ring_1 in rings:
            for indexer.ring_2 in rings:
                indexer.minpks = allpks*frac
                indexer.hkl_tol = tol
                indexer.find()
                indexer.scorethem()                
        # report the number of UBI matrices held by the indexer after this (frac, tol) pass
        print(frac, tol, len(indexer.ubis))

In [None]:
def plot_index_results(ind, colfile, title):
    """Plot a six-panel summary of an indexing result.

    Parameters
    ----------
    ind : ImageD11 indexer
        Indexer whose results are plotted. Its ``histogram_drlv_fit`` and
        ``fight_over_peaks`` methods are called first; the function then reads
        ``ind.histogram``, ``ind.bins``, ``ind.ga`` (grain assignment per peak,
        -1 meaning unassigned) and ``ind.ubis``.
    colfile : columnfile
        The peaks that were indexed. ``omega``, ``dty``, ``ds`` and
        ``sum_intensity`` are indexed with masks built from ``ind.ga``, so it
        must have one row per entry of ``ind.ga``.
    title : str
        Title of the first panel.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    # Generate a histogram of |drlv| for a ubi matrix.
    # Use the indexer that was passed in, not the notebook-global one.
    ind.histogram_drlv_fit()
    ind.fight_over_peaks()

    fig, axs = plt.subplots(3, 2, layout="constrained", figsize=(9,12))
    axs_flat = axs.ravel()

    # For each grain, plot the error in hkl vs the number of peaks with that error

    for grh in ind.histogram:
        axs_flat[0].plot(ind.bins[1:-1], grh[:-1], "-")

    axs_flat[0].set(ylabel="number of peaks",
                    xlabel="error in hkl (e.g. hkl versus integer)",
                    title=title)

    # set a mask of all non-assigned g-vectors

    m = ind.ga == -1

    # plot the assigned g-vectors omega vs dty (sinograms)

    axs_flat[1].scatter(colfile.omega[~m],
                        colfile.dty[~m],
                        c=ind.ga[~m],
                        s=2,
                        cmap='tab20')

    axs_flat[1].set(title=f'Sinograms of {ind.ga.max()+1} grains',
                    xlabel='Omega/deg',
                    ylabel='dty/um')

    # Define weak peaks as all non-assigned peaks with intensity 1e-4 of max.
    # When every peak is assigned there is no maximum to take, so fall back
    # to a zero cutoff (the weak / not weak selections are then both empty).
    unassigned_intensity = colfile.sum_intensity[m]
    if unassigned_intensity.size:
        cut = unassigned_intensity.max() * 1e-4
    else:
        cut = 0.0
    weak = unassigned_intensity < cut

    # Plot unassigned peaks in omega vs dty

    axs_flat[2].scatter(colfile.omega[m][weak],  colfile.dty[m][weak],  s=2, label='weak')
    axs_flat[2].scatter(colfile.omega[m][~weak], colfile.dty[m][~weak], s=2, label='not weak')

    axs_flat[2].set(title='Sinograms of unassigned peaks',
                    xlabel='Omega/deg',
                    ylabel='dty/um')
    axs_flat[2].legend()

    # Plot d-star vs intensity for all assigned peaks

    axs_flat[3].scatter(colfile.ds[~m], colfile.sum_intensity[~m], s=2)
    axs_flat[3].set(title='Intensity of all assigned peaks',
                    xlabel='d-star',
                    ylabel='Intensity',
                    yscale='log')

    # Plot d-star vs intensity for all unassigned peaks

    axs_flat[4].scatter(colfile.ds[m][weak],  unassigned_intensity[weak],  s=2, label='weak')
    axs_flat[4].scatter(colfile.ds[m][~weak], unassigned_intensity[~weak], s=2, label='not weak')

    axs_flat[4].set(title='Intensity of all unassigned peaks',
                    xlabel='d-star',
                    ylabel='Intensity',
                    yscale='log')
    axs_flat[4].legend()

    # Get the number of peaks per grain

    npks = [(ind.ga == i).sum() for i in range(len(ind.ubis))]

    # Plot histogram of number of peaks per grain

    axs_flat[5].hist(npks, bins=64)
    axs_flat[5].set(title='Hist of peaks per grain',
                    xlabel='Number of peaks',
                    ylabel='Number of grains')

    for ax in axs_flat:
        ax.set_box_aspect(0.7)

    plt.show()

In [None]:
# plot a summary of the indexing result for the peaks that were given to the indexer
plot_index_results(indexer, cf_4d_to_index, 'First attempt')

In [None]:
# USER: Define HKL tolerance to try to assign all peaks to existing grains

hkl_tol = 0.04

# Get an array of g-vectors from the columnfile, one row (gx, gy, gz) per peak
# Note this uses all peaks of cf_4d, not only the subset that was used for indexing
gvectors = np.transpose((cf_4d.gx, cf_4d.gy, cf_4d.gz)).copy()
n_gvectors = len(gvectors)

# Make storage arrays for errors (drlv2) and labels
# both arrays are persistent throughout assignments
# so they get steadily improved over time
# drlv2 starts at 2 for every peak and labels at -1 (unassigned)
drlv2 = np.full(n_gvectors, 2, dtype=float)
labels = np.full(n_gvectors, -1, 'i')

# Create array of grain objects, one per UBI matrix from indexer.ubis
grains = [ImageD11.grain.grain(ubi.copy()) for ubi in indexer.ubis]

# print what fraction of the g-vectors are unassigned:
# NOTE(review): indexer.ga refers to the peaks of cf_4d_to_index (the two-theta filtered
# copy), while the denominator is the number of peaks in cf_4d - the printed value is
# therefore the indexed peaks as a fraction of all cf_4d peaks
print(f"Trying to assign {cf_4d.nrows} peaks")
print(f"Currently {(indexer.ga != -1).sum()/cf_4d.nrows} of peaks are assigned")

# Iterate over each UBI matrix
for i, grain in enumerate(grains):
    # Assign g-vectors to this grain if drlv2 < hkl_tol**2
    # Then refine the grain using all the assigned g-vectors
    # (done twice; grain.ubi is passed in and the return value is not used)
    ImageD11.cImageD11.score_and_refine(grain.ubi, gvectors, hkl_tol)
    ImageD11.cImageD11.score_and_refine(grain.ubi, gvectors, hkl_tol)
    # assign all g-vectors to new refined grain
    # will re-assign g-vectors if this grain gives a lower error than currently assigned
    # updates drlv2 with new errors for new assignments
    # updates labels with new assignments
    ImageD11.cImageD11.score_and_assign(grain.ubi, gvectors, hkl_tol, drlv2, labels, i)
    # pretend all g-vectors are unassigned
    label_all_unassigned = np.full(n_gvectors, -1, 'i')
    # pretend all grains have max error
    drlv2_all_max = np.full(n_gvectors, 2, dtype=float)
    # score and assign again
    # updates drlv2_all_max with new errors for new assignments
    # updates label_all_unassigned with new assignments
    # now work out which g-vectors were assigned to this grain
    # I think allpks is "greedy"
    # In that it greedily assigns peaks to this grain assuming all other peaks are unassigned with max error
    ImageD11.cImageD11.score_and_assign(grain.ubi, gvectors, hkl_tol, drlv2_all_max, label_all_unassigned, i)

    # boolean mask over all peaks: True where the greedy assignment picked this grain
    grain.allpks = label_all_unassigned == i
    # work out the sum of the intensities of all assigned g-vectors
    grain.isum = cf_4d.sum_intensity[grain.allpks].sum()

# Iterate over each grain again after all grains have been assigned
for i, grain in enumerate(grains):
    # Get the assigned peaks for this grain
    grain.pks = labels == i
    # Get the total number of assigned peaks for this grain
    grain.npks = grain.pks.sum()
    
    # Calculate the real hkls for each gvector of this grain
    hklr = np.dot(grain.ubi, gvectors[grain.pks].T)
    # Round them to integers
    hkli = np.round(hklr).astype(int)
    
    # Work out the number of unique peaks per grain
    # By removing duplicates: a peak is identified by its integer hkl plus the sign of eta
    uniqpks = np.unique(np.vstack((hkli, np.sign(cf_4d.eta[grain.pks]).astype(int))),axis=1)
    grain.nuniq = uniqpks.shape[1]

print(f"Now {(labels!=-1).sum()/len(labels)} of peaks are assigned")

# The output is saved in the next cell

In [None]:
# Store the grain labels in the columnfile as an extra 'grain_id' column

cf_4d.addcolumn(labels, 'grain_id')

# Start from a clean output file: remove any previous copy
if os.path.exists(cf_path):
    os.remove(cf_path)

# Write columnfile as an HDF file
ImageD11.columnfile.colfile_to_hdf(cf_4d, cf_path)

# Re-open the same file in append mode and add two datasets:
#   'ubis'        - the UBI matrix of every grain
#   'ubis_allpks' - the greedy peak mask (grain.allpks) of every grain
with h5py.File(cf_path, mode='a') as hout:
    hout.create_dataset(
        'ubis',
        data=np.array([g.ubi for g in grains]),
    )
    hout.create_dataset(
        'ubis_allpks',
        data=np.array([g.allpks for g in grains]),
    )