# Jupyter notebook based on ImageD11 to process scanning 3DXRD data
# Written by Haixing Fang, Jon Wright and James Ball
## Date: 16/01/2024

In [None]:
# There is a bug with the current version of ImageD11 in the site-wide Jupyter env.
# This has been fixed here: https://github.com/FABLE-3DXRD/ImageD11/commit/4af88b886b1775585e868f2339a0eb975401468f
# Until a new release has been made and added to the env, we need to get the latest version of ImageD11 from GitHub.
# Put it somewhere in your home directory.
# USER: Change the path below to point to your local copy of ImageD11:

id11_code_path = "/home/esrf/james1997a/Code/ImageD11"

import sys

# Put the local checkout at the front of the module search path so that it is
# found before any other installed copy. The guard stops sys.path from
# accumulating duplicate entries when this cell is re-run.
if sys.path[:1] != [id11_code_path]:
    sys.path.insert(0, id11_code_path)

In [None]:
# import functions we need

import os, glob, pprint
import numpy as np
import matplotlib
import h5py

%matplotlib widget
from matplotlib import pyplot as plt

import ImageD11.grain
import ImageD11.unitcell
import ImageD11.indexing
import ImageD11.columnfile
import ImageD11.refinegrains
import ImageD11.sinograms.dataset
import ImageD11.sinograms.properties
import ImageD11.sinograms.lima_segmenter
import ImageD11.sinograms.assemble_label

from ImageD11.blobcorrector import eiger_spatial

In [None]:
# OLD DATASETS

# NOTE: Old datasets (recorded before the new directory layout) do not distinguish between RAW_DATA and PROCESSED_DATA.
# In that case, use this cell to specify where your experimental folder is, and delete the # NEW DATASETS cell below.
# e.g. /data/visitor/4752/id11/20210513
# This cell and the # NEW DATASETS cell assign the same three variables
# (rawdata_path, processed_data_root_dir, sparse_pixels_dir), so whichever one runs last wins.

### USER: specify your experimental directory

rawdata_path = "/home/esrf/james1997a/Data/ma4752/id11/20210618"

# List the directory contents as a quick check that the path is correct
!ls -lrt {rawdata_path}

### USER: specify where you want your processed data to go

processed_data_root_dir = "/home/esrf/james1997a/Data/ma4752/id11/20240118/James"
sparse_pixels_dir = os.path.join(processed_data_root_dir, "SparsePixels")  # USER: modify this to change the name of the SparsePixels folder inside processed_data_root_dir

In [None]:
# NEW DATASETS
# If you have RAW_DATA and PROCESSED_DATA folders, you should run this cell and delete the # OLD DATASETS cell above.
# This cell and the # OLD DATASETS cell assign the same three variables
# (rawdata_path, processed_data_root_dir, sparse_pixels_dir), so whichever one runs last wins.

### USER: specify your experimental directory

base_dir = "/home/esrf/james1997a/Data/ihma439/id11/20231211"

# Raw data lives in the RAW_DATA folder directly inside base_dir
rawdata_path = os.path.join(base_dir, 'RAW_DATA')

# List the directory contents as a quick check that the path is correct
!ls -lrt {rawdata_path}

# Processed data goes in a sub-folder of PROCESSED_DATA inside base_dir
processed_data_root_dir = os.path.join(base_dir, 'PROCESSED_DATA/James/20240123')  # USER: modify this to change the destination folder if desired
sparse_pixels_dir = os.path.join(processed_data_root_dir, "SparsePixels")  # USER: modify this to change the name of the SparsePixels folder inside processed_data_root_dir

In [None]:
# USER: pick the sample and the dataset you want to process.
# Both names are substituted into the file names built in the next cell
# (ds_<sample>_<dataset>.h5, pks_<sample>_<dataset>.h5, and so on).

sample = "FeAu_0p5_tR_nscope"
dataset = "top_100um"

In [None]:
# destination of H5 files

# Per-dataset files, all located in sparse_pixels_dir and named after the sample and dataset
dset_path = os.path.join(sparse_pixels_dir, f"ds_{sample}_{dataset}.h5" )
sparse_path = os.path.join(sparse_pixels_dir, f'{sample}_{dataset}_sparse.h5')
pks_path = os.path.join(sparse_pixels_dir, f'pks_{sample}_{dataset}.h5')
cf_path = os.path.join(sparse_pixels_dir, f'cf_{sample}_{dataset}.h5')
grains_path = os.path.join(sparse_pixels_dir, f'grains_{sample}_{dataset}.map')
# Parameter file given as a relative path - presumably resolved against the
# notebook's working directory; check that the file is present there
par_path = 'Fe_refined.par'

# Detector distortion files (dx and dy), looked up in a CeO2 folder two levels
# above processed_data_root_dir
e2dx_path = os.path.join(processed_data_root_dir, '../../CeO2/e2dx_E-08-0173_20231127.edf')
e2dy_path = os.path.join(processed_data_root_dir, '../../CeO2/e2dy_E-08-0173_20231127.edf')

In [None]:
# Load the dataset object from the file at dset_path

ds = ImageD11.sinograms.dataset.load(dset_path)

# Print the dataset and its shape as a quick check that the right one was loaded
print(ds)
print(ds.shape)

In [None]:
# Merge your peaks in 2D and 4D.
# The peaks table is read from pks_path and merged using the omega and dty
# values of the dataset. The merged result (peaks_4d) is accessed by column
# name in the cells below, e.g. peaks_4d['Number_of_pixels'].

peaks_table = ImageD11.sinograms.properties.pks_table.load(pks_path)
peaks_4d = peaks_table.pk2dmerge(ds.omega, ds.dty)

In [None]:
# Build a mask that keeps only the 4D peaks larger than 25 pixels

m = peaks_4d['Number_of_pixels'] > 25

# Histogram omega against dty for the selected peaks, weighting every peak by
# the square root of its summed intensity - the result should look sinusoidal

fig, ax = plt.subplots()
counts, xedges, yedges, im = ax.hist2d(
    peaks_4d['omega'][m],
    peaks_4d['dty'][m],
    weights=np.sqrt(peaks_4d['sum_intensity'][m]),
    bins=(ds.obinedges, ds.ybinedges),
    norm=matplotlib.colors.LogNorm(),
)
ax.set(xlabel="Omega angle", ylabel="dty")

fig.colorbar(im, ax=ax)

plt.show()

In [None]:
# We will now generate a cf (columnfile) object for the 4D peaks.
# The peak positions are corrected for detector spatial distortion first.

# Build the correction function from the dx / dy distortion files
spatial_correction_function = eiger_spatial(dxfile=e2dx_path, dyfile=e2dy_path)

# Apply the correction to the merged peaks; the result is a dict of columns
spatial_correction_dict_4d = spatial_correction_function(peaks_4d)

# Turn the dict of columns into a columnfile
cf_4d = ImageD11.columnfile.colfile_from_dict(spatial_correction_dict_4d)

# calculates the scattering vector (g-vector) geometries using parameters from the file

cf_4d.parameters.loadparameters(par_path)

# The parameters must be loaded before this call so the geometry uses them
cf_4d.updateGeometry()

In [None]:
# Cake plot (two-theta against eta) of the 4D peaks, of which there are fewer.
# With good parameters in the par file the peaks fall on straight lines.

fig, ax = plt.subplots()
ax.scatter(cf_4d.tth, cf_4d.eta, s=1)
ax.set(xlabel="Two-theta", ylabel="eta")

plt.show()

In [None]:
# OPTIONAL: export CF to an flt so we can play with it with ImageD11_gui
# uncomment the below line

# cf_4d.writefile(f'{sample}_{dataset}_4d_peaks.flt')

In [None]:
# filter the peaks to select only the brightest ones for sinogram use

def strongest_peaks(colf, uself=True, frac=0.995, B=0.2, doplot=None):
    """Return a boolean mask selecting the brightest peaks in ``colf``.

    The summed intensity of every peak is multiplied by ``exp(B * ds**2)`` and,
    when ``uself`` is true, by the Lorentz factor. The peaks are then ranked
    from brightest to weakest and the cumulative corrected intensity is
    normalised to 1. The corrected intensity of the peak at which the
    cumulative curve reaches ``frac`` becomes the cutoff, and only peaks
    strictly brighter than that cutoff are selected.

    Parameters
    ----------
    colf : columnfile-like
        Object exposing ``sum_intensity`` and ``ds`` arrays (plus ``tth`` and
        ``eta`` when ``uself`` is true).
    uself : bool
        Whether to apply ``ImageD11.refinegrains.lf(tth, eta)``.
    frac : float
        Fraction of the total corrected intensity used to place the cutoff.
        Values beyond the end of the cumulative curve are clamped to the
        weakest peak instead of raising an IndexError.
    B : float
        Coefficient of ``ds**2`` in the exponential intensity correction.
    doplot : float or None
        ``None`` disables plotting. Otherwise the cumulative curve is plotted
        twice (linear and log x axis) and ``doplot`` is the lower y limit of
        the log-scale panel.

    Returns
    -------
    numpy.ndarray of bool
        One entry per peak of ``colf``; empty when ``colf`` has no peaks.
    """
    # correct intensities for structure factor (decreases with 2theta)
    cor_intensity = colf.sum_intensity * np.exp(colf.ds * colf.ds * B)
    if uself:
        lf = ImageD11.refinegrains.lf(colf.tth, colf.eta)
        cor_intensity *= lf
    npks = len(cor_intensity)
    if npks == 0:
        # nothing to rank: normalising by cums[-1] below would raise IndexError
        return np.zeros(0, dtype=bool)
    order = np.argsort(cor_intensity)[::-1]  # sort the peaks by intensity
    sortedpks = cor_intensity[order]
    cums = np.cumsum(sortedpks)
    cums /= cums[-1]
    # searchsorted returns npks when frac lies beyond the curve; clamp it so
    # the lookup below stays inside the array
    enough = min(np.searchsorted(cums, frac), npks - 1)
    # Aim is to select the strongest peaks for indexing.
    cutoff = sortedpks[enough]
    mask = cor_intensity > cutoff
    if doplot is not None:
        fig, axs = plt.subplots(1, 2, figsize=(10, 5))
        # cums is already normalised, so it is plotted as is
        axs[0].plot(cums, ',')
        axs[0].set(xlabel='npks', ylabel='fractional intensity')
        axs[0].plot([mask.sum(),], [frac,], "o")
        axs[1].plot(cums, ',')
        axs[1].set(xlabel='npks logscale', ylabel='fractional intensity', xscale='log', ylim=(doplot, 1.),
                   xlim=(np.searchsorted(cums, doplot), len(cums)))
        axs[1].plot([mask.sum(),], [frac,], "o")
        plt.show()
    return mask

def selectpeaks( cf, dstol=0.005, dsmax = 100, frac=0.99, doplot=None):
    """Return a filtered copy of ``cf`` holding only strong peaks on known rings.

    A unit cell is built from ``cf.parameters`` and its rings are computed up
    to ``dsmax``. Peaks whose ``ds`` lies within ``dstol`` of any ring are
    kept, and that subset is then reduced to its strongest peaks with
    ``strongest_peaks``. ``cf`` itself is not filtered; the work is done on a
    copy.

    Parameters
    ----------
    cf : columnfile
        Peaks to select from; must provide ``parameters``, ``nrows``, ``ds``,
        ``copy()`` and ``filter()``.
    dstol : float
        Maximum distance in ``ds`` between a peak and a ring.
    dsmax : float
        Upper limit in ``ds`` for the rings that are considered.
    frac : float
        Fractional intensity cutoff passed on to ``strongest_peaks``.
    doplot : float or None
        Lower y limit of the log-scale diagnostic plot drawn by
        ``strongest_peaks``. ``None`` (the default) uses ``frac * 0.5``, which
        is the value that was previously hard-coded. Note that, unlike in
        ``strongest_peaks``, ``None`` here does not switch the plot off.

    Returns
    -------
    columnfile
        The filtered copy.
    """
    cell = ImageD11.unitcell.unitcell_from_parameters(cf.parameters)
    cell.makerings(dsmax)
    # flag every peak that sits within dstol of one of the computed rings
    m = np.zeros(cf.nrows, bool)
    for v in cell.ringds:
        if v < dsmax:
            m |= (abs(cf.ds - v) < dstol)
    cfc = cf.copy()
    cfc.filter(m)
    if doplot is None:
        doplot = frac * 0.5
    ms = strongest_peaks(cfc, frac=frac, doplot=doplot)
    cfc.filter(ms)
    return cfc

In [None]:
# Here we filter our peaks (cf_4d) to keep only the strongest ones - this selection is for indexing purposes only!
# dsmax is set to limit the rings given to the indexer - 6-8 rings is normally good.

# USER: modify the "frac" parameter below and re-run the cell until the orange dot sits nicely on the "elbow" of the blue line.
# The dot marks the fractional intensity cutoff that will be selected.
# If the blue line does not look elbow-shaped in the logscale plot, try changing the "doplot" value that selectpeaks
# hands to strongest_peaks (it sets the lower y limit of the logscale plot) until it does.

cf_strong = selectpeaks(cf_4d, frac=0.99, dsmax=1.6)

In [None]:
# Look at the intensities of the peaks that survived the filtering,
# drawn against d-star with a logarithmic intensity axis

fig, ax = plt.subplots()
ax.plot(cf_strong.ds, cf_strong.sum_intensity, ',')
ax.semilogy()
ax.set(xlabel="Dstar", ylabel="Intensity")

plt.show()

In [None]:
# Now we can define a unit cell from the parameters attached to cf_strong
# and compute its rings up to the largest d-star present in the strong peaks.

Fe = ImageD11.unitcell.unitcell_from_parameters(cf_strong.parameters)
Fe.makerings(cf_strong.ds.max())

In [None]:
# now let's plot our peaks again, with the rings from the unitcell included, to check our lattice parameters are good

fig, ax = plt.subplots()

# plot every skip-th peak; raise skip to thin out a crowded plot
skip=1
ax.plot( cf_strong.ds[::skip], cf_strong.eta[::skip],',',alpha=0.5)
# one tall vertical tick at eta = 0 for each computed ring position
ax.plot( Fe.ringds, [0,]*len(Fe.ringds), '|', ms=90 )
# Raw strings keep the mathtext backslashes intact ('\A' is an invalid escape
# sequence in a normal string). 1/d is a reciprocal length, hence the ^{-1}.
ax.set_xlabel(r'1 / d ($\AA^{-1}$)')
ax.set_ylabel(r'$\eta$ (deg)')

plt.show()

In [None]:
# Create our ImageD11 indexer from the strong peaks.
# We're aiming to index around 3_000 to 10_000 peaks - check the count printed below.

indexer = ImageD11.indexing.indexer_from_colfile(cf_strong)

print(f"Indexing {cf_strong.nrows} peaks")

In [None]:
# USER: set a tolerance in d-space (for assigning peaks to powder rings)

indexer.ds_tol = 0.01

# change the log level so we can see what the ring assignments look like

ImageD11.indexing.loglevel = 1

try:
    # assign peaks to powder rings
    indexer.assigntorings()
finally:
    # change log level back again - done in a finally block so the level is
    # also reset when the ring assignment raises
    ImageD11.indexing.loglevel = 3

In [None]:
# Show the strong peaks (d-star against eta), coloured by ring assignment

fig, ax = plt.subplots()

# indexer.ra holds the ring assignment of each peak
ax.scatter(cf_strong.ds, cf_strong.eta, c=indexer.ra, cmap='tab20', s=1)
ax.set(xlabel="d-star", ylabel="eta")

plt.show()

In [None]:
# Run the indexing

# Only rings with fewer hkls than this are used as search rings
max_multiplicity = 13

# Walk over the rings of the unit cell. Each ring that has at least one peak
# assigned to it adds its multiplicity to the number of peaks expected;
# each ring below max_multiplicity is recorded as a candidate search ring.
n_peaks_expected = 0
rings = []
for i, dstar in enumerate(indexer.unitcell.ringds):
    multiplicity = len(indexer.unitcell.ringhkls[dstar])
    counts_on_this_ring = (indexer.ra == i).sum()
    if counts_on_this_ring > 0:
        n_peaks_expected += multiplicity
    if multiplicity < max_multiplicity:
        rings.append((counts_on_this_ring, multiplicity, i))

# order the candidates by (counts, multiplicity, ring number), ascending
rings.sort()

print(f"{n_peaks_expected} peaks expected")
print(f"Trying these rings (counts, multiplicity, ring number): {rings}")

# USER: specify the HKL tolerances you want to use for indexing
hkl_tols_seq = [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.1]

# USER: specify the fraction of the total expected peaks
fracs = [0.9, 0.8, 0.7, 0.6, 0.5, 0.5, 0.5]

ImageD11.cImageD11.cimaged11_omp_set_num_threads(1)
ImageD11.indexing.loglevel = 3

indexer.uniqueness = 0.3
indexer.cosine_tol = np.cos(np.radians(90.25))

# Try every tolerance for every fraction, strictest fraction first
for frac in fracs:
    for tol in hkl_tols_seq:
        indexer.minpks = n_peaks_expected*frac
        indexer.hkl_tol = tol
        # search with every ordered pair of candidate rings
        for _, _, ring_1 in rings:
            indexer.ring_1 = ring_1
            for _, _, ring_2 in rings:
                indexer.ring_2 = ring_2
                indexer.find()
                indexer.scorethem()
        # report how many orientations have been found so far
        print(frac, tol, len(indexer.ubis))

In [None]:
def plot_index_results(ind, colfile, title):
    """Draw a 3x2 summary figure for an indexing attempt.

    ``ind.histogram_drlv_fit()`` and ``ind.fight_over_peaks()`` are called
    first, so ``ind`` is modified by this function; the plots then read
    ``ind.histogram``, ``ind.bins``, ``ind.ga`` and ``ind.ubis``.

    Panels, in reading order:
      0. one histogram line per entry of ``ind.histogram``
      1. omega vs dty of the assigned peaks, coloured by grain
      2. omega vs dty of the unassigned peaks, split into weak / not weak
      3. d-star vs intensity of the assigned peaks
      4. d-star vs intensity of the unassigned peaks, split into weak / not weak
      5. histogram of the number of peaks per grain

    Parameters
    ----------
    ind : ImageD11 indexer
        Indexer whose results are plotted.
    colfile : columnfile
        The peaks given to the indexer; must provide ``omega``, ``dty``,
        ``ds`` and ``sum_intensity`` with one entry per element of ``ind.ga``.
    title : str
        Title of the first panel.

    Returns
    -------
    None
    """
    # Generate a histogram of |drlv| for a ubi matrix.
    # These are called on the indexer passed in, not on a notebook global.
    ind.histogram_drlv_fit()
    ind.fight_over_peaks()

    fig, axs = plt.subplots(3, 2, layout="constrained", figsize=(9,12))
    axs_flat = axs.ravel()

    # For each grain, plot the error in hkl vs the number of peaks with that error

    for grh in ind.histogram:
        axs_flat[0].plot(ind.bins[1:-1], grh[:-1], "-")

    axs_flat[0].set(ylabel="number of peaks",
                    xlabel="error in hkl (e.g. hkl versus integer)",
                    title=title)

    # set a mask of all non-assigned g-vectors

    m = ind.ga == -1

    # plot the assigned g-vectors omega vs dty (sinograms)

    axs_flat[1].scatter(colfile.omega[~m],
                        colfile.dty[~m],
                        c=ind.ga[~m],
                        s=2,
                        cmap='tab20')

    axs_flat[1].set(title=f'Sinograms of {ind.ga.max()+1} grains',
                    xlabel='Omega/deg',
                    ylabel='dty/um')

    # Define weak peaks as all non-assigned peaks with intensity 1e-4 of max.
    # When every peak is assigned there is nothing to take the max of, so the
    # cut falls back to 0 and the (empty) weak mask is still well defined.
    unassigned_intensity = colfile.sum_intensity[m]
    if len(unassigned_intensity) > 0:
        cut = unassigned_intensity.max() * 1e-4
    else:
        cut = 0.0
    weak = unassigned_intensity < cut

    # Plot unassigned peaks in omega vs dty

    axs_flat[2].scatter(colfile.omega[m][weak],  colfile.dty[m][weak],  s=2, label='weak')
    axs_flat[2].scatter(colfile.omega[m][~weak], colfile.dty[m][~weak], s=2, label='not weak')

    axs_flat[2].set(title='Sinograms of unassigned peaks',
                    xlabel='Omega/deg',
                    ylabel='dty/um')
    axs_flat[2].legend()

    # Plot d-star vs intensity for all assigned peaks

    axs_flat[3].scatter(colfile.ds[~m], colfile.sum_intensity[~m], s=2)
    axs_flat[3].set(title='Intensity of all assigned peaks',
                    xlabel='d-star',
                    ylabel='Intensity',
                    yscale='log')

    # Plot d-star vs intensity for all unassigned peaks

    axs_flat[4].scatter(colfile.ds[m][weak],  unassigned_intensity[weak],  s=2, label='weak')
    axs_flat[4].scatter(colfile.ds[m][~weak], unassigned_intensity[~weak], s=2, label='not weak')

    axs_flat[4].set(title='Intensity of all unassigned peaks',
                    xlabel='d-star',
                    ylabel='Intensity',
                    yscale='log')
    axs_flat[4].legend()

    # Get the number of peaks per grain

    npks = [(ind.ga == i).sum() for i in range(len(ind.ubis))]

    # Plot histogram of number of peaks per grain

    axs_flat[5].hist(npks, bins=64)
    axs_flat[5].set(title='Hist of peaks per grain',
                    xlabel='Number of peaks',
                    ylabel='Number of grains')

    for ax in axs_flat:
        ax.set_box_aspect(0.7)

    plt.show()

In [None]:
# Summarise the indexing result for the strong peaks; the last argument is the title of the first panel
plot_index_results(indexer, cf_strong, 'First attempt')

In [None]:
# Save grain data.
# One grain is created per orientation (ubi) found by the indexer. Every grain
# is given a zero translation here, i.e. no grain position is stored at this stage.

grains = [ImageD11.grain.grain(ubi, translation=np.array([0., 0., 0.])) for ubi in indexer.ubis]

# Write all grains to the grains_path file
ImageD11.grain.write_grain_file(grains_path, grains)

In [None]:
# Delete the columnfile output file if it exists, so that the write below
# starts from a fresh file rather than from one left by an earlier run
if os.path.exists(cf_path):
    os.remove(cf_path)

# Write columnfile as an HDF file.
# Note that this saves cf_4d (all the 4D peaks), not the cf_strong subset used for indexing.
ImageD11.columnfile.colfile_to_hdf(cf_4d, cf_path)