Refine a single crystal structure

In [None]:
# Bootstrap cell: execute the site-local installer script in this namespace
# and call the setup_ImageD11_from_git() function it is expected to define.
# NOTE(review): exec() runs whatever that file contains with full privileges;
# only use this on a trusted filesystem.
import os  # used below; the main import cell has not been run at this point

with open('/data/id11/nanoscope/install_ImageD11_from_git.py') as _installer:
    exec(_installer.read())
PYTHONPATH = setup_ImageD11_from_git( os.path.join( os.environ['HOME'],'git'), 'ImageD11_clean' )

In [None]:
# Set up the code to run latest version from this path
import os, sys, numpy as np, pylab as pl
import fabio
import ImageD11.sinograms.dataset
import ImageD11.refinegrains
import ImageD11.sinograms.properties
import ImageD11.indexing
import ImageD11.sym_u
import ImageD11.peakselect

In [None]:
# Fetch the example Si cube S3DXRD dataset into the current directory, but
# only when there is no 'processed' entry here yet.
if not os.path.exists('processed'):
    import ImageD11.fetch_data  # imported lazily: only needed for the download
    ImageD11.fetch_data.si_cube_s3dxrd_dataset('.', allow_download=True)

In [None]:
# Load a test dataset

# Path of the dataset file below the 'processed' folder.
dsname = os.path.join('processed','Si_cube','Si_cube_S3DXRD_nt_moves_dty',
                      'Si_cube_S3DXRD_nt_moves_dty_dataset.h5')
dset = ImageD11.sinograms.dataset.load( dsname )
# Detector mask image (hard-coded, site-specific path); it is opened with
# fabio further down.
dset.maskfile = "/data/id11/nanoscope/Eiger/eiger_mask_E-08-0173_20240122.edf"

In [None]:
# Run the peak-properties step only when dset.pksfile does not exist yet
# (presumably that step is what writes the file - confirm).
if not os.path.exists(dset.pksfile):
    ImageD11.sinograms.properties.main( dsname )

In [None]:
# Get the fpico6 monitor
# The per-peak monitor values are stored under the 'monitor' key of dset.pk2d.
dset.pk2d['monitor'] = dset.get_monitor_pk2d( dset.pk2d, name='fpico6')
cf = dset.get_cf_2d()            # columnfile built from the 2D peaks
dset.update_colfile_pars( cf )
" ".join(cf.titles)              # cell output: the column names

In [None]:
# Add two per-peak factors computed from tth and eta as new columns 'lf' and
# 'polarization'; both are used later when normalising the intensities.
cf.addcolumn(  ImageD11.refinegrains.lf( cf.tth, cf.eta ), "lf" )
cf.addcolumn(  ImageD11.refinegrains.polarization( cf.tth, cf.eta ), "polarization" )

In [None]:
# Second columnfile from the same dataset (the 4D peaks, going by the method
# name); this one is used for indexing.
cf_4d = dset.get_cf_4d()

In [None]:
# Keep only the rows whose npk2d column is greater than 1.
cf_4d.filter(cf_4d.npk2d>1)
# Name the phase once and pass that name on, instead of repeating the literal.
phase_name = 'Si'
dset.update_colfile_pars(cf_4d, phase_name)

In [None]:
# Index the filtered 4D peaks. A single (cf_4d.nrows//2, 0.05) pair is tried;
# presumably (minimum number of peaks, hkl tolerance) - confirm against
# ImageD11.indexing.index.
idx = ImageD11.indexing.index(cf_4d, npk_tol = [(cf_4d.nrows//2, 0.05),], maxpairs=10)

In [None]:
# This notebook handles exactly one grain: stop here if indexing found more
# or fewer.
assert len(idx.ubis) == 1, 'expecting one grain to be found'
# Build the grain from the single ubi after passing it through find_uniq_u
# with the cubic group.
# NOTE(review): ImageD11.grain is not imported explicitly in this notebook;
# presumably it is loaded as a side effect of the other ImageD11 imports.
g = ImageD11.grain.grain( ImageD11.sym_u.find_uniq_u( idx.ubis[0] , ImageD11.sym_u.cubic()) ) 

In [None]:
# Cell output: show the grain's UB, ubi and unitcell attributes for inspection.
g.UB, g.ubi, g.unitcell

In [None]:
%%time
# Index *all* of the peaks in the file. 
# H,K,L as real numbers
hkl_real = g.ubi.dot( (cf.gx, cf.gy, cf.gz ) )
hkl_int = np.round( hkl_real ).astype(int)      # nearest integer indices
gcalc = g.ub.dot( hkl_int )                     # g-vectors computed back from the integer hkl
gobs = (cf.gx, cf.gy, cf.gz)
gerr2 = ((gcalc - gobs)**2).sum(axis=0)         # squared length of (gcalc - gobs), one value per peak
s = ( np.sign( cf.yl ).astype(int) + 1 ) // 2   # 1 where yl > 0, otherwise 0

# Save the h,k,l,sign(y) and gerr2
for i, hkl in enumerate('hkl'):                 # column titles are the single letters 'h', 'k', 'l'
    cf.addcolumn( hkl_int[i], hkl )
cf.addcolumn( s, 'signY')
cf.addcolumn( gerr2, 'gerr2' )

In [None]:
# plot the results from the indexing to peaks
# One row of panels per signY value (0, 1); the columns show the index pairs
# (h,k), (k,l) and (l,h) of the non-rounded hkl values.
f, ax = pl.subplots(2, 3, figsize=(12, 8), constrained_layout=True)
for ys in (0, 1):
    m = s == ys                      # peaks belonging to this row of panels
    for i in range(3):
        j = (i + 1) % 3              # next index, cyclically
        panel = ax[ys, i]
        panel.plot(hkl_real[i, m], hkl_real[j, m], ".", ms=1, alpha=0.5)
        panel.set(xlabel='hkl'[i], ylabel='hkl'[j], title=ys)
        panel.grid()

In [None]:
# Upper limit on gerr2: it is drawn on the histogram below and then used to
# filter the peaks.
gerr2_cut = 2e-4

In [None]:
# 2D histogram of gerr2 (64 log-spaced bin edges from 1e-8 to 1) against the
# cube root of sum_intensity (64 bins), with a vertical line at gerr2_cut.
f, ax = pl.subplots(1,1,figsize=(8,6), sharey=True,constrained_layout=True)
# hist2d returns a tuple whose last item is the image; that is what the colorbar needs.
f.colorbar( ax.hist2d( gerr2, pow(cf.sum_intensity,1/3), bins=(np.logspace(-8,0,64),64), norm='log')[-1], ax=ax)
ax.plot( [ gerr2_cut, gerr2_cut ], ax.get_ylim(), "k-" )
# NOTE(review): the plotted quantity is the squared error (gerr2), although
# the axis label just says 'G-vector error'.
ax.set( xlabel = 'G-vector error', xscale='log', ylabel='intensity^(1/3)' );

In [None]:
# Remove the obviously wrong peaks
# Keeps only rows with gerr2 below gerr2_cut; the row count is printed before
# and after so the loss is visible.
print('All peaks',cf.nrows)
cf.filter( cf.gerr2 < gerr2_cut )
print('remove outliers',cf.nrows)

In [None]:
%%time
# sort the columnfile:
# np.lexsort treats the LAST key as the primary one, so rows end up ordered by
# signY, then h, k, l, then dty, then gerr2. merge_peaks_hkl relies on equal
# (h, k, l, signY) rows being adjacent.
cf.reorder( np.lexsort( ( cf.gerr2, cf.dty, cf.l, cf.k, cf.h, cf.signY ) ) )

In [None]:
# Print a 10-row excerpt of the sorted table, starting at the middle row, as
# fixed-width columns under a header of (truncated) column titles.
o = cf.nrows // 2
ts = list(cf.titles)
#ts.sort()
header = " ".join("%10s" % (t[:10],) for t in ts)
print(header)
for i in range(o, o + 10):
    row = " ".join("%10.6g" % (cf[t][i],) for t in ts)
    print(row)

In [None]:
# normalised intensity
# cf.addcolumn( cf.sum_intensity * cf.lf * cf.fpico6.mean() / cf.fpico6 , 'normalised_intensity')
# sum_intensity is multiplied by lf, divided by polarization, and rescaled by
# the monitor relative to its average over all remaining peaks.
avgmon = np.average(cf.monitor)
cf.addcolumn( cf.sum_intensity * cf.lf * avgmon / ( cf.monitor * cf.polarization)  , 'normalised_intensity')

In [None]:
def merge_peaks_hkl( cf ):
    """
    Merge runs of adjacent rows that share (h, k, l, signY) into single rows.

    cf should have been sorted by h,k,l,signY already: only *consecutive*
    rows with an identical key are merged, we are summing up blocks.

    Side effect: a 'merge_id' column is added to cf, numbering the blocks
    0, 1, 2, ... in row order.

    In the returned columnfile:
      - columns with 'intensity' in their title are summed over each block;
      - h, k, l, signY and merge_id (constant within a block) are returned as
        exact whole numbers;
      - every other column is the sum_intensity-weighted mean over the block.

    Returns the merged table built by ImageD11.columnfile.colfile_from_dict.
    """
    key_titles = ('h', 'k', 'l', 'signY')
    nrows = cf.nrows
    # First add a label saying which peak is which: a new block starts
    # wherever any key column differs from the row before it.
    merge_id = np.zeros(nrows, dtype=int)
    if nrows > 1:
        starts_block = np.zeros(nrows - 1, dtype=bool)
        for title in key_titles:
            column = np.asarray(cf[title])
            starts_block |= column[1:] != column[:-1]
        merge_id[1:] = np.cumsum(starts_block)
    cf.addcolumn(merge_id, 'merge_id')
    # Now make a reduced columnfile
    weights = cf.sum_intensity
    wnorm  = np.bincount( cf.merge_id, weights = weights )
    constant_titles = key_titles + ('merge_id',)
    newcf = {}
    for title in cf.titles:
        if 'intensity' in title: # unit weights to sum
            newcf[title] = np.bincount( cf.merge_id, weights = cf[title] )
            continue
        merged = np.bincount( cf.merge_id, weights = cf[title] * weights )/wnorm
        if title in constant_titles:
            # The weighted mean of a constant can come out a rounding error
            # below the integer (2.9999999...), which later truncates to the
            # wrong index; snap it back to the whole number.
            merged = np.rint(merged)
        newcf[title] = merged
    mergedcf = ImageD11.columnfile.colfile_from_dict( newcf )
    return  mergedcf

In [None]:
%%time
# Merge the sorted, filtered peaks; mall holds one row per (h, k, l, signY) block.
mall = merge_peaks_hkl( cf )

In [None]:
# Read the detector mask image named by dset.maskfile as an array.
mask = fabio.open(dset.maskfile).data

In [None]:
# Filter the merged peaks by their distance to the mask; the row count is
# printed before and after. mall itself is still plotted afterwards for
# comparison.
print(mall.nrows)
mclean = ImageD11.peakselect.filter_peaks_by_distance_to_mask( mall, mask )
print(mclean.nrows)

In [None]:
# Successive cuts on the merged peaks; each filter() call shrinks mclean in
# place and the remaining row count is printed after each stage.
oclip = 2 # degrees cut
mclean.filter( mclean.omega > dset.obinedges[0]+oclip ) # peaks near the scan start
mclean.filter( mclean.omega < dset.obinedges[-1]-oclip) # peaks near the scan end
print("After removing peaks at scan ends",mclean.nrows)
# Drop peaks whose lf value is below 5% of the largest one.
mclean.filter( mclean.lf > mclean.lf.max() * 0.05 )
print("After removing peaks close to rotation axis",mclean.nrows)
# The message reports the threshold actually applied by the filter.
min_pixels = 2
mclean.filter( mclean.Number_of_pixels > min_pixels )
print("After removing peaks with %d pixels or fewer"%(min_pixels),mclean.nrows)

In [None]:
# Compare the fc/sc positions of all merged peaks ('x') with those that
# survived the cleaning cuts ('+').
pl.figure()
pl.plot( mall.fc, mall.sc, 'x' )
pl.plot( mclean.fc, mclean.sc, '+' )

In [None]:
def write_shelx_mergeall( merged, fname, machine_err = 0.03 ):
    """
    Write merged reflections to a text file in SHELX HKLF 4 layout.

    merged : table with h, k, l and normalised_intensity columns
    fname : name of the output file (overwritten if it exists)
    machine_err : fraction of the intensity added to the sqrt(I+1) term to
        form the error estimate

    One line is written per reflection as FORMAT(3I4,2F8.2,I4) for
    h,k,l,Fo2,sigma(Fo2), and batch number (always 0). When the largest
    intensity exceeds 9.99e4, intensities and errors are scaled down together
    so that the largest becomes 9.99e4 and fits the F8.2 field.

    Side effects: prints the mean and maximum intensity and the scale factor,
    and draws a 200-bin histogram of the intensities with pylab.
    """
    sig = merged.normalised_intensity
    err = np.sqrt(sig+1) + machine_err * sig
    print(sig.mean(), sig.max())
    if sig.max() > 9.99e4:
        sc = 9.99e4/sig.max()
    else:
        sc = 1
    print(sc)
    pl.hist(sig,bins=200)
    # h, k, l can arrive as floats (e.g. weighted means from a merge). "%d"
    # truncates, so 2.9999999 would be written as 2: round to nearest first.
    hkl = np.rint( (merged.h, merged.k, merged.l) ).astype(int).T
    with open(fname,'w') as fout:
        fout.writelines(
            "%4d%4d%4d%8.2f%8.2f%4d\n"%( h, k, l, fo2, sfo2, 0 )
            for (h, k, l), fo2, sfo2 in zip( hkl, sc*sig, sc*err ) )

In [None]:
# Write the cleaned, merged reflections to si.hkl (default machine_err).
write_shelx_mergeall( mclean, "si.hkl" )

In [None]:
# Write the instruction file si.ins. Only the CELL line is filled in from the
# data: the wavelength comes from the columnfile parameters and the cell edge
# is the cube root of det(ubi), used for all three axes with 90 degree angles.
# Everything else in the file is fixed text.
w = cf.parameters.get('wavelength')
a = pow( np.linalg.det( g.ubi ), 1/3 )
with open('si.ins','w')as fins:
    fins.write(f"""TITL    52457-ICSD in space group F d -3 m S
CELL    {w:.5f}   {a:.5f}  {a:.5f}  {a:.5f}   90.000   90.000   90.000
ZERR     8        0.0000   0.0000   0.0000    0.000    0.000    0.000
LATT 4
SYMM X+1/4,-Z,Y+1/4
SYMM X+1/4,Z+1/4,-Y
SYMM Z+1/4,Y+1/4,-X
SYMM -Z,Y+1/4,X+1/4
SYMM -Y,X+1/4,Z+1/4
SYMM Y+1/4,-X,Z+1/4
SYMM Z,X,Y
SYMM Y,Z,X
SYMM -Y+1/4,-Z+1/4,X
SYMM Z,-X+1/4,-Y+1/4
SYMM -Y+1/4,Z,-X+1/4
SYMM -Z+1/4,-X+1/4,Y
SYMM -Z+1/4,X,-Y+1/4
SYMM Y,-Z+1/4,-X+1/4
SYMM X,-Y+1/4,-Z+1/4
SYMM -X+1/4,Y,-Z+1/4
SYMM -X+1/4,-Y+1/4,Z
SYMM Y+1/4,X+1/4,-Z
SYMM -Y,-X,-Z
SYMM Z+1/4,-Y,X+1/4
SYMM -Z,-Y,-X
SYMM -X,Z+1/4,Y+1/4
SYMM -X,-Z,-Y
SFAC     Si
UNIT       8
L.S.    10
ACTA
TEMP     18.00
FVAR    0.4788
EXTI
Si1     1   0.125   0.125   0.125  10.04167   0.00942
HKLF    4
END
""")

In [None]:
# Shell escape: run shelxl on job name 'si' (hard-coded, site-specific binary
# path). Presumably it reads the si.ins / si.hkl written above and produces
# the si.fcf that is read below - confirm.
!/home/esrf/wright/bin/shelxl si

In [None]:
import ImageD11.sym_u

In [None]:
# Parse the si.fcf file with CifFile's CIF reader.
from CifFile import ReadCif 
cif = ReadCif('si.fcf')

In [None]:
# Select the data block named 'si' from the parsed file.
block = cif['si']

In [None]:
# Pull the symmetry operator strings and the reflection list out of the
# block, converting the text entries to int (indices) and float (F^2 values).
ops = block['_space_group_symop_operation_xyz']
h = [int(v) for v in block['_refln_index_h']]
k = [int(v) for v in block['_refln_index_k']]
l = [int(v) for v in block['_refln_index_l']]
#fc = [float(v) for v in block['_refln_F_calc']]
fc = [float(v) for v in block['_refln_F_squared_calc']]   # calculated F^2
fo = [float(v) for v in block['_refln_F_squared_meas']]   # measured F^2
len(fc)   # cell output: number of reflections read

In [None]:
# Log-log scatter of the measured (x) against the calculated (y) F^2 values
# read from the fcf file.
# NOTE(review): the 'cro_test' label is never shown (no legend call) and may
# be left over from another sample.
pl.plot(fo, np.array(fc),'.', label = 'cro_test')
pl.xlabel('fobs')
pl.ylabel('fcalc')
pl.loglog()

In [None]:
# Group object whose operator list (grp.group) is replaced below by the
# operators read from the fcf file.
grp = ImageD11.sym_u.group()

In [None]:
# Unique operator matrices from the symmetry strings. Each matrix is scaled by
# 24 and flattened into a 9-tuple of ints so that it can be stored in a set.
mops = list({
    tuple((24 * ImageD11.sym_u.m_from_string(sop).ravel()).astype(int))
    for sop in ops
})

In [None]:
# Cell output: number of distinct operator matrices.
len(set(mops))

In [None]:
# Convert the integer 9-tuples back into 3x3 float matrices (undoing the
# factor 24) and install them as the group's operators. The list order comes
# from iterating a set, so no particular operator is guaranteed to be first.
grp.group = [ np.array( o ).reshape((3,3)).astype(float)/24 for o in set( mops ) ]

In [None]:
# Sanity check: the operator set must contain minus the identity (scaled by 24).
assert( (-24,0,0,0,-24,0,0,0,-24) in mops) 

In [None]:
# Look-up table (h, k, l) -> calculated F^2, filled for every reflection in
# the fcf file and for its image under every operator of the group.
lut = {}
for hi, ki, li, fci in zip(h, k, l, fc):
    lut[ int(hi), int(ki), int(li) ] = fci   #**2
    # Apply *all* operators: grp.group was rebuilt from a set, so the element
    # at position 0 is not guaranteed to be the identity and skipping it could
    # drop a genuine symmetry equivalent.
    for op in grp.group:
        # round to nearest so float noise cannot truncate to the wrong index
        sh, sk, sl = np.rint( op.dot( (hi, ki, li) ) ).astype(int)
        lut[ int(sh), int(sk), int(sl) ] = fci    #**2

In [None]:
# Attach the calculated intensity to each merged peak by looking up its hkl.
# The merged h, k, l columns are floating point and may sit a rounding error
# below the integer, so round to nearest instead of truncating.
hkl = np.rint( np.transpose( (mclean.h, mclean.k, mclean.l) ) ).astype(int)
mclean.addcolumn( np.zeros(mclean.nrows), 'Icalc' )
bad = 0
for i in range(mclean.nrows):
    # A plain tuple key: star-unpacking inside a subscript is a SyntaxError
    # before Python 3.11.
    key = tuple( int(v) for v in hkl[i] )
    try:
        mclean.Icalc[i] = lut[ key ]
    except KeyError:
        bad += 1
        if bad < 10:        # only echo the first few missing reflections
            print(hkl[i], end=' ')
        mclean.Icalc[i] = -1 # missing
print("...",bad,"reflections observed that should be absent (or zero)")
print("Set Icalc to -1 for these")

In [None]:
# Log-log scatter of observed (x) against calculated (y) intensity. The axis
# labels follow the plotted data: x is the merged normalised intensity, y is
# the value looked up from the shelx output.
pl.figure()
pl.plot(  mclean.normalised_intensity, mclean.Icalc, ".")
pl.loglog()
pl.xlabel('observed from sinogram merge')
pl.ylabel( 'calc from shelx')

In [None]:
# Ratio calculated / observed per merged peak. Peaks with no calculated value
# carry Icalc = -1 and therefore get a negative ratio.
mclean.addcolumn( mclean.Icalc/ mclean.normalised_intensity, 'ratio' )

In [None]:
# Log-log scatter of the calc/observed ratio (y) against the observed
# intensity (x); the labels describe what is actually plotted.
pl.figure()
pl.plot(  mclean.normalised_intensity, mclean.ratio , ".")
pl.loglog()
pl.xlabel('observed from sinogram merge')
pl.ylabel('ratio calc from shelx / observed')

In [None]:
import fabio
# Read the mask image again (same file as `mask` above) and show its value
# range and mean as the cell output.
msk = fabio.open( dset.maskfile ).data
msk.min(), msk.max(), msk.mean()

In [None]:
# Overlay on the mask image: all merged peak positions as pixels, plus the
# cleaned peaks whose ratio exceeds 10, coloured by ratio on a log scale.
# Note that `a` is rebound here to the axes object.
f, a = pl.subplots(figsize=(12,12))
a.imshow( msk, origin='lower', cmap='gray_r', vmax=2, vmin=0)
a.plot( mall.fc, mall.sc, ',')
m = mclean.ratio > 10
# NOTE(review): with exactly one selected peak (m.sum() == 1) nothing is
# drawn - confirm whether `> 0` was intended.
if m.sum() > 1:
    f.colorbar(a.scatter( mclean.fc[m], mclean.sc[m], c = mclean.ratio[m], norm='log', cmap='jet'))
a.set(aspect='equal')


In [None]:
# Diagnostic: ratio (log scale) against omega for the cleaned peaks.
f, a = pl.subplots(figsize=(6,6))
a.plot( mclean.omega, mclean.ratio, '.' )
a.set(aspect='auto', xlabel='omega', ylabel='ratio', yscale='log' )

In [None]:
# Diagnostic: ratio against Number_of_pixels, both on log scales, with the
# x axis limited to 1..100.
f, a = pl.subplots(figsize=(6,6))
a.plot( mclean.Number_of_pixels, mclean.ratio, '.' )
a.set(aspect='auto', xlabel='npx', ylabel='ratio', yscale='log', xscale='log', xlim=(1,100))

In [None]:
# Diagnostic: ratio against the reciprocal of the lf column, both on log scales.
f, a = pl.subplots(figsize=(6,6))
a.plot( 1/mclean.lf, mclean.ratio, '.' )
a.set(aspect='auto', xlabel='Lorentz correction', ylabel='ratio', yscale='log', xscale='log' )

In [None]:
# Diagnostic: ratio against eta, coloured by tth; the marker size is the cube
# root of sum_intensity divided by 10.
f, a = pl.subplots(figsize=(6,6))
a.scatter( mclean.eta, mclean.ratio, c = mclean.tth, s = pow( mclean.sum_intensity, 1/3)/10 )
a.set(aspect='auto', xlabel='eta', ylabel='ratio')

In [None]:
# Cell output: number of merged peaks left after all the cuts.
mclean.nrows