# 4a. XRF element detection limits
## Summary
This notebook calculates limits of detection for those deconvoluted and repacked spectra output from the `3_XRF_normalisation_and_repacking` notebook.

In [10]:
import pathlib
import numpy as np
import pandas as pd
import h5py
import matplotlib.pyplot as plt

import seaborn as sns

from shutil import copyfile
from matplotlib.ticker import ScalarFormatter

import sys
sys.path.insert(0, '..\\code\\')
import high_plex_hdf

Set input and output directories

In [12]:
# Set data directory to work from 
base_dir = "C:/Users/MerrickS/OneDrive/Work/2_UZH/Papers/1_MEZ_XRF"
base_dir = pathlib.Path(base_dir)

# Specify the input directory where hdf files to process are located
input_dir = base_dir / 'data' / 'processed' / 'xrf' / '3_norm_repacked_XRF_hdfs'

# Gather filepaths for preprocessed hdfs and config files for XRF fitting
hdf_filepaths = list(input_dir.glob('*.h5'))

# Make output directory segmented images
# out_dir = base_dir / 'data' / 'analysis' / 'xrf' / 'xrf_detection_limits'
out_dir = base_dir / 'data' / 'analysis' / '4_xrf_detection_limits'

out_dir.mkdir(parents=True, exist_ok=True)
print('Detection limits will be output to: \n\t', out_dir) 

# Collect XRF and IMC scan metadata
df_xrf_hdf_files = pd.read_csv(base_dir / 'data' / 'processed' / 'xrf' / '1_reduced_reshaped_hdfs' / 'preprocessed_hdf_config_files.csv')
df_xrf_metadata = pd.read_csv(base_dir / 'data' / 'processed' /  'xrf' / '1_reduced_reshaped_hdfs' / 'xrf_scan_metadata_full.csv')

# Import periodic table csv
df_periodic_table = pd.read_csv(base_dir / 'data' / 'raw' / 'xrf' / 'Periodic Table of Elements.csv')

Detection limits will be output to: 
	 C:\Users\MerrickS\OneDrive\Work\2_UZH\Papers\1_MEZ_XRF\data\analysis\4_xrf_detection_limits


In [13]:
# Get scan names for gelatin standard detection limits
df_detector_standard_scans = df_xrf_metadata[
    (df_xrf_metadata['sample_concentration'].notnull())
    & (df_xrf_metadata['sample_roi'] == 'gelatin centre')
]


In [14]:
df_detector_standard_scans['detector'].unique()

array(['SDD', 'GeCMOS', 'GeCMOS (2nd)'], dtype=object)

In [15]:
df_example_scans = df_detector_standard_scans[
    (df_detector_standard_scans['sample_concentration'] == 200)
    & (df_detector_standard_scans['dwell_ms'] == 1000)
]

In [16]:
df_example_scans

Unnamed: 0,scan_name,sample_name_ref,sample_name,sample_type,sample_roi,beamline,beamtime,scanset,scatter_set,dwell_ms,...,sample_concentration,detector,dual_detector,AB_panel,config_file,hdf_filepath,hdf_full_fpaths,hdf_path,hdf_file,hdf_filename
8,102_t_0001,,sample102_t,gelatin standard,gelatin centre,ESRF-ID15A,LS2893_1,,1,1000.0,...,200.0,SDD,1.0,,21_07_07_SDD_gelatin_ls2893_1.cfg,C:\Users\MerrickS\OneDrive\Work\2_UZH\Papers\1...,C:\Users\MerrickS\OneDrive\Work\2_UZH\Papers\1...,C:\Users\MerrickS\OneDrive\Work\2_UZH\Papers\1...,sample102_t_0001.h5,sample102_t_0001
30,sample102_t_3_0001,,sample102_t,gelatin standard,gelatin centre,ESRF-ID15A,LS2893_3,,7,1000.0,...,200.0,GeCMOS,0.0,,21_07_07_GeCMOS_gelatin.cfg,C:\Users\MerrickS\OneDrive\Work\2_UZH\Papers\1...,C:\Users\MerrickS\OneDrive\Work\2_UZH\Papers\1...,C:\Users\MerrickS\OneDrive\Work\2_UZH\Papers\1...,sample102_t_3_0001.h5,sample102_t_3_0001


In [17]:
example_spectra = {}
for fpath in df_example_scans.hdf_full_fpaths:
    det = df_example_scans.loc[
        df_example_scans['hdf_full_fpaths'] == fpath, 'detector'
    ].iloc[0]
    with h5py.File(fpath, 'r') as hdf:
        # select first 20 spectra for each scan
        example_spectra[det] = hdf['1.1/measurement/falconx_det0'][()][:20, :]  


At this point, these spectra are fitted and export to csvs using the PyMCA GUI using their respective config files. The SDD and GeCMOS .csv files are found in `out_dir`.

In [18]:
df_sdd_spectra = pd.read_csv(list(out_dir.glob('*SDD*.csv'))[0])
df_sdd_spectra['Detector'] = 'SDD'
df_gecmos_spectra = pd.read_csv(list(out_dir.glob('*GeCMOS*.csv'))[0])
df_gecmos_spectra['Detector'] = 'GeCMOS'
df_gecmos_spectra['counts'] = df_gecmos_spectra['counts']*3.1 # to correct for solid angle
df_select_spectra = pd.concat([df_sdd_spectra, df_gecmos_spectra]).reset_index(drop=True)

IndexError: list index out of range

In [9]:
df_gecmos_spectra

NameError: name 'df_gecmos_spectra' is not defined

In [None]:
sns.set_style("dark")

fig, ax = plt.subplots(figsize=(10,3), dpi=100)
g = sns.lineplot(
    data=df_select_spectra,
    x='Energy',
    y='counts',
    hue='Detector',
    style = 'Detector',
    dashes = ['', (3,1)],
    palette=['black', 'blue'],
    ax=ax,
    legend='full'
)

ax.set(yscale="log")
ax.yaxis.set_major_formatter(ScalarFormatter())
ax.set_ylim([0.5,5000])
#ax.set_yticks(list(range(0.5, 900, 200)))
#ax.set_yticks(list(range(0, 900, 50)), minor=True)

ax.set_xlim([0,70])
ax.set_xticks(list(range(0, 80, 10)))
ax.set_xticks(list(range(0, 70, 2)), minor=True)

#Set gridlines
ax.grid(which='major', c='w', lw=3, axis='y')
ax.grid(which='major', c='w', lw=2, axis='x')
ax.grid(which='minor', c='w', lw=1, axis='y')
ax.grid(which='minor', c='w', lw=1, axis='x')

plt.legend(title = 'Detector', bbox_to_anchor=(1.01, 1), loc=2, borderaxespad=0.)
plt.xlabel('Energy (keV)')
plt.ylabel('XRF detector counts')

savename = out_dir / 'Fig1c_Spectra.svg'
print(savename)
plt.savefig(savename, bbox_inches='tight')
plt.show()


In [None]:
hdf_fpath = out_dir / '20_spectra_per_detector.h5'

with h5py.File(hdf_fpath, 'a') as hdf:
    for det in example_spectra:
        if det in hdf:
            hdf[det][...] = example_spectra[det]
        else:
            hdf[det] = example_spectra[det]
           
print('Example spectra exported to ', hdf_fpath)

In [None]:
# Make a dataframe of measurements per concentration for each detector
columns = [
    'detector',
    'element_emission_line', 
    'dwell_ms',
    'concentration_ppm', 
    'img_mean', 
    'img_std', 
    'x_dim', 
    'y_dim',
    'hdf_filepath'    
] 

df_standard_measurements = pd.DataFrame(columns=columns)

for scan in df_detector_standard_scans['hdf_filename']:
    row_dict = dict.fromkeys(columns)
    
    row_metadata = df_detector_standard_scans[
        df_detector_standard_scans['hdf_filename'] == scan
    ]
    
    # Add hdf metadata to row_dict dictionary
    row_dict['hdf_filepath'] = list(input_dir.glob(f'{scan}*.h5'))[0]
    row_dict['concentration_ppm'] = row_metadata['sample_concentration'].iloc[0]
    row_dict['dwell_ms'] = row_metadata['dwell_ms'].iloc[0]
    row_dict['detector'] = row_metadata['detector'].iloc[0]
    
    # Get channel names for hdf
    df_channels = pd.read_hdf(row_dict['hdf_filepath'], key = 'images/df_channel_metadata')
    
    # Get channel data for hdf and add to row_dict dictionary
    with h5py.File(row_dict['hdf_filepath'], 'r') as hdf:
        image_stack = hdf['images/raw'][()]

    for channel in range(image_stack.shape[-1]):
        img = image_stack[:20,:20,channel] # makes image sizes the same for all measures, selects chanels
        assert len(img.shape) == 2      

        row_dict['element_emission_line'] = df_channels['plot_channel'].iloc[channel]
        row_dict['img_mean'] = img.mean()
        row_dict['img_std'] = img.std()
        row_dict['x_dim'] = img.shape[0]
        row_dict['y_dim'] = img.shape[-1]

        df_standard_measurements = df_standard_measurements.append(row_dict, ignore_index=True)


In [None]:
df_standard_measurements

In [None]:
df_standard_measurements['log_concentration'] = np.log(df_standard_measurements['concentration_ppm'] + 1)
df_standard_measurements['log_mean_counts'] = np.log(df_standard_measurements['img_mean'] + 1)

In [None]:
emission_lines = [
    'Y_Ka',
    'Ag_Ka',
    'In_Ka',
    'La_Ka',
    'Ce_Ka',
    'Pr_Ka',
    'Nd_Ka',
    'Sm_Ka',
    'Eu_Ka',
    'Gd_Ka',
    'Tb_Ka',
    'Dy_Ka',
    'Ho_Ka',
    'Er_Ka',
    'Tm_Ka',
    'Yb_Ka',
    'Lu_Ka',
    'Ir_L3',    
]


In [None]:
df_standard_measurements[df_standard_measurements['dwell_ms'] == 10]

In [None]:
df_standard_measurements['detector'].unique()

In [None]:
from numpy.polynomial import Polynomial

limits = [
    'Detector', 
    'dwell_ms',
    'element_emission_line', 
    'blank_counts', 
    'blank_std', 
    'lob_intercept', 
    'lod_intercept', 
    'loq_intercept',
    'lob_concentration', 
    'lod_concentration', 
    'loq_concentration', 
]

df_detection_limits = pd.DataFrame(columns=limits)

for detector in df_standard_measurements['detector'].unique():
    df_detector = df_standard_measurements[
        df_standard_measurements['detector'] == detector
    ]
    
    for dwell_ms in df_detector['dwell_ms'].unique():
            df_detector_dwell = df_detector[
                df_detector['dwell_ms'] == dwell_ms
            ]
        
            for emission_line in emission_lines:
                limits_dict = dict.fromkeys(limits)
                limits_dict['Detector'] = detector
                limits_dict['dwell_ms'] = dwell_ms

                df_emission_line = df_detector_dwell[
                    (df_detector_dwell['element_emission_line'] == emission_line)
                    & (df_detector_dwell['log_concentration'] != 0)
                    #& (df_detector_dwell['log_mean_counts'] > 0)
                ]

                df_emission_line_blank = df_detector_dwell[
                        (df_detector_dwell['element_emission_line'] == emission_line)
                        & (df_detector_dwell['concentration_ppm'] == 0)
                    ]

                # This condition sets the La_Ka, Ce_Ka and Pr_Ka blank to that of Nd_Ka blank to Ce for GeCMOS detector
                if (detector == 'GeCMOS') & (emission_line in ['La_Ka', 'Ce_Ka', 'Nd_Ka']):
                    df_emission_line_blank = df_detector_dwell[
                        (df_detector_dwell['element_emission_line'] == 'Pr_Ka')
                        & (df_detector_dwell['concentration_ppm'] == 0)
                    ]

                limits_dict['element_emission_line'] = emission_line
                limits_dict['blank_counts'] = df_emission_line_blank['img_mean'].iloc[0]
                limits_dict['blank_std'] = df_emission_line_blank['img_std'].iloc[0]

                x_data = df_emission_line['log_concentration']
                y_data = df_emission_line['log_mean_counts']

                x1 = np.sort(x_data, 0)
                y1 = np.sort(y_data, 0)
                plt.plot(x1, y1, 'o')

                p = Polynomial.fit(x_data, y_data, deg=1)        
                x2 = np.sort(x_data, 0)
                y2 = np.sort(p(x_data), 0)
                plt.plot(x2, y2)

                lob = np.log(limits_dict['blank_counts'] + 1.645*limits_dict['blank_std'] + 1)
                lod = np.log(limits_dict['blank_counts'] + 3.33*limits_dict['blank_std'] + 1)
                loq = np.log(limits_dict['blank_counts'] + 10*limits_dict['blank_std'] + 1)

                plt.hlines(lob, xmin = min(x_data), xmax = max(x_data), linestyles = 'dashed')
                plt.hlines(lod, xmin = min(x_data), xmax = max(x_data), linestyles = 'dashdot')
                plt.hlines(loq, xmin = min(x_data), xmax = max(x_data), linestyles = 'dotted')

                # Record concentration where regression line intercepts limit of choice
                limits_dict['lob_intercept'] = (p-lob).roots()[-1]
                limits_dict['lod_intercept'] = (p-lod).roots()[-1]
                limits_dict['loq_intercept'] = (p-loq).roots()[-1]

                # Convert to conecntrations
                limits_dict['lob_concentration'] = np.exp(limits_dict['lob_intercept'] - 1)
                limits_dict['lod_concentration'] = np.exp(limits_dict['lod_intercept'] - 1)
                limits_dict['loq_concentration'] = np.exp(limits_dict['loq_intercept'] - 1)

                plt.xlabel('log [concentration+1]')
                plt.ylabel('log [mean_counts+1]')
                plt.title(f'{emission_line} | {detector} | {dwell_ms} ms')
                plt.show()

                df_detection_limits = df_detection_limits.append(limits_dict, ignore_index=True)



Add in the key columns for plotting out detection limits

In [None]:
df_detection_limits['element'] = (
    df_detection_limits.element_emission_line.str.split(
        '_', expand=True).iloc[:,0]
)
df_detection_limits['emission_line'] = (
    df_detection_limits.element_emission_line.str.split(
        '_', expand=True).iloc[:,-1]
)

df_detection_limits = pd.merge(
    df_periodic_table[['AtomicNumber', 'Symbol']], 
    df_detection_limits, 
    how='right', 
    left_on='Symbol', 
    right_on='element'
)

df_detection_limits['Raster rate (Hz)'] = 1000/df_detection_limits['dwell_ms']


In [None]:
df_detection_limits.columns

Plot out detection limits

In [None]:
sns.set_style("dark")

fig, ax = plt.subplots(figsize=(10,4), dpi=300)
sns.scatterplot(
    #data=df_detection_limits,
    data=df_detection_limits[df_detection_limits['Detector'].isin(['SDD', 'GeCMOS'])],
    x='AtomicNumber',
    y='lob_concentration',
    hue='Raster rate (Hz)',
    palette='Dark2',
    style='Detector',  
    ax=ax,
    s=100
)

#Set y to log scale with plain formatting
from matplotlib.ticker import ScalarFormatter
ax.set(yscale="log")
ax.set_ylim([0.05,150000])
#ax.yaxis.set_major_formatter(ScalarFormatter())

ax.set_xticks(list(range(35, 80, 5)))
ax.set_xticks(list(range(35, 80, 1)), minor=True)
ax.set_xlim([36,80])

#Set gridlines
ax.grid(which='major', c='w', lw=3, axis='y')
ax.grid(which='major', c='w', lw=3, axis='x')
ax.grid(which='minor', c='w', lw=1, axis='y')
ax.grid(which='minor', c='w', lw=1, axis='x')

plt.legend(bbox_to_anchor=(1.01, 1), loc=2, borderaxespad=0.)
plt.xlabel('Atomic Number')
plt.ylabel('Limit of detection (ppm)')

plt.show()


In [None]:
df_detection_limits['Detector'].unique()

In [None]:
df_detection_limits['Raster rate (Hz)'].unique()

In [None]:
df_plot = df_detection_limits[
        (df_detection_limits['Detector'].isin(['SDD', 'GeCMOS (2nd)'])) &
        (df_detection_limits['Raster rate (Hz)'].isin([1, 10, 50, 250, 1000]))
].copy()
df_plot['Detector'] = df_plot['Detector'].replace('GeCMOS (2nd)', 'GeCMOS')

In [None]:
sns.set_style("dark")

fig, ax = plt.subplots(figsize=(10,4), dpi=300)
sns.scatterplot(
    #data=df_detection_limits,
    data=df_detection_limits[(df_detection_limits['Raster rate (Hz)'].isin([1, 10, 50, 250, 1000]))],
    x='AtomicNumber',
    y='lob_concentration',

    #palette = sns.color_palette("dark:salmon_r", as_cmap=True),
    #style='Detector',  
    #hue='Raster rate (Hz)',

    palette=['black', 'blue', 'green'],
    style='Raster rate (Hz)',  
    hue='Detector',

    ax=ax,
    s=50
)

#Set y to log scale with plain formatting
from matplotlib.ticker import ScalarFormatter
import matplotlib.ticker as ticker

ax.set(yscale="log")
ax.set_ylim([0.1,100])
ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda y, _: '{:g}'.format(y)))

ax.set_xticks(list(range(35, 85, 5)))
ax.set_xticks(list(range(35, 80, 1)), minor=True)
ax.set_xlim([35,80])

#Set gridlines
ax.grid(which='major', c='w', lw=3, axis='y')
ax.grid(which='major', c='w', lw=3, axis='x')
ax.grid(which='minor', c='w', lw=1, axis='y')
ax.grid(which='minor', c='w', lw=1, axis='x')

plt.legend(bbox_to_anchor=(1.01, 1), loc=2, borderaxespad=0.)
plt.xlabel('Atomic Number')
plt.ylabel('Limit of detection (ppm)')

savename = out_dir / 'Fig1d_LimitOfDetection.svg'
print(savename)
plt.savefig(savename, bbox_inches='tight')

plt.show()

In [None]:
sns.set_style("dark")

fig, ax = plt.subplots(figsize=(10,4), dpi=300)
sns.scatterplot(
    #data=df_detection_limits,
    data=df_plot[(df_plot['Raster rate (Hz)'].isin([1, 10, 50, 250, 1000]))],
    x='AtomicNumber',
    y='lob_concentration',

    #palette = sns.color_palette("dark:salmon_r", as_cmap=True),
    #style='Detector',  
    #hue='Raster rate (Hz)',

    palette=['black', 'blue'],
    style='Raster rate (Hz)',  
    hue='Detector',

    ax=ax,
    s=100
)

#Set y to log scale with plain formatting
from matplotlib.ticker import ScalarFormatter
ax.set(yscale="log")
ax.set_ylim([0.07,17])
ax.yaxis.set_major_formatter(ScalarFormatter())



ax.set_xticks(list(range(35, 80, 5)))
ax.set_xticks(list(range(35, 80, 1)), minor=True)
ax.set_xlim([36,80])

#Set gridlines
ax.grid(which='major', c='w', lw=3, axis='y')
ax.grid(which='major', c='w', lw=3, axis='x')
ax.grid(which='minor', c='w', lw=1, axis='y')
ax.grid(which='minor', c='w', lw=1, axis='x')

plt.legend(bbox_to_anchor=(1.01, 1), loc=2, borderaxespad=0.)
plt.xlabel('Atomic Number')
plt.ylabel('Limit of detection (ppm)')

plt.show()

In [None]:
sns.set_style("dark")

fig, ax = plt.subplots(figsize=(10,3), dpi=300)
sns.scatterplot(
    #data=df_detection_limits,
    data=df_detection_limits[df_detection_limits['Detector'].isin(['GeCMOS (2nd)'])],
    x='AtomicNumber',
    y='lob_concentration',
    hue='Raster rate (Hz)',
    palette='Dark2',
    style='Detector',
    markers={'GeCMOS (2nd)':'v'},  
    ax=ax,
    s=100
)

#Set y to log scale with plain formatting
from matplotlib.ticker import ScalarFormatter
ax.set(yscale="log")
ax.set_ylim([0.05,15])
#ax.yaxis.set_major_formatter(ScalarFormatter())

ax.set_xticks(list(range(35, 80, 5)))
ax.set_xticks(list(range(35, 80, 1)), minor=True)
ax.set_xlim([36,80])

#Set gridlines
ax.grid(which='major', c='w', lw=3, axis='y')
ax.grid(which='major', c='w', lw=3, axis='x')
ax.grid(which='minor', c='w', lw=1, axis='y')
ax.grid(which='minor', c='w', lw=1, axis='x')

plt.legend(bbox_to_anchor=(1.01, 1), loc=2, borderaxespad=0.)
plt.xlabel('Atomic Number')
plt.ylabel('Limit of detection (ppm)')

plt.show()

In [None]:
sns.set_style("dark")

fig, ax = plt.subplots(figsize=(10,4), dpi=300)
sns.scatterplot(
    #data=df_detection_limits,
    data=df_detection_limits[df_detection_limits['Detector'].isin(['GeCMOS (2nd)'])],
    x='AtomicNumber',
    y='lob_concentration',
    hue='Raster rate (Hz)',
    palette='Dark2',
    style='Detector',  
    ax=ax,
    s=50
)

#Set y to log scale with plain formatting
from matplotlib.ticker import ScalarFormatter
ax.set(yscale="log")
ax.set_ylim([0.05,150])
#ax.yaxis.set_major_formatter(ScalarFormatter())

ax.set_xticks(list(range(35, 80, 5)))
ax.set_xticks(list(range(35, 80, 1)), minor=True)
ax.set_xlim([36,80])

#Set gridlines
ax.grid(which='major', c='w', lw=3, axis='y')
ax.grid(which='major', c='w', lw=3, axis='x')
ax.grid(which='minor', c='w', lw=1, axis='y')
ax.grid(which='minor', c='w', lw=1, axis='x')

plt.legend(bbox_to_anchor=(1.01, 1), loc=2, borderaxespad=0.)
plt.xlabel('Atomic Number')
plt.ylabel('Limit of detection (ppm)')

plt.show()

In [None]:
df1 = df_detection_limits[df_detection_limits['Detector'] == 'SDD']
df2 = df_detection_limits[df_detection_limits['Detector'] == 'GeCMOS']
df3 = df_detection_limits[df_detection_limits['Detector'] == 'GeCMOS (2nd)']

In [None]:
for element in df1['Symbol']:
    sdd = df1.lob_concentration[df1['Symbol'] == element].iloc[0]
    gecmos = df2.lob_concentration[df2['Symbol'] == element].iloc[0]
    gecmos2 = df3.lob_concentration[df3['Symbol'] == element].iloc[0]

    print(element, sdd/gecmos, sdd/gecmos2)

In [None]:
df2[['Symbol', 'lob_concentration']]