<a href="https://colab.research.google.com/github/OpenXRF/lead-screening/blob/main/Figure4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

OpenXRF_logo_red.svg

# **Towards low-cost lead screening with transmission XRF**
---

This notebook reproduces the results in **Figure 3** of our paper:


>  [C. Gaßner, J. Reisewitz, J. E. Forsyth, K. Shaker, "Towards low-cost lead screening with transmission XRF" arXiv:2511.09110 (2025)](https://doi.org/10.48550/arXiv.2511.09110)


More information can be found here:

*   Project website: [openxrf.org](https://openxrf.org/)
*   GitHub repository: [github.com/OpenXRF/lead-screening](https://github.com/OpenXRF/lead-screening/)


---


In [2]:
#@title Install packages { display-mode: "form" }
# @markdown Python packages for data processing and visualization

import os
import io
import zipfile
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from time import time
from scipy.optimize import curve_fit

print("All packages installed successfully!")

All packages installed successfully!


# **Figure 4:** Required measurement time as a function of detector specifications


---

* Callculating SNR for 100, 200 ppm Pb in soil
* 2h max exposure for all ppm values
* Data is provided in Github
* Different settings for efficency, resolution and detector size

-> Script computes the visualized data and displays the plot itself in the last cell...


In [5]:
#@title 1) Load Data
# @markdown Load simulated data from Github

start_time = time()

!rm -rf sim_data
!mkdir -p sim_data
!wget -O sim_data/Figure3-4.zip "https://github.com/OpenXRF/lead-screening/raw/main/data/Figure3-4.zip"
!unzip -d sim_data sim_data/Figure3-4.zip

#zip_url = "https://github.com/OpenXRF/lead-screening/raw/main/data/Figure3-4.zip"

target_dir = "sim_data"

#os.makedirs(target_dir, exist_ok=True)

#print("Downloading ZIP with simulated data...")
#resp = requests.get(zip_url)
#resp.raise_for_status()

#with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
#    zf.extractall(target_dir)

print("ZIP downloaded and extracted into:", target_dir)

def read_dataset(base_conc, base_dir=target_dir):
    """
    Reads the 7 data csv files for one ppm concentration
    Output: {'S0': df, ..., 'S6': df}
    """
    ds = {}
    base_dir = os.path.join(base_dir, "Figure 3-4")
    for source_number in range(7):
        filename = f"data_{base_conc}ppm_S{source_number}.csv"
        file_path = os.path.join(base_dir, filename)

        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        df = pd.read_csv(
            file_path,
            usecols=["EventID", "Particle", "Type", "Energy(MeV)", "x(mm)", "y(mm)"])
        ds[f"S{source_number}"] = df

    return ds

rng = np.random.default_rng(0)

print("Reading simulated data from extracted ZIP...")

c_vals = np.array([10, 50, 100, 200, 500, 1000])  # ppm

data100ppm  = read_dataset(100)
print('Done 100ppm')
data200ppm  = read_dataset(200)
print('Done 200ppm')

time_read = time() - start_time

--2025-11-16 18:12:55--  https://github.com/OpenXRF/lead-screening/raw/main/data/Figure3-4.zip
Resolving github.com (github.com)... 140.82.116.4
Connecting to github.com (github.com)|140.82.116.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://media.githubusercontent.com/media/OpenXRF/lead-screening/main/data/Figure3-4.zip [following]
--2025-11-16 18:12:55--  https://media.githubusercontent.com/media/OpenXRF/lead-screening/main/data/Figure3-4.zip
Resolving media.githubusercontent.com (media.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.111.133, ...
Connecting to media.githubusercontent.com (media.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1871461965 (1.7G) [application/zip]
Saving to: ‘sim_data/Figure3-4.zip’


2025-11-16 18:13:23 (63.8 MB/s) - ‘sim_data/Figure3-4.zip’ saved [1871461965/1871461965]

Archive:  sim_data/Figure3-4.zip
   creating: sim_data/Fig

In [8]:
#@title 2) Analyse experimental parameters { display-mode: "form" }

c = [100, 200]
t1 = time()
FWHM_keV_base = 0.15  # [keV]
detector_area_base = 50.0  # [mm²]
detector_radius_base = np.sqrt(detector_area_base / np.pi)

binwidth = 0.03  # [keV]
E = np.arange(0, 75 + binwidth, binwidth)

# Peak-Parameter
peak_alpha_E = 10.55  # [keV]
peak_beta_E = 12.65   # [keV]

# Parameter-Ranges
resolution_values = [0.15, 0.25, 0.35, 0.50]  # [keV]
efficiency_values = [1.00, 0.75, 0.50, 0.25]
datasets_base = [data100ppm, data200ppm]

# preprocessed[res_idx][conc_idx] -> Dict mit S0..S6, alpha_window, beta_window
preprocessed = [[None for _ in c] for _ in resolution_values]

for res_idx, FWHM in enumerate(resolution_values):
    peak_window = FWHM
    alpha_window = (E >= peak_alpha_E - peak_window) & (E <= peak_alpha_E + peak_window)
    beta_window = (E >= peak_beta_E - peak_window) & (E <= peak_beta_E + peak_window)

    for conc_idx, data_conc in enumerate(datasets_base):
        entry = {"alpha_window": alpha_window, "beta_window": beta_window}
        for s in range(7):
            field = f"S{s}"
            df = data_conc[field]

            df["E"] = df["Energy(MeV)"] * 1000.0 + (FWHM / 2.355) * np.random.randn(len(df))

            df["r_hit"] = np.sqrt(df["x(mm)"]**2 + df["y(mm)"]**2)

            for eff_idx, eff_val in enumerate(efficiency_values):
                mask_name = f"eff_mask_{eff_idx+1}"
                df[mask_name] = np.random.rand(len(df)) <= eff_val

            entry[field] = df

        preprocessed[res_idx][conc_idx] = entry

time_preprocess = time() - t1
print(f"Pre-processing done! Took {time_preprocess:.0f} seconds")

fluence = 995 * np.pi * 2 * (1 - np.cos(np.pi / 4))  # [ph/s]
step_size_min = 5  # [min]
max_event = 180 * 60 * fluence
step_size = step_size_min * 60 * fluence
num_steps = int(np.floor(max_event / step_size))
time_values = np.arange(1, num_steps + 1) * step_size_min  # [min]

area_values = [50.0, 40.0, 25.0, 12.5]  # [mm²]
detector_radii = np.sqrt(np.array(area_values) / np.pi)

LOD_results = {
    "efficiency": np.zeros((len(c), len(efficiency_values))),
    "efficiency_err": np.zeros((len(c), len(efficiency_values))),
    "resolution": np.zeros((len(c), len(resolution_values))),
    "resolution_err": np.zeros((len(c), len(resolution_values))),
    "area": np.zeros((len(c), len(area_values))),
    "area_err": np.zeros((len(c), len(area_values))),
}

def calculateLOD_optimized(data_struct, eff_idx, detector_radius,
                           time_values, num_steps, step_size,
                           E, binwidth):
    alpha_window = data_struct["alpha_window"]
    beta_window = data_struct["beta_window"]

    signal_over_time = np.zeros(num_steps, dtype=float)
    background_over_time = np.zeros(num_steps, dtype=float)
    SNR_combined = np.zeros(num_steps, dtype=float)

    eff_field = f"eff_mask_{eff_idx+1}"

    filtered_data = []
    for s in range(7):
        field = f"S{s}"
        df = data_struct[field]
        base_mask = (
            df[eff_field] &
            (df["Particle"] == "gamma") &
            (df["r_hit"] <= detector_radius)
        )
        filtered_data.append(df.loc[base_mask])

    for step in range(num_steps):
        end_event = (step + 1) * step_size

        total_alpha_secondary = 0
        total_alpha_primary = 0
        total_beta_secondary = 0
        total_beta_primary = 0

        for s in range(7):
            df = filtered_data[s]
            event_mask = df["EventID"] < end_event
            data_step = df.loc[event_mask]

            if data_step.empty:
                continue

            is_primary = data_step["Type"] == "Primary"
            primary_E = data_step.loc[is_primary, "E"].to_numpy()
            secondary_E = data_step.loc[~is_primary, "E"].to_numpy()

            primary_hist, _ = np.histogram(primary_E, bins=np.append(E, E[-1] + binwidth))
            secondary_hist, _ = np.histogram(secondary_E, bins=np.append(E, E[-1] + binwidth))

            total_alpha_primary += primary_hist[alpha_window].sum()
            total_alpha_secondary += secondary_hist[alpha_window].sum()
            total_beta_primary += primary_hist[beta_window].sum()
            total_beta_secondary += secondary_hist[beta_window].sum()

        total_secondary = total_alpha_secondary + total_beta_secondary
        total_primary = total_alpha_primary + total_beta_primary

        signal_over_time[step] = total_secondary
        background_over_time[step] = total_primary

        if (total_secondary + total_primary) > 0:
            SNR_combined[step] = total_secondary / np.sqrt(total_secondary + total_primary)
        else:
            SNR_combined[step] = 0.0

    def sqrt_model(t, A):
        return A * np.sqrt(t)

    t_data = np.asarray(time_values, dtype=float)
    y_data = np.asarray(SNR_combined, dtype=float)

    popt, _ = curve_fit(sqrt_model, t_data, y_data, p0=[1.0], maxfev=10000)
    A_value = popt[0]

    t_LOD = 9.0 / A_value**2  # SNR = 3 → t = 9/A²

    def linear_model(t, A):
        return A * t

    s_fit, _ = curve_fit(linear_model, t_data, signal_over_time, p0=[1.0], maxfev=10000)
    b_fit, _ = curve_fit(linear_model, t_data, background_over_time, p0=[1.0], maxfev=10000)

    s = s_fit[0]
    b = b_fit[0]
    t = t_LOD

    sigma_t = 9.0 * np.sqrt(((s + 2 * b)**2 / s**5) * (1.0 / t) + b / s**4 * (1.0 / t))
    return t_LOD, sigma_t

print("\n=== Evaluating Detection Efficiency ===")
t2 = time()

for eff_idx, eff_val in enumerate(efficiency_values):
    print(f"Processing efficiency: {eff_val*100:.0f}%")
    for conc_idx, conc_val in enumerate(c):
        data_struct = preprocessed[0][conc_idx]  # Baseline-Auflösung (index 0 → 0.15 keV)
        t_lod, sigma = calculateLOD_optimized(
            data_struct, eff_idx,
            detector_radius_base,
            time_values, num_steps, step_size,
            E, binwidth
        )
        LOD_results["efficiency"][conc_idx, eff_idx] = t_lod
        LOD_results["efficiency_err"][conc_idx, eff_idx] = sigma

time_eff = time() - t2
print(f"Efficiency evaluation done! Took {time_eff:.0f} seconds")

print("\n=== Evaluating Spectral Resolution ===")
t3 = time()

for res_idx, res_val in enumerate(resolution_values):
    print(f"Processing resolution: {res_val*1000:.0f} eV")
    for conc_idx, conc_val in enumerate(c):
        data_struct = preprocessed[res_idx][conc_idx]
        t_lod, sigma = calculateLOD_optimized(
            data_struct, 0,  # eff_idx = 0 → 100%
            detector_radius_base,
            time_values, num_steps, step_size,
            E, binwidth
        )
        LOD_results["resolution"][conc_idx, res_idx] = t_lod
        LOD_results["resolution_err"][conc_idx, res_idx] = sigma

time_res = time() - t3
print(f"Resolution evaluation done! Took {time_res:.0f} seconds")

print("\n=== Evaluating Detector Area ===")
t4 = time()

for area_idx, area in enumerate(area_values):
    print(f"Processing area: {area:.1f} mm²")
    for conc_idx, conc_val in enumerate(c):
        data_struct = preprocessed[0][conc_idx]  # baseline resolution & eff
        t_lod, sigma = calculateLOD_optimized(
            data_struct, 0,  # 100% Effizienz
            detector_radii[area_idx],
            time_values, num_steps, step_size,
            E, binwidth
        )
        LOD_results["area"][conc_idx, area_idx] = t_lod
        LOD_results["area_err"][conc_idx, area_idx] = sigma

time_area = time() - t4
print(f"Area evaluation done! Took {time_area:.0f} seconds")

print("\n=== Computation Time Summary ===")
print(f"Data reading: {time_read:.1f} s")
print(f"Pre-processing: {time_preprocess:.1f} s")
print(f"Efficiency evaluation: {time_eff:.1f} s")
print(f"Resolution evaluation: {time_res:.1f} s")
print(f"Area evaluation: {time_area:.1f} s")
print(f"Total time: {time_read + time_preprocess + time_eff + time_res + time_area:.1f} s")


Pre-processing done! Took 16 seconds

=== Evaluating Detection Efficiency ===
Processing efficiency: 100%
Processing efficiency: 75%
Processing efficiency: 50%
Processing efficiency: 25%
Efficiency evaluation done! Took 309 seconds

=== Evaluating Spectral Resolution ===
Processing resolution: 150 eV
Processing resolution: 250 eV
Processing resolution: 350 eV
Processing resolution: 500 eV
Resolution evaluation done! Took 485 seconds

=== Evaluating Detector Area ===
Processing area: 50.0 mm²
Processing area: 40.0 mm²
Processing area: 25.0 mm²
Processing area: 12.5 mm²
Area evaluation done! Took 326 seconds

=== Computation Time Summary ===
Data reading: 217.3 s
Pre-processing: 16.4 s
Efficiency evaluation: 309.3 s
Resolution evaluation: 485.5 s
Area evaluation: 326.3 s
Total time: 1354.8 s


In [15]:
#@title 3) Plot LOD and SRN curve { display-mode: "form" }

from plotly import colors

baseline_eff  = LOD_results["efficiency"][:, 0]
baseline_res  = LOD_results["resolution"][:, 0]
baseline_area = LOD_results["area"][:, 0]

color_seq = colors.qualitative.Plotly
if len(color_seq) < len(c):
    color_seq = (color_seq * (len(c) // len(color_seq) + 1))[:len(c)]

fig = make_subplots(
    rows=1,
    cols=3,
    subplot_titles=(
        "Effect of Detection Efficiency",
        "Effect of Spectral Resolution",
        "Effect of Detector Area"
    ),
    horizontal_spacing=0.08
)

x_eff = np.array(efficiency_values) * 100.0  # in %

for conc_idx, conc_val in enumerate(c):
    y  = LOD_results["efficiency"][conc_idx, :]
    yerr = LOD_results["efficiency_err"][conc_idx, :]
    base = baseline_eff[conc_idx]

    fig.add_trace(
        go.Scatter(
            x=x_eff,
            y=y,
            mode="lines+markers",
            line=dict(dash="dot", width=1),
            marker=dict(size=6, color=color_seq[conc_idx]),
            error_y=dict(
                type="data",
                array=yerr,
                visible=True
            ),
            name=f"{conc_val} ppm",
            showlegend=True
        ),
        row=1, col=1
    )

    fig.add_trace(
        go.Scatter(
            x=[25, 100],
            y=[base, base],
            mode="lines",
            line=dict(color="rgba(80,80,80,0.8)", dash="dash", width=0.8),
            showlegend=False
        ),
        row=1, col=1
    )

    for xv, yv in zip(x_eff, y):
        factor = yv / base if base > 0 else np.nan
        fig.add_annotation(
            x=xv - 3,
            y=yv + 4,
            text=f"{factor:.1f}x",
            showarrow=False,
            font=dict(size=8),
            xanchor="right",
            row=1, col=1
        )

fig.update_xaxes(
    title_text="Detection Efficiency (%)",
    tickvals=sorted(x_eff),
    range=[105, 20],  # invert_xaxis wie in Matplotlib
    row=1, col=1
)
fig.update_yaxes(
    title_text="LOD Time (minutes)",
    range=[0, 300],
    row=1, col=1
)

x_res = np.array(resolution_values) * 1000.0  # in eV

for conc_idx, conc_val in enumerate(c):
    y  = LOD_results["resolution"][conc_idx, :]
    yerr = LOD_results["resolution_err"][conc_idx, :]
    base = baseline_res[conc_idx]

    fig.add_trace(
        go.Scatter(
            x=x_res,
            y=y,
            mode="lines+markers",
            line=dict(dash="dot", width=1),
            marker=dict(size=6, color=color_seq[conc_idx]),
            error_y=dict(
                type="data",
                array=yerr,
                visible=True
            ),
            name=f"{conc_val} ppm",
            showlegend=False
        ),
        row=1, col=2
    )

    fig.add_trace(
        go.Scatter(
            x=[100, 550],
            y=[base, base],
            mode="lines",
            line=dict(color="rgba(80,80,80,0.8)", dash="dash", width=0.8),
            showlegend=False
        ),
        row=1, col=2
    )

    for xv, yv in zip(x_res, y):
        factor = yv / base if base > 0 else np.nan
        fig.add_annotation(
            x=xv + 18,
            y=yv + 4,
            text=f"{factor:.1f}x",
            showarrow=False,
            font=dict(size=8),
            xanchor="left",
            row=1, col=2
        )

fig.update_xaxes(
    title_text="Energy Resolution (eV)",
    tickvals=sorted(x_res),
    range=[100, 550],
    row=1, col=2
)
fig.update_yaxes(
    title_text="LOD Time (minutes)",
    range=[0, 110],
    row=1, col=2
)

x_area = np.array(area_values)  # mm²

for conc_idx, conc_val in enumerate(c):
    y  = LOD_results["area"][conc_idx, :]
    yerr = LOD_results["area_err"][conc_idx, :]
    base = baseline_area[conc_idx]

    fig.add_trace(
        go.Scatter(
            x=x_area,
            y=y,
            mode="lines+markers",
            line=dict(dash="dot", width=1),
            marker=dict(size=6, color=color_seq[conc_idx]),
            error_y=dict(
                type="data",
                array=yerr,
                visible=True
            ),
            name=f"{conc_val} ppm",
            showlegend=False
        ),
        row=1, col=3
    )

    fig.add_trace(
        go.Scatter(
            x=[10, 55],
            y=[base, base],
            mode="lines",
            line=dict(color="rgba(80,80,80,0.8)", dash="dash", width=0.8),
            showlegend=False
        ),
        row=1, col=3
    )

    for xv, yv in zip(x_area, y):
        factor = yv / base if base > 0 else np.nan
        fig.add_annotation(
            x=xv - 1.8,
            y=yv + 4,
            text=f"{factor:.1f}x",
            showarrow=False,
            font=dict(size=8),
            xanchor="right",
            row=1, col=3
        )

fig.update_xaxes(
    title_text="Detector Area (mm²)",
    tickvals=sorted(x_area),
    range=[55, 10],  # invert_xaxis
    row=1, col=3
)
fig.update_yaxes(
    title_text="LOD Time (minutes)",
    range=[0, 350],
    row=1, col=3
)

fig.update_layout(
    width=900,
    height=350,
    title=(
        "Detector Parameter Effects on LOD Time<br>"
        "Baseline: 7× Am241, 150 eV, 50 mm², 100% efficiency"
    ),
    title_x=0.5,
    font=dict(size=10, family="Arial"),
    legend=dict(
        x=0.0,
        y=-0.2,
        xanchor="left",
        yanchor="top",
        orientation="h",
        font=dict(size=8),
    ),
    margin=dict(t=80, l=60, r=20, b=100)
)

fig.show()