<a href="https://colab.research.google.com/github/OpenXRF/lead-screening/blob/main/Fig3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

OpenXRF_logo_red.svg

# Towards low-cost lead screening with transmission XRF
---


[Citation]


*   Project website: [openxrf.org](https://openxrf.org/)
*   Project GitHub: [github.com/OpenXRF/lead-screening](https://github.com/OpenXRF/lead-screening/)


# Figure 3: Showing LOD thresholds

* Calculating SNR for 10, 50, 100, 200, 500, 1000 ppm Pb in soil
* 2h max exposure for all ppm values
* Data is provided in Google Drive

-> Script computes the visualized data and displays the plot itself in the last cell...

In [2]:
#@title Install packages { display-mode: "form" }
# @markdown Python packages for data processing and visualization

!pip install -q gdown pandas numpy matplotlib plotly
import gdown
import pandas as pd
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from time import time

print("All packages installed successfully!")

All packages installed successfully!


In [5]:
#@title 1) Load simulated data
## @markdown Loading the data from Drive and add the energy resolution and the radius

# Drive IDs
#
# Google Drive file IDs of the simulated data sets, one CSV file per
# (concentration, source) pair.  Naming convention: ID_<ppm>_<source number>,
# with source numbers 0..6.  read_dataset() looks these variables up BY NAME
# through globals(), so renaming any of them breaks the download.

# Wall-clock start, used at the end of this cell to report the loading time.
start_time = time()

# 10 ppm
ID_10_0 = "14Ie8XVcdB__RBZzvPiEQ-Gi4AdX4tKEh"
ID_10_1 = "1tX7mb81rHj4HCiuefO3RGTJEr1F6mmGO"
ID_10_2 = "1a4p1p1BTF6RFzNr_ZFgUtbZ1kg3OvYjj"
ID_10_3 = "1f4IsH-72QQizuZdoI7d_KFTXM-G_c36i"
ID_10_4 = "1DhfeCh-P56MPfeO9bh830EKZe1WSMBkp"
ID_10_5 = "157_sJfWBRId8TvBQqgqF94O02ktwHP2U"
ID_10_6 = "1ODVjzullAOqDeHKRgSz4YJTtmUI4h45R"

# 50 ppm
ID_50_0 = "1SBXNvIt9RtEs-WJExY2Ph9Ptmc_o-qME"
ID_50_1 = "1IH0Of7CfK9cnvTWUKhEqay8iiQt_vUEC"
ID_50_2 = "1fiZbNpS0jzS651iC9Zg6OlKgGYwdmWLI"
ID_50_3 = "1_yIR55ajSCSKD_M2JDX3Q2xFmK5yEzrW"
ID_50_4 = "1YF0YcTshUX6VMoqr08xOyJt1ChfWvQyG"
ID_50_5 = "198W2-VjYlNQQR0fWnGpJ_xlaef0uXkxi"
ID_50_6 = "1VBExZ7AU7xc7vNuGYvd-Rt-jk3qJZa9t"

# 100 ppm
ID_100_0 = "1CMosVezrnpJSAyM4EumSCim_BHKj2nXd"
ID_100_1 = "1SzqKSACYTQBodMxHZdoVfs3oFI9Ww_gB"
ID_100_2 = "1ORShpAKY_Sgzz3odSIoT4g-piIm3neAL"
ID_100_3 = "1Ywki8jrTJZ6ojiEg-qSmXh05KAmkXjxm"
ID_100_4 = "1Ab1fUVQwk8dQxHpKM-VNDfRWuDX3oD7C"
ID_100_5 = "1zHNjkPcghWdVFPfQ1pZ9Wm8Woe0zm-hj"
ID_100_6 = "1-8XsKdq79xtAWtX04VUQeRUsKfSZ0ikO"

# 200 ppm
ID_200_0 = "1f72MKreFJc6lPkmFWiilcRA1dV7z7wjV"
ID_200_1 = "1UNKIt370FKnp-D_E_RNK9EHyu6gquBEj"
ID_200_2 = "1nmDeGP6eHXl_mj1oARZbwpveX5sJlwzm"
ID_200_3 = "1BH380btO5mKP5JHFmYV0mptTBv-O-QEW"
ID_200_4 = "1ksh3hYTpq69jU4srbkDh3CsBbGtCXQqQ"
ID_200_5 = "1M9792wHtc6CpiHe7bySofGxzw4OZhGaq"
ID_200_6 = "1qvpdKHlsa5bEp_9kHACTKrj-4o3_j1IR"

# 500 ppm
ID_500_0 = "1vxyeftCuV7rJKvbal0DRiwX52ZXr1wRA"
ID_500_1 = "1T1IXqNwGvAjBVdHp8vl2mXuGYpybms94"
ID_500_2 = "14pIhwrFE1DHyRlFYTDBWWbhUnk7TXxGq"
ID_500_3 = "1qZot0yXnbKP9w80WhSAo_fyRW-K0_uMy"
ID_500_4 = "19VL-6BgK5juD3H91kDNIjs-usjKNJL5X"
ID_500_5 = "14bCJvYhENSK3vnTUezHZFVjIDL1CPms7"
ID_500_6 = "1tTudvasUyfJpLEnuLGQ_uO53KPD87xvf"

# 1000 ppm
ID_1000_0 = "1FEvvAw3xApOI2uJBGiaGiaP2we5aY0dz"
ID_1000_1 = "1lP-pLKILJwQliF0EokGrCyxvTNg8QJyi"
ID_1000_2 = "1C_yBwnMvP44Jnn2CaUEn7Y6qRPO5PZIm"
ID_1000_3 = "1b3JrEi0BiI2hIv-thX7bZyx5bv8UW0Pe"
ID_1000_4 = "1oRSHcxBa8Dgz-fJGkorPxz2EEj2eNwVV"
ID_1000_5 = "1P6SDkaKuPWn60Yglu_dRhzek9aO0UtET"
ID_1000_6 = "1qo5_wEz-Q_VYIn6EvRpAVyx62lAzjgcS"



def read_dataset(base_conc):
    """
    Read the 7 per-source CSV files for one ppm concentration.

    For each source number 0..6 the Google Drive file ID is taken from the
    module-level variable ``ID_{base_conc}_{source_number}``.  The file is
    stored as ``data_{base_conc}_{source_number}.csv`` in the working
    directory and parsed with pandas.

    A non-empty local copy is reused instead of being downloaded again, so
    re-running the cell does not hit Google Drive 42 more times (repeated
    access is one of the causes Drive/gdown reports for refused downloads).
    Delete the local CSV files to force a fresh download.

    Parameters
    ----------
    base_conc : int
        Pb concentration in ppm; only used to build variable and file names.

    Returns
    -------
    dict
        ``{'S0': df, ..., 'S6': df}`` with one DataFrame per source.

    Raises
    ------
    KeyError
        If the ID variable for a (concentration, source) pair is not defined.
    RuntimeError
        If gdown reports a failed download by returning ``None``.
        Exceptions raised by gdown itself propagate unchanged.
    """
    import os  # local import so the function does not depend on the import cell

    ds = {}
    for source_number in range(7):
        var_name = f"ID_{base_conc}_{source_number}"
        try:
            file_id = globals()[var_name]
        except KeyError:
            # `from None`: the inner KeyError carries no extra information.
            raise KeyError(f"Variable '{var_name}' not defined.") from None

        csv_path = f'data_{base_conc}_{source_number}.csv'
        already_downloaded = os.path.isfile(csv_path) and os.path.getsize(csv_path) > 0
        if not already_downloaded:
            url = f"https://drive.google.com/uc?export=download&id={file_id}"
            downloaded = gdown.download(url = url,
                   output=csv_path,
                   quiet=True, fuzzy=True, use_cookies=False)
            if downloaded is None:
                # Some gdown versions signal failure with None instead of raising;
                # without this check read_csv would fail with a misleading error.
                raise RuntimeError(
                    f"Download of '{csv_path}' (Drive ID {file_id}) failed."
                )

        ds[f"S{source_number}"] = pd.read_csv(csv_path)
    return ds

def add_energy_resolution_and_radius(ds_dict, FWHM_keV, detector_radius_mm, rng):
    """
    Add the columns 'E' and 'r_hit' to every DataFrame in ``ds_dict``.

    * 'E'     : 'Energy_MeV_' scaled by 1000 plus zero-mean Gaussian noise
                with standard deviation ``FWHM_keV / 2.355``.
    * 'r_hit' : sqrt(x_mm_**2 + y_mm_**2) of each row.

    The frames are modified in place and the same dict is returned.
    ``detector_radius_mm`` is accepted for interface compatibility but is
    not used inside this function.  ``rng`` must provide ``normal(loc,
    scale, size)`` (e.g. a ``numpy.random.Generator``); one draw of
    ``len(df)`` samples is made per frame, in dict order.
    """
    noise_sigma_keV = FWHM_keV / 2.355  # FWHM -> Gaussian standard deviation

    for frame in ds_dict.values():
        energy_keV = frame["Energy_MeV_"] * 1000.0
        smearing = rng.normal(0, noise_sigma_keV, size=len(frame))
        frame["E"] = energy_keV + smearing

        frame["r_hit"] = np.sqrt(frame["x_mm_"]**2 + frame["y_mm_"]**2)

    return ds_dict

# Fixed seed so the energy smearing is reproducible between runs.
rng = np.random.default_rng(0)

print("Reading simulated data...")

c_vals = np.array([10, 50, 100, 200, 500, 1000])  # ppm

# One dict of 7 DataFrames per concentration, in the same order as c_vals.
datasets = []
for ppm in c_vals.tolist():
    datasets.append(read_dataset(ppm))
    print(f'Done {ppm}ppm')

FWHM_keV = 0.15
print(f"Adding realistic energy resolution: {FWHM_keV:.2f} keV...")

detector_area = 50.0  # mm^2
detector_radius = np.sqrt(detector_area / np.pi)  # mm

# Adds the 'E' and 'r_hit' columns; the frames are modified in place.
datasets = [
    add_energy_resolution_and_radius(
        per_source,
        FWHM_keV=FWHM_keV,
        detector_radius_mm=detector_radius,
        rng=rng
    )
    for per_source in datasets
]

# Keep the per-concentration names available for interactive use.
(data10ppm,
  data50ppm,
  data100ppm,
  data200ppm,
  data500ppm,
  data1000ppm) = datasets

print(f"Data loaded successfully (took {time() - start_time:.1f} s)")

Reading simulated data...


FileURLRetrievalError: Failed to retrieve file url:

	Cannot retrieve the public link of the file. You may need to change
	the permission to 'Anyone with the link', or have had many accesses.
	Check FAQ in https://github.com/wkentaro/gdown?tab=readme-ov-file#faq.

You may still be able to access the file from the browser:

	https://drive.google.com/uc?id=14Ie8XVcdB__RBZzvPiEQ-Gi4AdX4tKEh

but Gdown can't. Please check connections and permissions.

In [6]:
# NOTE(review): leftover debugging cell. It reads the 10 ppm data again and
# rebinds `data10ppm` to the raw frames returned by read_dataset(), i.e.
# without the 'E' and 'r_hit' columns added by the previous cell. The analysis
# below reads from `datasets`, which still holds the processed frames, so this
# cell looks removable -- confirm before deleting.
data10ppm = read_dataset(10)
print('Done 10ppm')

FileURLRetrievalError: Failed to retrieve file url:

	Cannot retrieve the public link of the file. You may need to change
	the permission to 'Anyone with the link', or have had many accesses.
	Check FAQ in https://github.com/wkentaro/gdown?tab=readme-ov-file#faq.

You may still be able to access the file from the browser:

	https://drive.google.com/uc?id=14Ie8XVcdB__RBZzvPiEQ-Gi4AdX4tKEh

but Gdown can't. Please check connections and permissions.

In [None]:
#@title 2) Analyse experimental parameters and visualize results { display-mode: "form" }

def snr_model(t, A):
    """Square-root growth model: SNR(t) = A * sqrt(t)."""
    root_t = np.sqrt(t)
    return A * root_t


def lod_model(t, A1, A2):
    """Detection-limit model: c_LOD(t) = (A1 + sqrt(A1^2 + A2*t)) / t."""
    radicand = A1**2 + A2 * t
    numerator = A1 + np.sqrt(radicand)
    return numerator / t

# ------------------------------------------------------------
# Peak parameters
# ------------------------------------------------------------
peak = {
    "alpha": {"E": 10.55},  # keV
    "beta": {"E": 12.65},   # keV
    "window": FWHM_keV      # half-width of each counting window around a peak
}

# ------------------------------------------------------------
# Time values
# ------------------------------------------------------------
# Exposure time is mapped onto simulated events as: events = seconds * fluence.
# NOTE(review): this presumes EventID counts emitted photons per source -- confirm.
fluence = 995 * np.pi * 2 * (1 - np.cos(np.pi / 4))  # ph/s per source
step_size_min = 5  # min
max_time_min = 180  # min
max_event = max_time_min * 60 * fluence
step_size = step_size_min * 60 * fluence
# Count the steps with integer minutes. The previous floor(max_event / step_size)
# is a float ratio that is algebraically 36 but may round to 35.999..., which
# would silently drop the last time step.
num_steps = max_time_min // step_size_min
time_values = np.arange(1, num_steps + 1) * step_size_min  # in min

# ------------------------------------------------------------
# Histogram setup
# ------------------------------------------------------------
T = np.array([10, 30, 60])  # min, exposure times at which histograms are stored
T_steps = (T / step_size_min).astype(int)

binwidth = 0.03  # keV
E = np.arange(0, 75 + binwidth, binwidth)  # left bin edges

# Loop invariants, computed once instead of for every (step, source) pair.
bin_edges = np.append(E, E[-1] + binwidth)
alpha_window = (E >= peak["alpha"]["E"] - peak["window"]) & (E <= peak["alpha"]["E"] + peak["window"])
beta_window = (E >= peak["beta"]["E"] - peak["window"]) & (E <= peak["beta"]["E"] + peak["window"])


def _counting_snr(signal, background):
    """Return signal / sqrt(signal + background), or 0 if there are no counts."""
    total = signal + background
    if total > 0:
        return signal / np.sqrt(total)
    return 0


# Container for SNR data: one dict per concentration, same order as c_vals
SNR = [dict() for _ in range(len(c_vals))]
saved_histograms = {}

print("\n=== Calculating SNR over time ===")

for conc_idx, conc in enumerate(c_vals):
    print(f"Processing {conc} ppm...")

    # The particle and radius cuts do not depend on the time step, so apply
    # them once per source and keep only (EventID, E) arrays per hit type.
    hits_by_source = []
    for s in range(7):
        data = datasets[conc_idx][f"S{s}"]
        detected = data[(data["Particle"] == "gamma") & (data["r_hit"] <= detector_radius)]
        hits_of_type = {}
        for hit_type in ("Primary", "Secondary"):
            subset = detected[detected["Type"] == hit_type]
            hits_of_type[hit_type] = (subset["EventID"].to_numpy(), subset["E"].to_numpy())
        hits_by_source.append(hits_of_type)

    signal_over_time = np.zeros(num_steps)
    background_over_time = np.zeros(num_steps)
    signal_over_time_S0 = np.zeros(num_steps)
    background_over_time_S0 = np.zeros(num_steps)

    SNR[conc_idx]["alpha_individual"] = np.zeros((7, num_steps))
    SNR[conc_idx]["beta_individual"] = np.zeros((7, num_steps))
    SNR[conc_idx]["total_individual"] = np.zeros((7, num_steps))
    SNR[conc_idx]["alpha_combined"] = np.zeros(num_steps)
    SNR[conc_idx]["beta_combined"] = np.zeros(num_steps)
    SNR[conc_idx]["total_combined"] = np.zeros(num_steps)

    for step in range(1, num_steps + 1):
        end_event = step * step_size
        current_time = step * step_size_min
        print(f" - {current_time} min...")

        total_alpha_secondary = 0
        total_alpha_primary = 0
        total_beta_secondary = 0
        total_beta_primary = 0

        is_save_point = step in T_steps
        if is_save_point:
            combined_primary_hist = np.zeros_like(E)
            combined_secondary_hist = np.zeros_like(E)

        # all 7 sources
        for s in range(7):
            primary_ids, primary_E = hits_by_source[s]["Primary"]
            secondary_ids, secondary_E = hits_by_source[s]["Secondary"]

            # Only events emitted before the end of the current exposure count.
            primary_hist, _ = np.histogram(primary_E[primary_ids < end_event], bins=bin_edges)
            secondary_hist, _ = np.histogram(secondary_E[secondary_ids < end_event], bins=bin_edges)

            if is_save_point:
                combined_primary_hist += primary_hist
                combined_secondary_hist += secondary_hist

            # Windowing: counts inside the alpha / beta windows
            alpha_primary_s = primary_hist[alpha_window].sum()
            alpha_secondary_s = secondary_hist[alpha_window].sum()
            beta_primary_s = primary_hist[beta_window].sum()
            beta_secondary_s = secondary_hist[beta_window].sum()

            total_secondary_s = alpha_secondary_s + beta_secondary_s
            total_primary_s = alpha_primary_s + beta_primary_s

            # SNR per source ("Secondary" counts are the signal, "Primary" the background)
            SNR[conc_idx]["alpha_individual"][s, step - 1] = _counting_snr(alpha_secondary_s, alpha_primary_s)
            SNR[conc_idx]["beta_individual"][s, step - 1] = _counting_snr(beta_secondary_s, beta_primary_s)
            SNR[conc_idx]["total_individual"][s, step - 1] = _counting_snr(total_secondary_s, total_primary_s)

            # S0 individually
            if s == 0:
                signal_over_time_S0[step - 1] += total_secondary_s
                background_over_time_S0[step - 1] += total_primary_s

            # Sum over all sources
            total_alpha_primary += alpha_primary_s
            total_alpha_secondary += alpha_secondary_s
            total_beta_primary += beta_primary_s
            total_beta_secondary += beta_secondary_s

        # Save histogram (summed over the 7 sources) at the selected times
        if is_save_point:
            T_idx = np.where(T_steps == step)[0][0] + 1
            field_name = f"c{conc}_T{T_idx}"
            saved_histograms[field_name] = {
                "primary": combined_primary_hist,
                "secondary": combined_secondary_hist,
                "E": E,
                "time_min": current_time,
            }

        total_secondary = total_alpha_secondary + total_beta_secondary
        total_primary = total_alpha_primary + total_beta_primary
        signal_over_time[step - 1] = total_secondary
        background_over_time[step - 1] = total_primary

        # combined SNR of all 7 sources
        SNR[conc_idx]["alpha_combined"][step - 1] = _counting_snr(total_alpha_secondary, total_alpha_primary)
        SNR[conc_idx]["beta_combined"][step - 1] = _counting_snr(total_beta_secondary, total_beta_primary)
        SNR[conc_idx]["total_combined"][step - 1] = _counting_snr(total_secondary, total_primary)

    SNR[conc_idx]["signal_over_time"] = signal_over_time
    SNR[conc_idx]["background_over_time"] = background_over_time
    SNR[conc_idx]["signal_over_time_S0"] = signal_over_time_S0
    SNR[conc_idx]["background_over_time_S0"] = background_over_time_S0

In [None]:

# ------------------------------------------------------------
# Plot
# ------------------------------------------------------------
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
if len(colors) < 6:
    colors = colors * (6 // len(colors) + 1)


def lin(x, A):
    """Straight line through the origin, fitted to counts vs. time."""
    return A * x


def _fit_lod_times(snr_curves, signal_curves, background_curves, snr_p0):
    """
    Fit one SNR curve per concentration and derive the time at which SNR = 3.

    For every concentration, SNR(t) = A*sqrt(t) is fitted (start value
    ``snr_p0``), giving t_LOD = 9 / A^2.  The signal and background counts
    are fitted with lines through the origin; their slopes s and b enter the
    uncertainty estimate of t_LOD.

    Returns the arrays (A, t_LOD, sigma_t), one entry per curve.
    """
    n_curves = len(snr_curves)
    amplitudes = np.zeros(n_curves)
    lod_times = np.zeros(n_curves)
    lod_sigmas = np.zeros(n_curves)

    for k in range(n_curves):
        snr_popt, _ = curve_fit(snr_model, time_values, snr_curves[k], p0=[snr_p0], maxfev=10000)
        amplitudes[k] = snr_popt[0]
        lod_times[k] = 9.0 / (amplitudes[k] ** 2)

        sig_popt, _ = curve_fit(lin, time_values, signal_curves[k], p0=[1.0])
        bkg_popt, _ = curve_fit(lin, time_values, background_curves[k], p0=[1.0])

        rate_s = sig_popt[0]
        rate_b = bkg_popt[0]
        t_k = lod_times[k]
        lod_sigmas[k] = 9 * np.sqrt(((rate_s + 2*rate_b)**2 / rate_s**5) * (1/t_k) + (rate_b / rate_s**4) * (1/t_k))

    return amplitudes, lod_times, lod_sigmas


# ============================================================
# FIGURE 1: SNR + LOD
# ============================================================
fig1, (ax_snr, ax_lod) = plt.subplots(
    1, 2,
    figsize=(20/2.54, 10/2.54),
    constrained_layout=True
)

n_conc = len(c_vals)

# 7x Am241: LOD times derived from the combined SNR data.
# The same SNR fit also provides the curves drawn in the SNR panel.
A_values, t_LOD, sigma_t = _fit_lod_times(
    [SNR[k]["total_combined"] for k in range(n_conc)],
    [SNR[k]["signal_over_time"] for k in range(n_conc)],
    [SNR[k]["background_over_time"] for k in range(n_conc)],
    snr_p0=1.0,
)

# --- SNR ---
xLim = (0, 120)
yLim = (0, 35)

t_fit = np.linspace(0, time_values.max(), 1000)

for idx, conc in enumerate(c_vals):
    line_color = colors[idx]

    # simulated points
    ax_snr.plot(
        time_values,
        SNR[idx]["total_combined"],
        'o',
        color=line_color,
        markersize=4,
        markerfacecolor=line_color,
        label=f"{conc} ppm"
    )

    # fitted A*sqrt(t) curve
    A_fitted = A_values[idx]
    ax_snr.plot(
        t_fit,
        snr_model(t_fit, A_fitted),
        '-',
        color=line_color,
        label=f"Fit: {A_fitted:.2f} • √t"
    )

# vertical markers at the exposure times of the stored histograms
for tt in T:
    ax_snr.plot([tt, tt], [0, yLim[1]], 'k--')

ax_snr.set_title("SNR as a function of time with 7x Am241")
ax_snr.set_xlabel("Time (min)")
ax_snr.set_ylabel("SNR")
ax_snr.set_xlim(xLim)
ax_snr.set_ylim(yLim)
ax_snr.tick_params(direction='out')
ax_snr.legend(loc="upper left")

# --- LOD ---
xLim_lod = (0, 120)
yLim_lod = (0, 1200)

ax_lod.errorbar(
    t_LOD, c_vals, xerr=sigma_t,
    fmt='o', color='r', mfc='r',
    label="7x Am241"
)

popt_lod, _ = curve_fit(lod_model, t_LOD, c_vals, p0=[10, 10])
t_fit_lod = np.linspace(t_LOD.min()*0.5, t_LOD.max()*2, 200)
ax_lod.plot(
    t_fit_lod, lod_model(t_fit_lod, *popt_lod), 'r-',
    label=f"Fit: A1={popt_lod[0]:.0f}, A2={popt_lod[1]:.0f}"
)

# S0-only (single source)
A_values_S0, t_LOD_S0, sigma_t_S0 = _fit_lod_times(
    [SNR[k]["total_individual"][0, :] for k in range(n_conc)],
    [SNR[k]["signal_over_time_S0"] for k in range(n_conc)],
    [SNR[k]["background_over_time_S0"] for k in range(n_conc)],
    snr_p0=0.5,
)

ax_lod.errorbar(
    t_LOD_S0, c_vals, xerr=sigma_t_S0,
    fmt='o', color='b', mfc='b',
    label="1x Am241"
)

popt_lod_S0, _ = curve_fit(lod_model, t_LOD_S0, c_vals, p0=[10, 10])
ax_lod.plot(
    t_fit_lod, lod_model(t_fit_lod, *popt_lod_S0), 'b-',
    label=f"Fit S0: A1={popt_lod_S0[0]:.0f}, A2={popt_lod_S0[1]:.0f}"
)

ax_lod.set_xlabel("Time (min)")
ax_lod.set_ylabel("Concentration (ppm)")
ax_lod.set_xlim(xLim_lod)
ax_lod.set_ylim(yLim_lod)
ax_lod.set_yticks(np.arange(0, 1201, 100))
ax_lod.set_title("Limit of detection")
ax_lod.tick_params(direction='out')
ax_lod.legend(loc="upper right")

fig1.savefig("Fig3_topplots.pdf", transparent=True)


In [None]:

# ============================================================
# FIGURE 2: 9 histograms (rows: concentration, columns: exposure time)
# ============================================================
fig2, axes = plt.subplots(
    3, 3,
    figsize=(20/2.54, 20/2.54),
    constrained_layout=True
)

xLim_h = (10, 13)
yLim_h = (0, 50)
red = (0.7874, 0.0930, 0.1196)
blue = (0.0, 0.4470, 0.7410)

plot_concs = [500, 200, 100]

# Borders of the two counting windows, in drawing order.
window_edges = [
    peak["alpha"]["E"] - peak["window"],
    peak["alpha"]["E"] + peak["window"],
    peak["beta"]["E"] - peak["window"],
    peak["beta"]["E"] + peak["window"],
]

for row, conc in enumerate(plot_concs):
    conc_pos = np.where(c_vals==conc)[0][0]

    for col, T_idx in enumerate([1, 2, 3]):
        ax = axes[row, col]
        hdata = saved_histograms[f"c{conc}_T{T_idx}"]
        E_loc = hdata["E"]
        prim = hdata["primary"]
        total = prim + hdata["secondary"]

        # Total counts in red with the primary part drawn on top in blue
        ax.fill_between(E_loc, total, step='pre', color=red)
        ax.fill_between(E_loc, prim, step='pre', color=blue)

        # Only the first border line carries the legend entry with the SNR value.
        snr_label = f"SNR = {SNR[conc_pos]['total_combined'][T_steps[T_idx-1]-1]:.1f}"
        for edge_no, edge in enumerate(window_edges):
            label_kw = {"label": snr_label} if edge_no == 0 else {}
            ax.plot([edge, edge], [0, yLim_h[1]], 'k--', **label_kw)

        ax.set_title(f"{T[T_idx-1]} min - {conc} ppm", fontsize=8)
        ax.set_xlim(xLim_h)
        ax.set_ylim(yLim_h)
        ax.set_xlabel("Energy (keV)", fontsize=7)
        ax.set_ylabel("Detected counts", fontsize=7)
        ax.tick_params(direction='out', labelsize=7)

        # IMPORTANT: do not force a square aspect ratio
        ax.set_aspect('auto')

        ax.legend(loc="upper center", fontsize=6)

fig2.savefig("Fig3_histograms.pdf", transparent=True)

plt.show()


# ------------------------------------------------------------
# Print LOD values
# ------------------------------------------------------------
def _print_lod_rows(lod_times, lod_sigmas):
    """Print the table header and one tab-separated row per concentration."""
    print("PPM\tLOD Time (min)\tSigma_t (min)")
    for ppm, lod_time, lod_sigma in zip(c_vals, lod_times, lod_sigmas):
        print(f"{ppm}\t{lod_time:.2f}\t\t{lod_sigma:.2f}")


print("\n=== LOD Values with Error Estimates ===")
print("7x Am241 Sources:")
_print_lod_rows(t_LOD, sigma_t)

print("\n1x Am241 Source:")
_print_lod_rows(t_LOD_S0, sigma_t_S0)
