In [32]:
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import warnings

from pathlib import Path

import patato as pat

In [33]:
df_data_all_points = pd.read_parquet(
    "01_Prepare Data/pa_values_extracted_all_meta.parquet"
)


def agg_function(x, npf=np.nanmean):
    if np.issubdtype(x.dtype, np.number):
        return npf(x)
    elif hasattr(x.iloc[0], "dtype"):
        # Assume that this is a numpy array.
        return npf(np.stack(x), axis=0)
    else:
        return x.iloc[0]


# def agg_function(x, npfn=np.nanmean):
#     return npfn(np.stack(x), axis=0) if not np.isscalar(x.iloc[0]) else x.iloc[0]


def query_and_aggregate(region, roi, df_data=df_data_all_points, aggregate=True):
    df_data = df_data.query(
        f"(Location == '{region}') and (`OtherDetail` == '') and \
               (`ROI Name` == '{roi}') and (Parallel == '')"
    )
    if not aggregate:
        return df_data, None
    # First, average over the three scans taken per patient
    df_data_all = (
        df_data.groupby(["SkinID", "RunNumber", "Side", "Pigment"])
        .agg(agg_function)
        .reset_index()
    )

    # For CoV, remove Vitiligo cohort
    df_data_all_cov = df_data_all.query("`Fitzpatrick Type` != 'Vitiligo'")

    m = df_data_all_cov.groupby(["SkinID", "ITA"]).apply("mean", numeric_only=True)
    s = df_data_all_cov.groupby(["SkinID", "ITA"]).apply("std", numeric_only=True)
    cov = (s["so2_mean"] / m["so2_mean"]).reset_index()
    cov = cov.rename(columns={"so2_mean": "cov_so2"})
    cov["std_so2"] = s.reset_index()["so2_mean"]

    thb_metric = "thb_positive_mean"
    cov["cov_thb"] = (s[thb_metric] / m[thb_metric]).reset_index()[thb_metric]
    cov["std_thb"] = s.reset_index()[thb_metric]
    cov.dropna(inplace=True)

    df_data = df_data_all.groupby("SkinID").agg(agg_function).reset_index()
    return df_data, cov

## Compute statistics for paper: Skin colour bias in photoacoustic imaging

**Figure 1:**
Subpanel C. We compute a linear model between ITA and Fitzpatrick type. Equivalent to lm(ITA ~ FP), treating FP as a number. We do a two-tailed t-test on the gradient of this (i.e. the coefficient of FP). 

Write in the text that one participant was excluded from the analysis as we did not have consistent ultrasound data.

In [34]:
df_data_neck, _ = query_and_aggregate("neck", "skin")

In [35]:
# 1. Statistics associated with Figure 1:
def get_statistics_figure_1(region="Neck"):
    df_data, _ = query_and_aggregate(region.lower(), "skin")
    # Load in the pa data.
    # wavelengths_pa = np.load("pa_wavelengths.npy")

    # Figure 1C (FP vs. ITA)
    print("Figure 1E (FP vs. ITA). LM ITA ~ FP")
    # display(df_ita.head())
    df_no_vit = df_data.query("`Fitzpatrick Type`!='Vitiligo'").copy()
    df_no_vit["FPint"] = df_no_vit["Fitzpatrick Type"].astype(int)
    df_no_vit["FPint"] = df_no_vit["Fitzpatrick Type"].astype(int)
    model = smf.ols("ITA ~ FPint", data=df_no_vit)
    model_fit = model.fit()
    print(f"p = {model_fit.pvalues['FPint']:.4f}; t = {model_fit.tvalues['FPint']:.2f}")
    # print(f"p = {model_fit.pvalues['np.power(FPint, 2)']:.4f}")

    a, b = model_fit.conf_int().loc["Intercept"]
    c = model_fit.params["Intercept"]
    print(f"Intercept = {c:.0f}; 95% CI: [{a:.0f}, {b:.0f}]")
    a, b = model_fit.conf_int().loc["FPint"]
    c = model_fit.params["FPint"]
    print(f"Gradient = {c:.0f}; 95% CI: [{a:.0f}, {b:.0f}]")
    print(f"nobs = {model_fit.nobs}")

    print("Figure n/a (FP vs. PA_700). LM PA_700 ~ FP")
    df_no_vit["pa700"] = df_no_vit["spectrum_mean"].apply(lambda x: x[0])
    model = smf.ols("pa700 ~ FPint", data=df_no_vit)
    model_fit = model.fit()
    print(f"p = {model_fit.pvalues['FPint']:.4f}; t = {model_fit.tvalues['FPint']:.2f}")
    # print(f"p = {model_fit.pvalues['np.power(FPint, 2)']:.4f}")

    a, b = model_fit.conf_int().loc["Intercept"]
    c = model_fit.params["Intercept"]
    print(f"Intercept = {c:.3f}; 95% CI: [{a:.3f}, {b:.3f}]")
    a, b = model_fit.conf_int().loc["FPint"]
    c = model_fit.params["FPint"]
    print(f"Gradient = {c:.3f}; 95% CI: [{a:.3f}, {b:.3f}]")
    print(f"nobs = {model_fit.nobs}")

    print("Figure 1G (PA_700 vs ITA). LM PA_700 ~ ITA")
    df_data["pa700"] = df_data["spectrum_positive_mean"].apply(lambda x: x[0])
    model = smf.ols("pa700 ~ ITA", data=df_data)
    model_fit = model.fit()
    print(f"p = {model_fit.pvalues['ITA']:.4f}; t = {model_fit.tvalues['ITA']:.2f}")
    # print(f"p = {model_fit.pvalues['np.power(FPint, 2)']:.4f}")

    a, b = model_fit.conf_int().loc["Intercept"]
    c = model_fit.params["Intercept"]
    print(f"Intercept = {c:.3}; 95% CI: [{a:.3}, {b:.3}]")
    a, b = model_fit.conf_int().loc["ITA"]
    c = model_fit.params["ITA"]
    print(f"Gradient = {c:.5f}; 95% CI: [{a:.5f}, {b:.5f}]")
    print(f"nobs = {model_fit.nobs}")

    return model_fit

In [36]:
wavelengths = []

In [37]:
fit = get_statistics_figure_1()

Figure 1E (FP vs. ITA). LM ITA ~ FP
p = 0.0000; t = -13.10
Intercept = 75; 95% CI: [63, 87]
Gradient = -19; 95% CI: [-23, -16]
nobs = 35.0
Figure n/a (FP vs. PA_700). LM PA_700 ~ FP
p = 0.0000; t = 11.28
Intercept = -0.013; 95% CI: [-0.022, -0.003]
Gradient = 0.014; 95% CI: [0.011, 0.016]
nobs = 35.0
Figure 1G (PA_700 vs ITA). LM PA_700 ~ ITA
p = 0.0000; t = -17.70
Intercept = 0.0679; 95% CI: [0.0626, 0.0731]
Gradient = -0.00122; 95% CI: [-0.00136, -0.00108]
nobs = 41.0


## Figure 2:

Compute the correlation between the radial artery signal and ITA. Likewise for the bicep muscle. 
Might as well do the same for the carotid artery while we're at it...

In [38]:
# 2. Statistics associated with Figure 2:
def get_statistics_figure_2(region="Forearm", wavelength=700):
    # Load ITA from Neck region and FP data. Merge into one table.
    roi = "muscle" if region.lower() in ["bicep", "leg"] else "artery"

    df_data, _ = query_and_aggregate(region.lower(), roi)

    wavelengths_pa = np.load("pa_wavelengths.npy")
    wl_i = np.argmin(np.abs(wavelengths_pa - wavelength))

    # df_pa = aggregate_pa_data(df_pa)
    # df_pa = df_ita.merge(df_pa, on=["SkinID", "Label"])

    print("Print the statistics for lm(PA_wavelength ~ ITA). Two-tailed tests.")
    df_data["y"] = df_data["spectrum_positive_mean"].apply(lambda x: x[wl_i])
    model = smf.ols("y ~ ITA", df_data)
    model_fit = model.fit()
    print(f"p = {model_fit.pvalues['ITA']:.4}; t = {model_fit.tvalues['ITA']:.2}")

    a, b = model_fit.conf_int().loc["Intercept"]
    c = model_fit.params["Intercept"]
    print(f"Intercept = {c:.3}; 95% CI: [{a:.3}, {b:.3}]")

    a, b = model_fit.conf_int().loc["ITA"]
    c = model_fit.params["ITA"]
    print(f"Gradient = {c:.3}; 95% CI: [{a:.3}, {b:.3}]")
    print(f"nobs = {model_fit.nobs}")

    # Calculate the modelled fold-change
    p = model_fit.get_prediction()
    preds = p.summary_frame().sort_values("mean")
    x = preds.iloc[0]["mean"]
    y = preds.iloc[-1]["mean"]
    fc = preds.iloc[-1]["mean"] / preds.iloc[0]["mean"]
    ita_min = np.min(df_data["ITA"])
    ita_max = np.max(df_data["ITA"])
    print(
        f"PA signal goes from {x:.2} to {y:.2} ({fc:.1f} fold change) for ITA={ita_min} to {ita_max}."
    )

    print("Print the statistics for lm(STD_PA_wavelength ~ ITA). Two-tailed tests.")
    df_data["y"] = df_data["spectrum_std"].apply(lambda x: x[wl_i])
    model = smf.ols("y ~ ITA", df_data)
    model_fit = model.fit()
    print(f"p = {model_fit.pvalues['ITA']:.4}; t = {model_fit.tvalues['ITA']:.2}")

    a, b = model_fit.conf_int().loc["Intercept"]
    c = model_fit.params["Intercept"]
    print(f"Intercept = {c:.3}; 95% CI: [{a:.3}, {b:.3}]")
    a, b = model_fit.conf_int().loc["ITA"]
    c = model_fit.params["ITA"]
    print(f"Gradient = {c:.3}; 95% CI: [{a:.3}, {b:.3}]")
    print(f"nobs = {model_fit.nobs}")

    # assert np.sum(np.isnan(df_pa["y"])) == 0

In [39]:
np.load("pa_wavelengths.npy")

array([ 700.,  730.,  760.,  800.,  850.,  910.,  930.,  950.,  980.,
       1030., 1080., 1100., 1210.])

In [40]:
with warnings.catch_warnings(action="ignore"):
    get_statistics_figure_2("Forearm", 700)
    get_statistics_figure_2("Forearm", 800)
    get_statistics_figure_2("Forearm", 1080)

Print the statistics for lm(PA_wavelength ~ ITA). Two-tailed tests.
p = 0.3816; t = -0.89
Intercept = 0.0283; 95% CI: [0.0258, 0.0308]
Gradient = -2.72e-05; 95% CI: [-8.94e-05, 3.5e-05]
nobs = 41.0
PA signal goes from 0.026 to 0.029 (1.1 fold change) for ITA=-42.13 to 68.81.
Print the statistics for lm(STD_PA_wavelength ~ ITA). Two-tailed tests.
p = 0.0005547; t = -3.8
Intercept = 0.0282; 95% CI: [0.0258, 0.0306]
Gradient = -0.000111; 95% CI: [-0.00017, -5.11e-05]
nobs = 41.0
Print the statistics for lm(PA_wavelength ~ ITA). Two-tailed tests.
p = 0.2771; t = 1.1
Intercept = 0.039; 95% CI: [0.0348, 0.0432]
Gradient = 5.68e-05; 95% CI: [-4.74e-05, 0.000161]
nobs = 41.0
PA signal goes from 0.037 to 0.043 (1.2 fold change) for ITA=-42.13 to 68.81.
Print the statistics for lm(STD_PA_wavelength ~ ITA). Two-tailed tests.
p = 0.7004; t = 0.39
Intercept = 0.0365; 95% CI: [0.0327, 0.0403]
Gradient = 1.8e-05; 95% CI: [-7.59e-05, 0.000112]
nobs = 41.0
Print the statistics for lm(PA_wavelength ~ IT

In [41]:
with warnings.catch_warnings(action="ignore"):
    get_statistics_figure_2("Bicep", 700)
    get_statistics_figure_2("Bicep", 800)
    get_statistics_figure_2("Bicep", 1080)

Print the statistics for lm(PA_wavelength ~ ITA). Two-tailed tests.
p = 2.704e-09; t = -7.7
Intercept = 0.00263; 95% CI: [0.00235, 0.0029]
Gradient = -2.48e-05; 95% CI: [-3.13e-05, -1.82e-05]
nobs = 41.0
PA signal goes from 0.00088 to 0.0042 (4.8 fold change) for ITA=-63.59 to 70.37.
Print the statistics for lm(STD_PA_wavelength ~ ITA). Two-tailed tests.
p = 1.47e-13; t = -1.1e+01
Intercept = 0.00431; 95% CI: [0.00388, 0.00473]
Gradient = -5.55e-05; 95% CI: [-6.57e-05, -4.53e-05]
nobs = 41.0
Print the statistics for lm(PA_wavelength ~ ITA). Two-tailed tests.
p = 3.691e-06; t = -5.4
Intercept = 0.00176; 95% CI: [0.00156, 0.00196]
Gradient = -1.29e-05; 95% CI: [-1.77e-05, -8.05e-06]
nobs = 41.0
PA signal goes from 0.00085 to 0.0026 (3.0 fold change) for ITA=-63.59 to 70.37.
Print the statistics for lm(STD_PA_wavelength ~ ITA). Two-tailed tests.
p = 1.568e-11; t = -9.4
Intercept = 0.00257; 95% CI: [0.00232, 0.00282]
Gradient = -2.78e-05; 95% CI: [-3.38e-05, -2.18e-05]
nobs = 41.0
Print th

In [42]:
with warnings.catch_warnings(action="ignore"):
    get_statistics_figure_2("Neck", 700)
    get_statistics_figure_2("Neck", 800)
    get_statistics_figure_2("Neck", 1080)
print(
    "With the neck, several observations are ignored because there were presumably no positive-valued pixels in the region."
)

Print the statistics for lm(PA_wavelength ~ ITA). Two-tailed tests.
p = 0.0006271; t = -3.7
Intercept = 0.00128; 95% CI: [0.00108, 0.00149]
Gradient = -1.02e-05; 95% CI: [-1.57e-05, -4.63e-06]
nobs = 41.0
PA signal goes from 0.00073 to 0.002 (2.7 fold change) for ITA=-67.30666666666666 to 54.196000000000005.
Print the statistics for lm(STD_PA_wavelength ~ ITA). Two-tailed tests.
p = 3.726e-05; t = -4.7
Intercept = 0.00194; 95% CI: [0.00168, 0.00219]
Gradient = -1.56e-05; 95% CI: [-2.24e-05, -8.83e-06]
nobs = 41.0
Print the statistics for lm(PA_wavelength ~ ITA). Two-tailed tests.
p = 0.7287; t = 0.35
Intercept = 0.00116; 95% CI: [0.000924, 0.00139]
Gradient = 1.07e-06; 95% CI: [-5.11e-06, 7.24e-06]
nobs = 41.0
PA signal goes from 0.0011 to 0.0012 (1.1 fold change) for ITA=-67.30666666666666 to 54.196000000000005.
Print the statistics for lm(STD_PA_wavelength ~ ITA). Two-tailed tests.
p = 0.2047; t = -1.3
Intercept = 0.00155; 95% CI: [0.00136, 0.00175]
Gradient = -3.26e-06; 95% CI: [-8.

In [43]:
with warnings.catch_warnings(action="ignore"):
    get_statistics_figure_2("Leg", 700)
    get_statistics_figure_2("Leg", 800)
    get_statistics_figure_2("Leg", 1080)

Print the statistics for lm(PA_wavelength ~ ITA). Two-tailed tests.
p = 5.173e-11; t = -9.0
Intercept = 0.00408; 95% CI: [0.00371, 0.00444]
Gradient = -3.71e-05; 95% CI: [-4.54e-05, -2.88e-05]
nobs = 40.0
PA signal goes from 0.0018 to 0.0066 (3.7 fold change) for ITA=-66.89 to 62.102.
Print the statistics for lm(STD_PA_wavelength ~ ITA). Two-tailed tests.
p = 3.77e-18; t = -1.6e+01
Intercept = 0.00637; 95% CI: [0.00589, 0.00684]
Gradient = -8.39e-05; 95% CI: [-9.47e-05, -7.3e-05]
nobs = 40.0
Print the statistics for lm(PA_wavelength ~ ITA). Two-tailed tests.
p = 1.02e-06; t = -5.8
Intercept = 0.00285; 95% CI: [0.00256, 0.00313]
Gradient = -1.89e-05; 95% CI: [-2.55e-05, -1.23e-05]
nobs = 40.0
PA signal goes from 0.0017 to 0.0041 (2.5 fold change) for ITA=-66.89 to 62.102.
Print the statistics for lm(STD_PA_wavelength ~ ITA). Two-tailed tests.
p = 4.606e-13; t = -1.1e+01
Intercept = 0.00398; 95% CI: [0.0036, 0.00436]
Gradient = -4.63e-05; 95% CI: [-5.5e-05, -3.76e-05]
nobs = 40.0
Print t

## Figure 3 statistics: Linear unmixing 

In [44]:
ita_threshold = 10

In [45]:
# 3. Statistics associated with Figure 3:
def get_statistics_figure_3(
    region="Forearm", um_quantity="so2", ita_lim=None, greater=True
):
    # Load ITA from Neck region and FP data. Merge into one table.
    roi = "muscle" if region.lower() in ["bicep", "leg"] else "artery"

    df_data, _ = query_and_aggregate(region.lower(), roi)

    if um_quantity == "thb":
        um_q_name = "thb_positive_mean"
    elif um_quantity == "so2":
        um_q_name = "so2_mean"
    elif um_quantity == "so2_corrected":
        um_q_name = "corrected_so2_mean"
    elif um_quantity == "thb_corrected":
        um_q_name = "corrected_thb_positive_mean"
    if ita_lim is not None and greater:
        df_data = df_data[df_data["ITA"] > ita_lim].copy()
    if ita_lim is not None and not greater:
        df_data = df_data[df_data["ITA"] < ita_lim].copy()

    print("Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.")
    df_data["y"] = df_data[um_q_name]
    model = smf.ols("y ~ ITA", df_data)
    model_fit = model.fit()
    print(f"p = {model_fit.pvalues['ITA']:.4}; t = {model_fit.tvalues['ITA']:.2}")

    a, b = model_fit.conf_int().loc["Intercept"]
    c = model_fit.params["Intercept"]
    print(f"Intercept = {c:.3}; 95% CI: [{a:.3}, {b:.3}]")

    a, b = model_fit.conf_int().loc["ITA"]
    c = model_fit.params["ITA"]
    print(f"Gradient = {c:.3}; 95% CI: [{a:.3}, {b:.3}]")
    print(f"nobs = {model_fit.nobs}")

In [46]:
with warnings.catch_warnings(action="ignore"):
    get_statistics_figure_3("Forearm")
    get_statistics_figure_3("Bicep")
    get_statistics_figure_3("Neck")
    get_statistics_figure_3("Leg")

Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 0.7215; t = 0.36
Intercept = 0.771; 95% CI: [0.747, 0.796]
Gradient = 0.000108; 95% CI: [-0.000502, 0.000719]
nobs = 41.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 3.546e-11; t = 9.1
Intercept = 0.565; 95% CI: [0.551, 0.58]
Gradient = 0.00157; 95% CI: [0.00122, 0.00191]
nobs = 41.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 3.398e-05; t = 4.7
Intercept = 0.638; 95% CI: [0.624, 0.652]
Gradient = 0.000876; 95% CI: [0.000498, 0.00125]
nobs = 41.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 1.966e-17; t = 1.5e+01
Intercept = 0.518; 95% CI: [0.506, 0.53]
Gradient = 0.00204; 95% CI: [0.00177, 0.00232]
nobs = 40.0


In [47]:
with warnings.catch_warnings(action="ignore"):
    get_statistics_figure_3("Forearm", ita_lim=ita_threshold)
    get_statistics_figure_3("Bicep", ita_lim=ita_threshold)
    get_statistics_figure_3("Neck", ita_lim=ita_threshold)
    get_statistics_figure_3("Leg", ita_lim=ita_threshold)
with warnings.catch_warnings(action="ignore"):
    get_statistics_figure_3("Forearm", ita_lim=ita_threshold, greater=False)
    get_statistics_figure_3("Bicep", ita_lim=ita_threshold, greater=False)
    get_statistics_figure_3("Neck", ita_lim=ita_threshold, greater=False)
    get_statistics_figure_3("Leg", ita_lim=ita_threshold, greater=False)

Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 0.006449; t = -3.0
Intercept = 0.844; 95% CI: [0.797, 0.892]
Gradient = -0.00145; 95% CI: [-0.00244, -0.000447]
nobs = 25.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 0.5722; t = 0.57
Intercept = 0.628; 95% CI: [0.588, 0.667]
Gradient = 0.000236; 95% CI: [-0.000612, 0.00108]
nobs = 27.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 0.06536; t = -1.9
Intercept = 0.719; 95% CI: [0.667, 0.771]
Gradient = -0.00127; 95% CI: [-0.00262, 8.77e-05]
nobs = 24.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 0.002931; t = 3.3
Intercept = 0.558; 95% CI: [0.525, 0.591]
Gradient = 0.00117; 95% CI: [0.00044, 0.0019]
nobs = 26.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 0.3935; t = 0.88
Intercept = 0.789; 95% CI: [0.712, 0.867]
Gradient = 0.00131; 95% CI: [-0.00188, 0.0045]
nobs = 16.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-t

In [48]:
with warnings.catch_warnings(action="ignore"):
    get_statistics_figure_3("Forearm", um_quantity="thb")
    get_statistics_figure_3("Bicep", um_quantity="thb")
    get_statistics_figure_3("Neck", um_quantity="thb")
    get_statistics_figure_3("Leg", um_quantity="thb")

Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 0.3048; t = 1.0
Intercept = 4.5e-05; 95% CI: [4.02e-05, 4.99e-05]
Gradient = 6.22e-08; 95% CI: [-5.88e-08, 1.83e-07]
nobs = 41.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 2.841e-05; t = -4.7
Intercept = 2.01e-06; 95% CI: [1.76e-06, 2.26e-06]
Gradient = -1.42e-08; 95% CI: [-2.02e-08, -8.11e-09]
nobs = 41.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 0.6843; t = 0.41
Intercept = 1.25e-06; 95% CI: [9.69e-07, 1.52e-06]
Gradient = 1.49e-09; 95% CI: [-5.87e-09, 8.85e-09]
nobs = 41.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 1.13e-05; t = -5.1
Intercept = 3.22e-06; 95% CI: [2.87e-06, 3.56e-06]
Gradient = -1.96e-08; 95% CI: [-2.75e-08, -1.18e-08]
nobs = 40.0


In [49]:
with warnings.catch_warnings(action="ignore"):
    get_statistics_figure_3("Forearm", um_quantity="thb", ita_lim=ita_threshold)
    get_statistics_figure_3("Bicep", um_quantity="thb", ita_lim=ita_threshold)
    get_statistics_figure_3("Neck", um_quantity="thb", ita_lim=ita_threshold)
    get_statistics_figure_3("Leg", um_quantity="thb", ita_lim=ita_threshold)
with warnings.catch_warnings(action="ignore"):
    get_statistics_figure_3(
        "Forearm", um_quantity="thb", ita_lim=ita_threshold, greater=False
    )
    get_statistics_figure_3(
        "Bicep", um_quantity="thb", ita_lim=ita_threshold, greater=False
    )
    get_statistics_figure_3(
        "Neck", um_quantity="thb", ita_lim=ita_threshold, greater=False
    )
    get_statistics_figure_3(
        "Leg", um_quantity="thb", ita_lim=ita_threshold, greater=False
    )

Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 0.1244; t = -1.6
Intercept = 6.06e-05; 95% CI: [4.38e-05, 7.74e-05]
Gradient = -2.7e-07; 95% CI: [-6.21e-07, 8.03e-08]
nobs = 25.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 0.4267; t = -0.81
Intercept = 1.65e-06; 95% CI: [8.38e-07, 2.46e-06]
Gradient = -6.79e-09; 95% CI: [-2.41e-08, 1.05e-08]
nobs = 27.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 0.002346; t = -3.4
Intercept = 3.13e-06; 95% CI: [2.05e-06, 4.2e-06]
Gradient = -4.65e-08; 95% CI: [-7.45e-08, -1.85e-08]
nobs = 24.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 0.2573; t = -1.2
Intercept = 2.87e-06; 95% CI: [1.95e-06, 3.78e-06]
Gradient = -1.14e-08; 95% CI: [-3.17e-08, 8.89e-09]
nobs = 26.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 0.09137; t = 1.8
Intercept = 4.98e-05; 95% CI: [3.96e-05, 6e-05]
Gradient = 3.56e-07; 95% CI: [-6.52e-08, 7.77e-07]
nobs = 16

In [50]:
with warnings.catch_warnings(action="ignore"):
    get_statistics_figure_3("Forearm", um_quantity="so2_corrected")
    get_statistics_figure_3("Bicep", um_quantity="so2_corrected")
    get_statistics_figure_3("Neck", um_quantity="so2_corrected")
    get_statistics_figure_3("Leg", um_quantity="so2_corrected")

Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 0.002483; t = 3.2
Intercept = 0.705; 95% CI: [0.682, 0.729]
Gradient = 0.000926; 95% CI: [0.000347, 0.00151]
nobs = 41.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 1.153e-14; t = 1.2e+01
Intercept = 0.53; 95% CI: [0.517, 0.543]
Gradient = 0.00182; 95% CI: [0.00151, 0.00213]
nobs = 41.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 7.003e-08; t = 6.6
Intercept = 0.579; 95% CI: [0.565, 0.592]
Gradient = 0.00121; 95% CI: [0.000839, 0.00158]
nobs = 41.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 1.961e-21; t = 2e+01
Intercept = 0.481; 95% CI: [0.471, 0.491]
Gradient = 0.00218; 95% CI: [0.00196, 0.00241]
nobs = 40.0


In [51]:
with warnings.catch_warnings(action="ignore"):
    get_statistics_figure_3(
        "Forearm", um_quantity="so2_corrected", ita_lim=ita_threshold
    )
    get_statistics_figure_3("Bicep", um_quantity="so2_corrected", ita_lim=ita_threshold)
    get_statistics_figure_3("Neck", um_quantity="so2_corrected", ita_lim=ita_threshold)
    get_statistics_figure_3("Leg", um_quantity="so2_corrected", ita_lim=ita_threshold)
with warnings.catch_warnings(action="ignore"):
    get_statistics_figure_3(
        "Forearm", um_quantity="so2_corrected", ita_lim=ita_threshold, greater=False
    )
    get_statistics_figure_3(
        "Bicep", um_quantity="so2_corrected", ita_lim=ita_threshold, greater=False
    )
    get_statistics_figure_3(
        "Neck", um_quantity="so2_corrected", ita_lim=ita_threshold, greater=False
    )
    get_statistics_figure_3(
        "Leg", um_quantity="so2_corrected", ita_lim=ita_threshold, greater=False
    )

Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 0.7242; t = -0.36
Intercept = 0.759; 95% CI: [0.707, 0.811]
Gradient = -0.000187; 95% CI: [-0.00127, 0.000895]
nobs = 25.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 0.004651; t = 3.1
Intercept = 0.555; 95% CI: [0.515, 0.595]
Gradient = 0.00129; 95% CI: [0.000437, 0.00215]
nobs = 27.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 0.9887; t = 0.014
Intercept = 0.628; 95% CI: [0.568, 0.688]
Gradient = 1.07e-05; 95% CI: [-0.00155, 0.00157]
nobs = 24.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 8.206e-07; t = 6.6
Intercept = 0.473; 95% CI: [0.439, 0.507]
Gradient = 0.00239; 95% CI: [0.00164, 0.00313]
nobs = 26.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 0.4002; t = 0.87
Intercept = 0.706; 95% CI: [0.632, 0.78]
Gradient = 0.00123; 95% CI: [-0.00181, 0.00428]
nobs = 16.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-ta

In [52]:
with warnings.catch_warnings(action="ignore"):
    get_statistics_figure_3("Forearm", um_quantity="thb_corrected")
    get_statistics_figure_3("Bicep", um_quantity="thb_corrected")
    get_statistics_figure_3("Neck", um_quantity="thb_corrected")
    get_statistics_figure_3("Leg", um_quantity="thb_corrected")

Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 2.938e-05; t = -4.7
Intercept = 7.4e-05; 95% CI: [6.72e-05, 8.08e-05]
Gradient = -3.95e-07; 95% CI: [-5.65e-07, -2.26e-07]
nobs = 41.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 2.945e-13; t = -1.1e+01
Intercept = 3.72e-06; 95% CI: [3.36e-06, 4.08e-06]
Gradient = -4.65e-08; 95% CI: [-5.52e-08, -3.78e-08]
nobs = 41.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 0.01256; t = -2.6
Intercept = 2.12e-06; 95% CI: [1.71e-06, 2.53e-06]
Gradient = -1.4e-08; 95% CI: [-2.49e-08, -3.19e-09]
nobs = 41.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 1.032e-11; t = -9.6
Intercept = 6.28e-06; 95% CI: [5.52e-06, 7.03e-06]
Gradient = -8.19e-08; 95% CI: [-9.91e-08, -6.46e-08]
nobs = 40.0


In [53]:
with warnings.catch_warnings(action="ignore"):
    get_statistics_figure_3(
        "Forearm", um_quantity="thb_corrected", ita_lim=ita_threshold
    )
    get_statistics_figure_3("Bicep", um_quantity="thb_corrected", ita_lim=ita_threshold)
    get_statistics_figure_3("Neck", um_quantity="thb_corrected", ita_lim=ita_threshold)
    get_statistics_figure_3("Leg", um_quantity="thb_corrected", ita_lim=ita_threshold)
with warnings.catch_warnings(action="ignore"):
    get_statistics_figure_3(
        "Forearm", um_quantity="thb_corrected", ita_lim=ita_threshold, greater=False
    )
    get_statistics_figure_3(
        "Bicep", um_quantity="thb_corrected", ita_lim=ita_threshold, greater=False
    )
    get_statistics_figure_3(
        "Neck", um_quantity="thb_corrected", ita_lim=ita_threshold, greater=False
    )
    get_statistics_figure_3(
        "Leg", um_quantity="thb_corrected", ita_lim=ita_threshold, greater=False
    )

Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 0.0009984; t = -3.8
Intercept = 9.07e-05; 95% CI: [7.08e-05, 0.000111]
Gradient = -7.57e-07; 95% CI: [-1.17e-06, -3.41e-07]
nobs = 25.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 0.0317; t = -2.3
Intercept = 2.62e-06; 95% CI: [1.65e-06, 3.6e-06]
Gradient = -2.3e-08; 95% CI: [-4.37e-08, -2.18e-09]
nobs = 27.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 0.0002746; t = -4.3
Intercept = 4.48e-06; 95% CI: [3.13e-06, 5.83e-06]
Gradient = -7.31e-08; 95% CI: [-1.08e-07, -3.8e-08]
nobs = 24.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 0.002758; t = -3.3
Intercept = 4.47e-06; 95% CI: [3.41e-06, 5.52e-06]
Gradient = -3.79e-08; 95% CI: [-6.13e-08, -1.44e-08]
nobs = 26.0
Print the statistics for lm(PA_unmixed ~ ITA). Two-tailed tests.
p = 0.9611; t = -0.05
Intercept = 8.04e-05; 95% CI: [6.23e-05, 9.85e-05]
Gradient = -1.74e-08; 95% CI: [-7.67e-07, 7.32e-0

Pulse oximeter analysis

In [54]:
df_data, _ = query_and_aggregate("forearm", "artery", aggregate=False)

In [55]:
df_data, _ = query_and_aggregate("forearm", "artery", aggregate=False)
df_data = df_data.copy()
df_so2_all = pd.read_parquet("Cleaned Pulse Ox/so2_ita_pulseox_all.parquet")[
    ["SkinID", "ScanName", "Pulse Ox"]
]
df_data["ScanName"] = df_data["File"].apply(
    lambda x: pat.PAData.from_hdf5(Path("Analysis") / x).get_scan_name()
)

df_data = df_data.merge(df_so2_all, on=["SkinID", "ScanName"])
df_data["so2_mean"] *= 100
df_data = df_data[df_data["Pulse Ox"] > 94].query("ITA > 10")
# df_so2_all = (
#     df_so2_all.groupby(["SkinID", "RunNumber"])
#     .mean(numeric_only=True)
#     .reset_index()
# )
df_data.shape

(163, 60)

In [56]:
model = smf.mixedlm(
    data=df_data,
    formula="so2_mean ~ ITA + Q('Pulse Ox')",
    groups=df_data["SkinID"],
)
mdf = model.fit()
mdf.summary()

0,1,2,3
Model:,MixedLM,Dependent Variable:,so2_mean
No. Observations:,163,Method:,REML
No. Groups:,18,Scale:,10.1774
Min. group size:,6,Log-Likelihood:,-441.2155
Max. group size:,18,Converged:,Yes
Mean group size:,9.1,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,-0.605,36.634,-0.017,0.987,-72.407,71.197
ITA,-0.201,0.055,-3.648,0.000,-0.310,-0.093
Q('Pulse Ox'),0.900,0.375,2.403,0.016,0.166,1.634
Group Var,10.721,1.390,,,,
