# Notebook to evaluate inferred microbial anomaly scores

To run this notebook you need to create and activate the following conda environment:

```
conda create --name score_eval -c conda-forge -c defaults numpy pandas matplotlib seaborn scipy scikit-learn ipython ipykernel -y
conda activate score_eval
pip install -e .
```


## Setup

In [1]:
import os
import warnings

import matplotlib.pyplot as plt
import pandas as pd

from src.utils_eval_score import (
    _plot_score_after_nth_abx_exposure,
    _plot_score_over_age,
    _select_samples_around_nth_abx_exposure,
    display_scatterplot_w_scores,
    get_scores_n_abx_info,
    plot_time_between_abx_exposures,
    plot_trajectory,
)
from src.utils_meta import display_diet_information

# IPython magics: auto-reload imported modules before each cell is executed and
# render matplotlib figures inline
%load_ext autoreload
%autoreload 2
%matplotlib inline
# global matplotlib look: font family and style sheet
plt.rcParams.update({"font.family": "DejaVu Sans"})
plt.style.use("tableau-colorblind10")

# silence all FutureWarning messages for the rest of the session
warnings.filterwarnings("ignore", category=FutureWarning)
# avg. number of days per month
# NOTE(review): not referenced by any cell visible in this notebook
DAYS_PER_MONTH = 30.437

USER input: define the inferred model and linked datasets to evaluate here:

In [2]:
#### USER INPUT START
# name of the model: sub-path below ../data/ that holds the model's
# anomaly_detection folder
model_name = "saved_models_microbial_novel_alpha_div2/id-55"
# which model version to evaluate: "best" or "last" (part of the scores and
# evaluation folder names)
point_to_evaluate = "best"

# name of feature dataset used for model
ft_name = "ft_vat19_anomaly_v20240806_entero_genus"
# name of abx time-series used for model
abx_ts_name = "ts_vat19_abx_v20240806"

# limit evaluation to time range up to this many months (if None no limit is set
# and all scores are evaluated)
limit_months = 24.0

# whether to group samples prior to abx exposure in analysis
group_samples = True

# how many samples prior and after abx exposure to consider
# (presumably negative = before, positive = after the exposure; TODO confirm
# against _select_samples_around_nth_abx_exposure)
min_samples = -3.0
max_samples = 6.0

# whether to filter noabx score samples by having at least 1 obs prior to cutoff
# NOTE(review): the flag name suggests that True switches this filter OFF;
# confirm against get_scores_n_abx_info
no_filter = True

# whether to display diet info after 1st, 2nd, 3rd abx exposure plots
display_diet_info = True

# whether to have max. resolution of 0.5 months or not
max_resolution = False

# scaling factor options:
# True: read scores from the scaling-factor sub-folder built from the options
# below; False: read the unscaled scores
scaling_factors_used = True

# if scaling_factors_used is True, then the following options are required:
# non-centered = "nc_std" or centered = "std"
stddev_type = "nc_std"
# moving average window size: 30 or 10
moving_avg = 10
# whether to include duplicates: "--RD-True" or ""
# NOTE(review): the value set below ("--RD-False") is not one of the two options
# listed above; confirm which folder suffixes exist
duplicates = "--RD-False"
# using lower bound of 1 for SFs: "lower_bound-1" or ""
lower_bound = ""

#### USER INPUT END

## Prepare data

In [None]:
# Derive the folder the scores are read from (scores_path) and the folder all
# evaluation outputs are written to (evaluation_path) from the user input.
base_path = f"../data/{model_name}/anomaly_detection/"

# short tag built from the first letter of each boolean, e.g. "gT_maxresF"
res_n_group = f"g{str(group_samples)[0]}_maxres{str(max_resolution)[0]}"

if scaling_factors_used:
    print("Scaling factors used.")
    folder_name = f"using-SF_{stddev_type}_z_scores--moving_avg-{moving_avg}-cummax{lower_bound}{duplicates}"

    scores_path = f"{base_path}scores_{point_to_evaluate}_normal/{folder_name}/"
    # Computed outside the f-string: reusing double quotes inside a double-quoted
    # f-string is a SyntaxError on Python < 3.12.
    duplicates_tag = duplicates.replace("-", "_").lower()
    evaluation_path = f"{base_path}evaluation_{point_to_evaluate}_overall_{res_n_group}_{stddev_type}_ma{moving_avg}{duplicates_tag}/"
else:
    scores_path = f"{base_path}scores_{point_to_evaluate}_normal/"
    evaluation_path = (
        f"{base_path}evaluation_{point_to_evaluate}_overall_{res_n_group}_no_scaling/"
    )

# exist_ok=True makes the cell safe to re-run and avoids the check-then-create race
os.makedirs(evaluation_path, exist_ok=True)


In [4]:
# Load the inferred scores and abx information from scores_path. Going by the
# variable names, the helper returns the noabx train scores, the noabx validation
# scores, the abx scores, the abx events table and the ages at each abx exposure
# (TODO confirm against get_scores_n_abx_info in src.utils_eval_score).
noabx_train, noabx_val, abx_scores_flat, abx_df, abx_age_at_all = get_scores_n_abx_info(
    scores_path, ft_name, limit_months, abx_ts_name, no_filter=no_filter
)


## Score after 1st, 2nd and 3rd abx exposure


### score_1, score_2 and score_3 respectively

In [None]:
# For the 1st, 2nd and 3rd abx exposure: select the samples around the exposure,
# plot the matching score (score_1, score_2, score_3) and, if requested, show the
# diet information of the selected samples.
sample_window = {
    "min_samples": min_samples,
    "max_samples": max_samples,
    "max_resolution": max_resolution,
}

for n in (1, 2, 3):
    score_col = f"score_{n}"
    scores_abx_nth_samples = _select_samples_around_nth_abx_exposure(
        abx_scores_flat,
        abx_df,
        n=n,
        group_samples=group_samples,
        score_var=score_col,
        **sample_window,
    )
    print(scores_abx_nth_samples.shape)
    _plot_score_after_nth_abx_exposure(
        scores_abx_nth_samples,
        x_axis="diff_age_nth_abx",
        y_axis=score_col,
        n=n,
        path_to_save=evaluation_path,
        flag=score_col,
        grouped_samples=group_samples,
        **sample_window,
    )

    if not display_diet_info:
        continue

    # milk diet first, weaning second
    for diet_col in ("diet_milk", "diet_weaning"):
        display_diet_information(
            scores_abx_nth_samples,
            diet_col,
            "diff_age_nth_abx",
            "samples",
            f"{n}-th abx",
        )

In [None]:
# Time between consecutive abx exposures: 1st -> 2nd, then 2nd -> 3rd.
for earlier_label, later_label in (("1st", "2nd"), ("2nd", "3rd")):
    plot_time_between_abx_exposures(
        abx_age_at_all,
        n0_label=earlier_label,
        n1_label=later_label,
        path_to_save=evaluation_path,
    )
    plt.show()

## Compare characteristics of 1st vs. 2nd abx exposures

In [None]:
# Print summary statistics of the abx duration column for the samples selected
# around the 1st and around the 2nd abx exposure.
selection_settings = {
    "min_samples": min_samples,
    "max_samples": max_samples,
    "group_samples": group_samples,
    "max_resolution": max_resolution,
}

for n in (1, 2):
    print(n)
    score_col = f"score_{n}"
    scores_abx_nth_samples = _select_samples_around_nth_abx_exposure(
        abx_scores_flat,
        abx_df,
        n=n,
        score_var=score_col,
        **selection_settings,
    )
    print("Abx duration:")
    duration_summary = scores_abx_nth_samples["abx_any_last_dur_days"].describe()
    print(duration_summary)

    # print("Abx type top 5 counts:")
    # print(scores_abx_nth_samples["abx_type"].value_counts(dropna=False).head())

    # print("Abx reason top 5 counts:")
    # print(scores_abx_nth_samples["abx_reason"].value_counts(dropna=False).head())

The 1st and 2nd abx exposures have similar characteristics (see the duration summaries above).

## Score after 2nd abx: split by time since 1st abx exposure

In [8]:
# cut-off on the time between 1st and 2nd abx exposure used to split the hosts
duration_threshold = 3  # in months

In [9]:
# Time between 1st and 2nd abx exposure, binned at duration_threshold.
# right=False makes the bins [-inf, threshold) and [threshold, inf), matching the
# "<" / ">=" labels.
between_labels = [f"< {duration_threshold} months", f">= {duration_threshold} months"]
bins = [float("-inf"), duration_threshold, float("inf")]

abx_time_between = abx_age_at_all.copy()
abx_time_between["time_since_1st"] = abx_time_between["age_2nd_abx"].sub(
    abx_time_between["age_1st_abx"]
)
abx_time_between["time_since_1st_cat"] = pd.cut(
    abx_time_between["time_since_1st"],
    bins=bins,
    labels=between_labels,
    right=False,
)
# move the index into a regular column
abx_time_between = abx_time_between.reset_index()

In [None]:
# Score around the 2nd abx exposure, plotted separately per category of
# "time since 1st abx exposure" (see between_labels).
n = 2
score_col = f"score_{n}"
scores_abx_nth_samples = _select_samples_around_nth_abx_exposure(
    abx_scores_flat,
    abx_df,
    n=n,
    min_samples=min_samples,
    max_samples=max_samples,
    group_samples=group_samples,
    score_var=score_col,
    max_resolution=max_resolution,
)

# enumerate replaces the hand-maintained counter; the index only keeps the flag of
# each category distinct
for i, cat in enumerate(between_labels):
    # hosts whose 2nd abx exposure falls into this time category
    in_category = abx_time_between["time_since_1st_cat"] == cat
    hosts = abx_time_between.loc[in_category, "host_id"].unique()

    scores_abx_nth_samples_f = scores_abx_nth_samples[
        scores_abx_nth_samples["host_id"].isin(hosts)
    ].copy()
    flag = f"time_since_{n-1}th_{i}"
    _plot_score_after_nth_abx_exposure(
        scores_abx_nth_samples_f,
        x_axis="diff_age_nth_abx",
        y_axis=score_col,
        n=n,
        path_to_save=evaluation_path,
        flag=flag,
        tag=f"time since {n-1}st: {cat}",
        min_samples=min_samples,
        max_samples=max_samples,
        max_resolution=max_resolution,
        grouped_samples=group_samples,
    )

## Score after 1st abx

### split by abx characteristics

#### split by abx duration:

In [None]:
# Samples around the 1st abx exposure, with the abx duration binned in two groups.
n = 1
score_col = f"score_{n}"
selection_settings = dict(
    n=n,
    min_samples=min_samples,
    max_samples=max_samples,
    group_samples=group_samples,
    score_var=score_col,
    max_resolution=max_resolution,
)
scores_abx_nth_samples = _select_samples_around_nth_abx_exposure(
    abx_scores_flat, abx_df, **selection_settings
)

# distribution of the abx duration before binning
scores_abx_nth_samples["abx_any_last_dur_days"].hist(bins=10)

# two bins split at 7; right=False makes them [-inf, 7) and [7, inf)
dur_labels = ["< 7 days", ">= 7 days"]
bins = [float("-inf"), 7, float("inf")]
scores_abx_nth_samples["abx_duration_category"] = pd.cut(
    scores_abx_nth_samples["abx_any_last_dur_days"],
    bins=bins,
    labels=dur_labels,
    right=False,
)

# number of samples per duration bin (missing values included)
scores_abx_nth_samples["abx_duration_category"].value_counts(dropna=False)

In [None]:
# One plot per abx duration bin. Hosts are selected through the samples that carry
# the bin label, then ALL selected samples of those hosts are plotted.
# enumerate replaces the hand-maintained counter used for the flag.
for i, dur in enumerate(dur_labels):
    print(dur)
    in_duration_bin = scores_abx_nth_samples["abx_duration_category"] == dur
    host_w_dur = (
        scores_abx_nth_samples.loc[in_duration_bin, "host_id"].unique().tolist()
    )
    scores_abx_nth_samples_dur = scores_abx_nth_samples.loc[
        scores_abx_nth_samples["host_id"].isin(host_w_dur)
    ].copy()
    flag_dur = f"dur_{i}"
    _plot_score_after_nth_abx_exposure(
        scores_abx_nth_samples_dur,
        x_axis="diff_age_nth_abx",
        y_axis=score_col,
        n=n,
        path_to_save=evaluation_path,
        flag=flag_dur,
        tag=f"duration: {dur}",
        min_samples=min_samples,
        max_samples=max_samples,
        max_resolution=max_resolution,
        grouped_samples=group_samples,
    )

#### split by type of abx (top 1 vs. others):

In [13]:
# Fresh selection of the samples around the 1st abx exposure for the abx type split.
n = 1
score_col = f"score_{n}"
selection_settings = dict(
    n=n,
    min_samples=min_samples,
    max_samples=max_samples,
    group_samples=group_samples,
    score_var=score_col,
    max_resolution=max_resolution,
)
scores_abx_nth_samples = _select_samples_around_nth_abx_exposure(
    abx_scores_flat, abx_df, **selection_settings
)


In [None]:
# number of selected samples per abx type (missing values included)
scores_abx_nth_samples["abx_type"].value_counts(dropna=False)

In [None]:
# Collapse the abx type into "Penicillin" (top 1) vs. "Others"; anything that is
# not exactly "Penicillin", missing values included, ends up in "Others".
is_penicillin = scores_abx_nth_samples["abx_type"].eq("Penicillin")
scores_abx_nth_samples["abx_type_cat"] = is_penicillin.map(
    {True: "Penicillin", False: "Others"}
)
scores_abx_nth_samples["abx_type_cat"].value_counts(dropna=False)

In [None]:
# abx type categories to plot, in order of first appearance
abx_types = scores_abx_nth_samples["abx_type_cat"].drop_duplicates().tolist()
# abx_types = ["Penicillin", "Cephalosporine", "Cotrimoxazole", "Macrolide"]
abx_types

In [None]:
# One plot per abx type category for the samples around the 1st abx exposure.
abx_col = "abx_type_cat"
# abx_col = "abx_type"
for abx in abx_types:
    print(abx)
    abx_str = abx.lower().replace(" ", "_").replace(",", "_")
    flag_abx = f"abx_type_{abx_str}"

    # Exact match instead of str.contains: contains() treats the value as a regex,
    # also matches substrings and fails on missing values in the column.
    scores_abx_nth_samples_abx = scores_abx_nth_samples.loc[
        scores_abx_nth_samples[abx_col] == abx
    ].copy()

    _plot_score_after_nth_abx_exposure(
        scores_abx_nth_samples_abx,
        x_axis="diff_age_nth_abx",
        y_axis=score_col,
        n=n,
        path_to_save=evaluation_path,
        flag=flag_abx,
        tag=f"Abx type: {abx}",
        min_samples=min_samples,
        max_samples=max_samples,
        max_resolution=max_resolution,
        grouped_samples=group_samples,
    )

#### split by abx reason (top 1 vs. others):

In [None]:
# Samples around the 1st abx exposure with the abx reason collapsed into
# "Otitis media" (top 1) vs. "Others".
n = 1
score_col = f"score_{n}"
selection_settings = dict(
    n=n,
    min_samples=min_samples,
    max_samples=max_samples,
    group_samples=group_samples,
    score_var=score_col,
    max_resolution=max_resolution,
)
scores_abx_nth_samples = _select_samples_around_nth_abx_exposure(
    abx_scores_flat, abx_df, **selection_settings
)
# top 1 vs. others
print(scores_abx_nth_samples["abx_reason"].value_counts(dropna=False))

# anything that is not exactly "Otitis media" (missing values included) -> "Others"
is_otitis_media = scores_abx_nth_samples["abx_reason"].eq("Otitis media")
scores_abx_nth_samples["abx_reason_cat"] = is_otitis_media.map(
    {True: "Otitis media", False: "Others"}
)
print(scores_abx_nth_samples["abx_reason_cat"].value_counts(dropna=False))

reasons = scores_abx_nth_samples["abx_reason_cat"].unique().tolist()


In [19]:
# # top 4 reasons
# reasons = (
#     scores_abx_nth_samples["abx_reason"]
#     .value_counts(dropna=False)
#     .iloc[:4]
#     .index.tolist()
# )
# reasons

In [None]:
# One plot per abx reason category for the samples around the 1st abx exposure.
# The filter must use the column the `reasons` list was built from. Filtering the
# raw "abx_reason" column by a category label reduced the "Others" group to rows
# whose raw reason is literally "Others".
reason_col = "abx_reason_cat"
# reason_col = "abx_reason"  # use when `reasons` holds raw reasons (e.g. top 4)
for r in reasons:
    print(r)
    r_str = r.lower().replace(" ", "_").replace(",", "_")

    scores_abx_nth_samples_r = scores_abx_nth_samples.loc[
        scores_abx_nth_samples[reason_col] == r
    ].copy()
    flag_reason = f"abx_reason_{r_str}"
    _plot_score_after_nth_abx_exposure(
        scores_abx_nth_samples_r,
        x_axis="diff_age_nth_abx",
        y_axis=score_col,
        n=n,
        path_to_save=evaluation_path,
        flag=flag_reason,
        tag=f"Abx reason: {r}",
        min_samples=min_samples,
        max_samples=max_samples,
        max_resolution=max_resolution,
        grouped_samples=group_samples,
    )

### split by host characteristics

#### split by time of life:

In [None]:
# Score around the 1st abx exposure, split by the age at that exposure.
n = 1
score_col = f"score_{n}"
scores_abx_nth_samples = _select_samples_around_nth_abx_exposure(
    abx_scores_flat,
    abx_df,
    n=n,
    min_samples=min_samples,
    max_samples=max_samples,
    group_samples=group_samples,
    score_var=score_col,
    max_resolution=max_resolution,
)

# distribution of the age at the 1st abx exposure
scores_abx_nth_samples["age_nth_abx"].hist(bins=24)

# alternative, finer split (first bin: pre weaning):
# bins_age = [-float("inf"), 6, 12, 18, float("inf")]
# age_labels = ["< 6 months", "6 - 12 months", "12 - 18 months", "18 - 24 months"]

# right=False makes the bins [-inf, 12) and [12, inf): an exposure at exactly 12
# falls into the second bin, so the first label has to read "<" and not "<=".
bins_age = [-float("inf"), 12, float("inf")]
age_labels = ["< 12 months", "12 - 24 months"]
scores_abx_nth_samples["age_nth_abx_category"] = pd.cut(
    scores_abx_nth_samples["age_nth_abx"], bins=bins_age, labels=age_labels, right=False
)
print(scores_abx_nth_samples["age_nth_abx_category"].value_counts(dropna=False))

# enumerate replaces the hand-maintained counter used for the flag
for i, a in enumerate(age_labels):
    print(a)

    # keep the samples whose age category at abx exposure equals a
    scores_abx_nth_samples_age = scores_abx_nth_samples.loc[
        scores_abx_nth_samples["age_nth_abx_category"] == a
    ].copy()
    flag_age = f"age_{i}"
    _plot_score_after_nth_abx_exposure(
        scores_abx_nth_samples_age,
        x_axis="diff_age_nth_abx",
        y_axis=score_col,
        n=n,
        path_to_save=evaluation_path,
        flag=flag_age,
        tag=f"age: {a}",
        min_samples=min_samples,
        max_samples=max_samples,
        max_resolution=max_resolution,
        grouped_samples=group_samples,
    )

#### split by delivery mode:

In [None]:
# Score around the 1st abx exposure, one plot per delivery mode.
n = 1
score_col = f"score_{n}"
scores_abx_nth_samples = _select_samples_around_nth_abx_exposure(
    abx_scores_flat,
    abx_df,
    n=n,
    min_samples=min_samples,
    max_samples=max_samples,
    group_samples=group_samples,
    score_var=score_col,
    max_resolution=max_resolution,
)

delivery_labels = scores_abx_nth_samples["delivery_mode"].unique().tolist()

for a in delivery_labels:
    print(a)
    # unique() also returns a missing-value label if one is present; `== a` never
    # matches it, so those rows are selected with isna() instead
    if pd.isna(a):
        in_group = scores_abx_nth_samples["delivery_mode"].isna()
    else:
        in_group = scores_abx_nth_samples["delivery_mode"] == a
    scores_abx_nth_samples_delivery = scores_abx_nth_samples.loc[in_group].copy()

    flag_delivery = f"delivery_{a}"

    _plot_score_after_nth_abx_exposure(
        scores_abx_nth_samples_delivery,
        x_axis="diff_age_nth_abx",
        y_axis=score_col,
        n=n,
        path_to_save=evaluation_path,
        flag=flag_delivery,
        tag=f"delivery mode: {a}",
        min_samples=min_samples,
        max_samples=max_samples,
        max_resolution=max_resolution,
        grouped_samples=group_samples,
    )

#### split by feeding mode:

In [None]:
# Score around the 1st abx exposure, one plot per value of each diet column.
n = 1
score_col = f"score_{n}"
scores_abx_nth_samples = _select_samples_around_nth_abx_exposure(
    abx_scores_flat,
    abx_df,
    n=n,
    min_samples=min_samples,
    max_samples=max_samples,
    group_samples=group_samples,
    score_var=score_col,
    max_resolution=max_resolution,
)

for diet_col in ["diet_milk", "diet_weaning"]:
    diet_labels = scores_abx_nth_samples[diet_col].unique().tolist()

    for a in diet_labels:
        print(a)
        # pd.isna covers None as well as NaN; the former `a is None` check missed
        # NaN labels, for which `== a` selects no rows
        if pd.isna(a):
            in_group = scores_abx_nth_samples[diet_col].isna()
        else:
            in_group = scores_abx_nth_samples[diet_col] == a
        scores_abx_nth_samples_diet = scores_abx_nth_samples.loc[in_group].copy()

        # the flag carries the column name so that equal labels in the two diet
        # columns (e.g. the missing-value label) cannot share one flag
        flag_diet = f"{diet_col}_{a}"

        _plot_score_after_nth_abx_exposure(
            scores_abx_nth_samples_diet,
            x_axis="diff_age_nth_abx",
            y_axis=score_col,
            n=n,
            path_to_save=evaluation_path,
            flag=flag_diet,
            tag=f"diet: {diet_col}={a}",
            min_samples=min_samples,
            max_samples=max_samples,
            max_resolution=max_resolution,
            grouped_samples=group_samples,
        )

## Score over age range

In [23]:
# split name -> [score column, scores, ages at abx exposure (None for noabx)]
dic_splits_n_scores = {
    "train_noabx": ["score_1", noabx_train, None],
    "val_noabx": ["score_1", noabx_val, None],
}
# the three abx entries follow one pattern: abx_1st/score_1/age_1st_abx, ...
dic_splits_n_scores.update(
    {
        f"abx_{rank}": [
            f"score_{k}",
            abx_scores_flat,
            abx_age_at_all[f"age_{rank}_abx"],
        ]
        for k, rank in enumerate(("1st", "2nd", "3rd"), start=1)
    }
)

for name, (score_col, scores, abx_age_values) in dic_splits_n_scores.items():
    _plot_score_over_age(scores, score_col, name, evaluation_path, abx_age_values)
    plt.show()

## Score overall - scatter

In [24]:
# Sort the abx scores by increasing abx exposure (in place), then build abx_events:
# the rows of abx_df for the hosts present in the scores, in the same host order.
sort_keys = ["abx_max_count_ever", "max_abx_w_microbiome", "host_id", "day"]
abx_scores_flat.sort_values(sort_keys, ascending=True, inplace=True)

# a left merge on the ordered host list keeps that order and drops hosts of abx_df
# that have no scores
ordered_hosts = abx_scores_flat["host_id"].unique()
abx_events = pd.DataFrame({"host_id": ordered_hosts}).merge(
    abx_df, on="host_id", how="left"
)

assert (
    abx_events["host_id"].unique().tolist()
    == abx_scores_flat["host_id"].unique().tolist()
)


In [None]:
# split name -> [score column, scores, abx events (None for the noabx splits)]
dic_splits = {
    "train_noabx": ["score_1", noabx_train, None],
    "val_noabx": ["score_1", noabx_val, None],
    "abx": ["score_1", abx_scores_flat, abx_events],
}

# NOTE(review): the meaning of the positional False is defined by
# display_scatterplot_w_scores in src.utils_eval_score and is not visible here
display_scatterplot_w_scores(
    dic_splits, False, path_to_output=evaluation_path, flag="noabx_vs_abx"
)

In [None]:
# same abx scores and events three times, each with a different score column
dic_splits = {
    "abx_1st": ["score_1", abx_scores_flat, abx_events],
    "abx_2nd": ["score_2", abx_scores_flat, abx_events],
    "abx_3rd": ["score_3", abx_scores_flat, abx_events],
}

# NOTE(review): the meaning of the positional False, True is defined by
# display_scatterplot_w_scores in src.utils_eval_score and is not visible here
display_scatterplot_w_scores(
    dic_splits, False, True, path_to_output=evaluation_path, flag="all_abx"
)

## Individual score trajectories

### abx

In [None]:
# trajectory of score_1, score_2 and score_3 for host P006862 (abx scores)
plot_trajectory(
    abx_scores_flat,
    abx_events,
    "P006862",
    ["score_1", "score_2", "score_3"],
    path_to_output=evaluation_path,
    flag="all_scores",
)

In [None]:
# summary statistics of the abx_any_cumcount column for host E024646
abx_scores_flat.loc[abx_scores_flat.host_id == "E024646", "abx_any_cumcount"].describe()

In [None]:
# trajectory of score_1, score_2 and score_3 for host E024646 (abx scores)
plot_trajectory(
    abx_scores_flat,
    abx_events,
    "E024646",
    ["score_1", "score_2", "score_3"],
    path_to_output=evaluation_path,
    flag="all_scores",
)

In [None]:
# summary statistics of the abx_any_cumcount column for host E009676
abx_scores_flat.loc[abx_scores_flat.host_id == "E009676", "abx_any_cumcount"].describe()

In [None]:
# trajectory of score_1, score_2 and score_3 for host E009676 (abx scores)
plot_trajectory(
    abx_scores_flat,
    abx_events,
    "E009676",
    ["score_1", "score_2", "score_3"],
    path_to_output=evaluation_path,
    flag="all_scores",
)

In [None]:
# all rows and columns of the abx scores for host E004898
abx_scores_flat.loc[abx_scores_flat.host_id == "E004898", :]

In [None]:
# trajectory of score_1, score_2 and score_3 for host E004898 (abx scores)
plot_trajectory(
    abx_scores_flat,
    abx_events,
    "E004898",
    ["score_1", "score_2", "score_3"],
    path_to_output=evaluation_path,
    flag="all_scores",
)

In [None]:
# trajectory of score_1, score_2 and score_3 for host E004628 (abx scores)
plot_trajectory(
    abx_scores_flat,
    abx_events,
    "E004628",
    ["score_1", "score_2", "score_3"],
    path_to_output=evaluation_path,
    flag="all_scores",
)

In [None]:
# trajectory of score_1, score_2 and score_3 for host E021822 (abx scores)
plot_trajectory(
    abx_scores_flat,
    abx_events,
    "E021822",
    ["score_1", "score_2", "score_3"],
    path_to_output=evaluation_path,
    flag="all_scores",
)

In [None]:
# trajectory of score_1, score_2 and score_3 for host E003188 (abx scores)
plot_trajectory(
    abx_scores_flat,
    abx_events,
    "E003188",
    ["score_1", "score_2", "score_3"],
    path_to_output=evaluation_path,
    flag="all_scores",
)

### noabx

In [None]:
# trajectory of score_1 for host E035134 from the noabx training scores; no abx
# events are passed (None)
plot_trajectory(
    noabx_train,
    None,
    "E035134",
    ["score_1"],
    path_to_output=evaluation_path,
    flag="noabx_score1",
)

In [None]:
# trajectory of score_1 for host E022497 from the noabx training scores; no abx
# events are passed (None)
plot_trajectory(
    noabx_train,
    None,
    "E022497",
    ["score_1"],
    path_to_output=evaluation_path,
    flag="noabx_score1",
)