# Notebook to evaluate anomaly scores by unsupervised prediction performance

To run this notebook you need to create and activate the following conda environment:

```
conda create --name score_eval -c conda-forge -c defaults numpy pandas matplotlib seaborn scipy scikit-learn ipython ipykernel -y
conda activate score_eval
pip install -e .
```


## Setup

In [None]:
import os

import numpy as np
import pandas as pd

from src.utils_eval_score import (
    _filter_hosts_w_microbiome_samples_prior_to_abx,
    get_scores_n_abx_info,
)
from src.utils_prediction import (
    report_metrics,
    train_n_evaluate_rf_model,
)

%load_ext autoreload
%autoreload 2
%matplotlib inline

User input: specify the inferred model and its linked datasets to evaluate in the cell below:

In [None]:
#### USER INPUT START
# Everything between the START/END markers is meant to be edited by the user;
# the cells below only read these values.

# name of the model; used as sub-folder of "../data/" when building the
# score and evaluation paths
model_name = "saved_models_microbial_novel_alpha_div2/id-55"
# which model version to evaluate: "best" or "last"
point_to_evaluate = "best"

# name of feature dataset used for model
ft_name = "ft_vat19_anomaly_v20240806_entero_genus"
# path to abx time-series file
# NOTE(review): not referenced by any cell visible in this notebook - confirm
# whether it is still needed
path_to_abx_data = "../data/original_data/"
# name of abx time-series used for model
abx_ts_name = "ts_vat19_abx_v20240806"

# limit evaluation to time range up to this many months (if None, no limit is
# set and all scores are evaluated)
limit_months = 24.0

# scaling factor options: if True, scores are read from the scaling-factor
# specific sub-folder that is assembled from the three options below
scaling_factors_used = True

# if scaling_factors_used is True, then the following options are required:
# non-centered = "nc_std" or centered = "std"
stddev_type = "nc_std"
# moving average window size: 30 or 10
moving_avg = 10
# whether to include duplicates: "--RD-True" or ""
# (the value is appended verbatim to the scores folder name)
duplicates = ""

#### USER INPUT END

## Read and prepare data

In [None]:
# Root folder holding the anomaly-detection outputs of the selected model.
base_path = f"../data/{model_name}/anomaly_detection/"

if scaling_factors_used:
    print("Scaling factors used.")
    # Sub-folder name encoding the scaling-factor options chosen by the user.
    folder_name = (
        f"using-SF_{stddev_type}_z_scores--moving_avg-{moving_avg}"
        f"-cummax-lower_bound-1{duplicates}"
    )
    # Computed outside the f-string: reusing double quotes inside a
    # double-quoted f-string is a SyntaxError before Python 3.12.
    duplicates_suffix = duplicates.replace("-", "_").lower()

    scores_path = f"{base_path}scores_{point_to_evaluate}_normal/{folder_name}/"
    evaluation_path = (
        f"{base_path}evaluation_{point_to_evaluate}_unsupervised_pred_"
        f"{stddev_type}_ma{moving_avg}{duplicates_suffix}/"
    )
else:
    scores_path = f"{base_path}scores_{point_to_evaluate}_normal/"
    evaluation_path = f"{base_path}evaluation_{point_to_evaluate}_unsupervised_pred/"

# exist_ok=True makes the cell safe to re-run and avoids the check-then-create
# race of a separate os.path.exists() test.
os.makedirs(evaluation_path, exist_ok=True)

In [None]:
# Load the score tables and abx metadata for the configured model / datasets.
# NOTE(review): noabx_train and abx_df are not used by any cell visible in this
# notebook - confirm whether they are needed further down.
(
    noabx_train,
    noabx_val,
    abx_scores_flat,
    abx_df,
    abx_age_at_all,
) = get_scores_n_abx_info(scores_path, ft_name, limit_months, abx_ts_name)


## Define true targets

In [None]:
# Sort in place, ascending on every key, with the keys applied in the listed
# priority order.
abx_scores_flat.sort_values(
    by=["abx_max_count_ever", "max_abx_w_microbiome", "host_id", "day"],
    ascending=True,
    inplace=True,
)

In [None]:
# filter abx hosts by at least 1 microbiome sample prior to 1st abx exposure
print(abx_scores_flat.shape)
# Defensive copy: the next cell adds and overwrites a "true_target" column on
# the filtered frame. Copying guarantees those writes never reach
# abx_scores_flat and do not trigger pandas' SettingWithCopyWarning, whatever
# the helper returns (a slice, a fresh frame or the input itself).
abx_scores_flat_f = _filter_hosts_w_microbiome_samples_prior_to_abx(
    abx_scores_flat, abx_age_at_all
).copy()
abx_scores_flat_f.shape

In [None]:
# Every sample starts unlabelled (NaN); only the samples selected below
# receive a label.
abx_scores_flat_f["true_target"] = np.nan

# ! true positives: per host, the 1st observed sample within the first
# 3 months after the 1st abx exposure
# * 1st abx exposure: abx_scores_flat_f["abx_any_cumcount"] == 1
# * any abx exposure: abx_scores_flat_f["abx_any_cumcount"] > 0
samples_after_1st = abx_scores_flat_f["abx_any_cumcount"].eq(1) & abx_scores_flat_f[
    "abx_any_last_t_dmonths"
].le(3.0)

# per host: index label of the candidate sample with the smallest
# abx_any_last_t_dmonths
# * any sample after abx: use samples_after_1st directly instead of
#   first_sample_idx
# NOTE(review): selecting by label assumes the index of abx_scores_flat_f is
# unique - confirm.
first_sample_idx = (
    abx_scores_flat_f.loc[samples_after_1st]
    .groupby("host_id")["abx_any_last_t_dmonths"]
    .idxmin()
)
abx_scores_flat_f.loc[first_sample_idx, "true_target"] = 1

# ! true negatives: all samples with abx_any_cumcount == 0
# TODO: add a prior sample!
abx_scores_flat_f.loc[abx_scores_flat_f["abx_any_cumcount"].eq(0), "true_target"] = 0

# label distribution, including the samples that stayed unlabelled
abx_scores_flat_f["true_target"].value_counts(dropna=False)


In [None]:
# Keep only the labelled samples (true_target is 0 or 1); copy so that later
# column additions do not touch abx_scores_flat_f.
has_label = abx_scores_flat_f["true_target"].notna()
unsupervised_subset = abx_scores_flat_f.loc[has_label].copy()
print(unsupervised_subset["true_target"].value_counts(dropna=False))
unsupervised_subset.shape

## Define features needed for modelling

In [None]:
def _add_step_features(host_samples):
    """Add features comparing each sample of one host with its previous row.

    Adds three columns to a copy of ``host_samples``:
    * ``score_0_rel_change``: ``score_0`` divided by the previous row's
      ``score_0`` (now / previous)
    * ``month5_bin_diff``: difference in ``month5_bin`` to the previous row
    * ``div_alpha_faith_pd_before``: ``div_alpha_faith_pd`` of the previous row

    The first row of each host has no predecessor and gets NaN in all three.
    """
    previous_score = host_samples["score_0"].shift(1)
    return host_samples.assign(
        score_0_rel_change=host_samples["score_0"] / previous_score,
        month5_bin_diff=host_samples["month5_bin"].diff(),
        div_alpha_faith_pd_before=host_samples["div_alpha_faith_pd"].shift(1),
    )


# Compute the step features host by host, then turn the resulting
# (host_id, original index) index back into regular columns.
# NOTE(review): this cell overwrites its own input and reset_index() adds a
# column for the former index, so re-running it without re-running the cells
# above presumably fails or duplicates columns - confirm.
unsupervised_subset = (
    unsupervised_subset.groupby("host_id")
    .apply(_add_step_features, include_groups=False)
    .reset_index()
)


## Score-based predictions

### S1: Set absolute threshold

In [None]:
# Result table: report_metrics receives the table plus the quantile and its
# return value replaces the table on every iteration.
df_results_absolute = pd.DataFrame()
df_results_absolute.index.name = "quantile"

# quantiles evaluated from the strictest to the most lenient threshold
for q in [0.99, 0.97, 0.95, 0.9, 0.8, 0.7]:
    print(f"Quantile: {q}")

    # threshold inferred from the noabx validation set
    thresh = noabx_val["score_0"].quantile(q)

    # predicted target: score strictly above the threshold
    unsupervised_subset_th = unsupervised_subset.assign(
        pred_target=unsupervised_subset["score_0"].gt(thresh)
    )

    # evaluate classification
    df_results_absolute = report_metrics(
        unsupervised_subset_th["true_target"],
        unsupervised_subset_th["pred_target"],
        df_results_absolute,
        q,
    )

df_results_absolute

### S2: Set relative score increase

In [None]:
# Result table: report_metrics receives the table plus the relative increase
# and its return value replaces the table on every iteration.
df_results_relative = pd.DataFrame()
df_results_relative.index.name = "rel_increase"

# relative increases evaluated from the smallest to the largest factor
for rel_inc in [1.05, 1.1, 1.2, 1.3, 1.4, 1.5, 1.75, 2.0, 3, 4]:
    print(f"Relative increase: {rel_inc}")

    # default prediction is negative
    unsupervised_subset_rel = unsupervised_subset.assign(pred_target=0)

    # predicted positive: score rose by at least the factor rel_inc compared
    # with the previous sample and month5_bin_diff is at most 3.0
    pred_true = unsupervised_subset_rel["score_0_rel_change"].ge(
        rel_inc
    ) & unsupervised_subset_rel["month5_bin_diff"].le(3.0)
    unsupervised_subset_rel.loc[pred_true, "pred_target"] = 1

    # evaluate classification
    df_results_relative = report_metrics(
        unsupervised_subset_rel["true_target"],
        unsupervised_subset_rel["pred_target"],
        df_results_relative,
        rel_inc,
    )

df_results_relative

### S3: RF-based classifier

In [None]:
# Random forest using the anomaly score as its only feature.
# NOTE(review): no random seed is set in this notebook - confirm that
# train_n_evaluate_rf_model fixes one so results are reproducible.
df_results_ml = train_n_evaluate_rf_model(
    "true_target",
    ["score_0"],
    unsupervised_subset,
)
df_results_ml

## Baseline predictions

### B1: Set absolute threshold

### B2: Set relative threshold

### B3: RF-based classifier : fully static

In [None]:
# Baseline features: no anomaly score, only covariates plus the current alpha
# diversity.
# NOTE(review): the feature-engineering cell works with "month5_bin" whereas
# "month_bin" is used here - confirm this is intended.
fts_for_b3_rf_static = (
    ["month_bin"]
    + ["diet_milk", "diet_weaning", "delivery_mode"]
    + ["div_alpha_faith_pd"]
)

df_results_rf_static = train_n_evaluate_rf_model(
    "true_target",
    fts_for_b3_rf_static,
    unsupervised_subset,
)
df_results_rf_static

In [None]:
# Semi-static baseline: same covariates as the fully static baseline plus the
# alpha diversity of the previous sample ("div_alpha_faith_pd_before").
fts_for_b3_rf_semi_static = (
    ["month_bin"]
    + ["diet_milk", "diet_weaning", "delivery_mode"]
    + ["div_alpha_faith_pd", "div_alpha_faith_pd_before"]
)

df_results_rf_semi_static = train_n_evaluate_rf_model(
    "true_target",
    fts_for_b3_rf_semi_static,
    unsupervised_subset,
)
df_results_rf_semi_static