# Evaluate Model

This notebook compares the performance of the trained ActiNet model to a balanced random forest model, similar to the pypi:accelerometer model.

In [None]:
import numpy as np
import pandas as pd
from glob import glob
import sys
from imblearn.ensemble import BalancedRandomForestClassifier
import os

sys.path.append("src")
from actinet.models import ActivityClassifier
from actinet.prepare import load_all_and_make_windows, extract_accelerometer_features, \
    prepare_accelerometer_data
from actinet.evaluate import evaluate_models
from actinet.utils.eval_utils import *

# Windowing / resampling parameters handed to load_all_and_make_windows below.
WINSEC = 30  # seconds
SAMPLE_RATE = 100  # Hz
RESAMPLE_RATE = 30  # Hz
N_JOBS = 8  # Set to higher number for quicker execution, but don't exceed max.

# Data locations, all rooted at SAVEFOLDER so the dataset path is spelled once.
SAVEFOLDER = "data/capture24"
DATAFILES = f"{SAVEFOLDER}/P[0-9][0-9][0-9].csv.gz"  # glob pattern, one file per participant
ANNOFILE = f"{SAVEFOLDER}/annotation-label-dictionary.csv"

ACTIVITY_LABELS = ["light", "moderate-vigorous", "sedentary", "sleep"]

## Evaluate actinet against accelerometer

First, we extract features from each of the Capture-24 files using the accelerometer package.

In [None]:
# Only run the feature extraction when the expected number of per-participant
# epoch files is not already on disk.
# NOTE(review): 151 is presumably the number of Capture-24 participants - confirm.
epoch_files = glob("data/capture24/bbaa/P[0-9][0-9][0-9]-epoch.csv.gz")
features_already_extracted = len(epoch_files) == 151
if not features_already_extracted:
    extract_accelerometer_features(n_jobs=N_JOBS)

Next, we prepare the participant accelerometer data into the expected shape: the `X`, `Y`, `T` and `P` arrays.

In [None]:
# Accelerometer feature data prepared
# Load the cached arrays only when every one of the four files read below exists;
# checking the exact files (rather than counting *.npy files) avoids a crash on
# wrongly named files and a needless recompute when an extra .npy file is present.
bbaa_dir = f"{SAVEFOLDER}/prepared/accelerometer"
bbaa_files = [f"{bbaa_dir}/{stem}.npy" for stem in ("X", "Y", "T", "pid")]

if all(os.path.exists(path) for path in bbaa_files):
    X_bbaa, Y_bbaa, T_bbaa, P_bbaa = (np.load(path) for path in bbaa_files)

else:
    # Cache miss: compute the four arrays with prepare_accelerometer_data.
    X_bbaa, Y_bbaa, T_bbaa, P_bbaa = prepare_accelerometer_data(ANNOFILE, SAVEFOLDER, N_JOBS)

In [None]:
# Actinet data prepared
# Load the cached arrays only when every one of the four files read below exists;
# checking the exact files (rather than counting *.npy files) avoids a crash on
# wrongly named files and a needless recompute when an extra .npy file is present.
actinet_dir = f"{SAVEFOLDER}/prepared/downsampling_linear_lowpass_None"
actinet_files = [f"{actinet_dir}/{stem}.npy" for stem in ("X", "Y", "T", "pid")]

if all(os.path.exists(path) for path in actinet_files):
    X_actinet, Y_actinet, T_actinet, P_actinet = (np.load(path) for path in actinet_files)

else:
    X_actinet, Y_actinet, T_actinet, P_actinet = load_all_and_make_windows(
        # glob() returns files in arbitrary, filesystem-dependent order; sort so the
        # input order (and anything derived from it) is reproducible across machines.
        datafiles=sorted(glob(DATAFILES)),
        annofile=ANNOFILE,
        out_dir=SAVEFOLDER,
        anno_label="Walmsley2020",
        sample_rate=SAMPLE_RATE,
        winsec=WINSEC,
        n_jobs=N_JOBS,
        downsampling_method="linear",
        lowpass_hz=None,
        resample_rate=RESAMPLE_RATE,
    )

Evaluate the models using 5-fold stratified group cross-validation.

In [None]:
# Pickled per-model results; both files sit in the folder passed as out_dir below.
actinet_res_path = "outputs/actinet_vs_bbaa/actinet_results.pkl"
bbaa_res_path = "outputs/actinet_vs_bbaa/rf_results.pkl"

# The evaluation is skipped only when BOTH result files are already on disk.
results_cached = os.path.exists(actinet_res_path) and os.path.exists(bbaa_res_path)

if not results_cached:
    # Balanced random forest baseline, fitted on the accelerometer features.
    rf_params = dict(
        n_estimators=1000,
        oob_score=True,
        sampling_strategy="not minority",
        replacement=True,
        n_jobs=N_JOBS,
        random_state=42,
        verbose=1,
    )
    bbaa_classifier = BalancedRandomForestClassifier(**rf_params)

    # ActiNet classifier; its label set is taken from the labels present in Y_actinet.
    actinet_labels = np.unique(Y_actinet)
    actinet_classifier = ActivityClassifier(
        labels=actinet_labels,
        batch_size=1000,
        device="cuda:0",
        verbose=True,
    )

    # Arguments are positional and interleaved actinet/bbaa: models, X, Y, P, T.
    # NOTE(review): presumably evaluate_models writes the two pickles into out_dir;
    # the read_pickle calls below rely on that - confirm.
    res = evaluate_models(
        actinet_classifier, bbaa_classifier,
        X_actinet, X_bbaa,
        Y_actinet, Y_bbaa,
        P_actinet, P_bbaa,
        T_actinet, T_bbaa,
        weights_path="models/evaluation_models/actinet_vs_bbaa_{}.pt",
        out_dir="outputs/actinet_vs_bbaa",
        verbose=True,
    )

# Always read the results back from disk so later cells see the same objects
# whether or not the evaluation ran in this session.
results_bbaa = pd.read_pickle(bbaa_res_path)
results_actinet = pd.read_pickle(actinet_res_path)

In [None]:
# One row per entry of results_actinet["group"]: the 1-based fold number and the
# sorted, de-duplicated participant IDs of that fold joined into a single string.
fold_pid_df = pd.DataFrame(
    [
        {
            'Fold': fold_idx + 1,
            "Test Participant IDs": ", ".join(sorted(set(fold_pids))),
        }
        for fold_idx, fold_pids in results_actinet["group"].items()
    ]
).set_index("Fold")

fold_pid_df.to_csv("outputs/actinet_vs_bbaa/fold_pids.csv")
fold_pid_df

In [None]:
# Concatenate the per-fold arrays of each model into flat arrays of true labels,
# predicted labels and participant IDs, keyed by model name.
data = {
    model_name: {
        'y': np.hstack(fold_results["Y_true"]),
        'y_pred': np.hstack(fold_results["Y_pred"]),
        'pid': np.hstack(fold_results["group"]),
    }
    for model_name, fold_results in (
        ('accelerometer', results_bbaa),
        ('actinet', results_actinet),
    )
}

In [None]:
# Divisor applied to the per-label window counts.
# NOTE(review): presumably 3600 s / 30 s windows, i.e. converts counts to hours - confirm.
WINDOWS_PER_HOUR = 120

# Collect one record per (model, participant), then build the frame once at the end.
records = []

for model, model_data in data.items():
    for pid in np.unique(model_data['pid']):
        # Select this participant's windows.
        mask = model_data['pid'] == pid
        y_true = model_data['y'][mask]
        y_pred = model_data['y_pred'][mask]
        accuracy, f1, kappa, bacc = calculate_metrics(y_true, y_pred)

        # Series.value_counts replaces the deprecated top-level pd.value_counts.
        pred_counts = pd.Series(y_pred).value_counts().to_dict()
        true_counts = pd.Series(y_true).value_counts().to_dict()

        records.append({'Participant': pid, 'Model': model, "Balanced Accuracy": bacc,
                        'Accuracy': accuracy, 'Macro F1': f1, 'Cohen Kappa': kappa,
                        'Predicted': y_pred, 'True': y_true,
                        "Pred_dict": DivDict(pred_counts)/WINDOWS_PER_HOUR,
                        "True_dict": DivDict(true_counts)/WINDOWS_PER_HOUR,
                        "Len": len(y_true)})

results = pd.DataFrame(records)

In [None]:
def _mean_pm_std(values):
    """Format a column of scores as 'mean ± std' with three decimals (np.mean / np.std defaults)."""
    return f"{np.mean(values):.3f} ± {np.std(values):.3f}"


# Per-model summary of the per-participant scores.
metric_columns = ['Accuracy', 'Balanced Accuracy', 'Cohen Kappa', 'Macro F1']
summary = results.groupby('Model')[metric_columns].agg(_mean_pm_std)

summary

In [None]:
# Attach participant metadata to the per-participant results and expose sex and
# age band as ordered categoricals under display-friendly column names.
metadata = pd.read_csv("data/capture24/metadata.csv")

sex_mapping = {'F': 'Female', 'M': 'Male'}
age_bands = ['18-29', '30-37', '38-52', '53+']

# Inner join on participant ID ('Participant' in results, 'pid' in the metadata).
merged = results.merge(metadata, left_on="Participant", right_on="pid")

sex_categorical = pd.Categorical(merged['sex'].map(sex_mapping), ordered=True,
                                 categories=sex_mapping.values())
age_categorical = pd.Categorical(merged['age'], ordered=True, categories=age_bands)

# Add the two categorical columns, then drop the raw metadata columns they replace.
results_df = (
    merged
    .assign(**{'Sex': sex_categorical, 'Age Band': age_categorical})
    .drop(columns=["age", "sex", "pid"])
)

Difference boxplots

In [None]:
# Input is the per-participant results joined with metadata (Sex / Age Band columns).
# The helper is not defined in this notebook - presumably provided by the
# actinet.utils.eval_utils star import.
plot_difference_boxplots(results_df)

In [None]:
# Plot the 'Accuracy' column of the per-participant results (one row per model and participant).
plot_model_performance(results, 'Accuracy', modulus=10)

In [None]:
# Same plot for the 'Macro F1' column.
# NOTE(review): 10 is passed positionally here but as modulus=10 in the 'Accuracy'
# cell - presumably the same parameter; confirm against the helper's signature.
plot_model_performance(results, 'Macro F1', 10)

In [None]:
# Same plot for the 'Cohen Kappa' column.
# NOTE(review): 10 is passed positionally here but as modulus=10 in the 'Accuracy'
# cell - presumably the same parameter; confirm against the helper's signature.
plot_model_performance(results, 'Cohen Kappa', 10)

In [None]:
# Boxplots keyed on the ordered 'Age Band' categorical added when the metadata was merged.
plot_boxplots(results_df, 'Age Band')

In [None]:
# Boxplots keyed on the 'Sex' categorical ('Female' / 'Male') added when the metadata was merged.
plot_boxplots(results_df, 'Sex')

Confusion matrices

In [None]:
# Three sets of confusion matrices from the same frame: no grouping, grouped by
# 'Sex', and grouped by 'Age Band'. Each call gets its own PDF path via save_path
# (presumably where the figure is written - confirm in the helper).
generate_confusion_matrices(results_df, save_path="outputs/actinet_vs_bbaa/full_population.pdf", fontsize=18)
generate_confusion_matrices(results_df, group_by="Sex", save_path="outputs/actinet_vs_bbaa/by_sex.pdf", fontsize=18)
generate_confusion_matrices(results_df, group_by="Age Band", save_path="outputs/actinet_vs_bbaa/by_age.pdf", fontsize=18)

Bland-Altman plots

In [None]:
# Whole population first.
generate_bland_altman_plots(results_df, save_path="outputs/actinet_vs_bbaa/bland_altman/full_population.pdf")
# Sex is handled by filtering the rows here and labelling the call via `subset`,
# one call per sex.
generate_bland_altman_plots(results_df[results_df["Sex"]=="Female"], subset="female",
                            save_path="outputs/actinet_vs_bbaa/bland_altman/by_sex_female.pdf")
generate_bland_altman_plots(results_df[results_df["Sex"]=="Male"], subset="male",
                            save_path="outputs/actinet_vs_bbaa/bland_altman/by_sex_male.pdf")
# Age bands are handled inside the helper via group_by, unlike the per-sex calls above.
generate_bland_altman_plots(results_df, group_by="Age Band", save_path="outputs/actinet_vs_bbaa/bland_altman/by_age.pdf")