# Evaluate Model

This notebook compares the performance of the trained ActiNet model to a balanced random forest model, similar to the pypi:accelerometer model.

In [None]:
import numpy as np
import pandas as pd
from glob import glob
import os
import matplotlib.pyplot as plt
from datetime import datetime

from actinet import __version__ as actinet_version
from actinet.models import ActivityClassifier, RFActivityClassifier
from actinet.prepare import load_all_and_make_windows, extract_accelerometer_features, \
    prepare_accelerometer_data
from actinet.evaluate import evaluate_models
from actinet.utils.utils import ACTIVITY_LABELS_DICT
from actinet.utils.eval_utils import *

# --- Windowing / sampling -------------------------------------------------
WINSEC = 30         # window length in seconds
SAMPLE_RATE = 100   # Hz
RESAMPLE_RATE = 30  # Hz

# --- Execution ------------------------------------------------------------
N_JOBS = 8  # more workers run faster; do not exceed what the machine allows

# --- Data locations and annotation scheme ---------------------------------
DATAFILES = "data/capture24/P[0-9][0-9][0-9].csv.gz"  # glob pattern, one file per participant
ANNOFILE = "data/capture24/annotation-label-dictionary.csv"
SAVEFOLDER = "data/capture24"
ANNOLABEL = "Walmsley2020"  # key into ACTIVITY_LABELS_DICT

# Activity label names for the chosen annotation scheme.
ACTIVITY_LABELS = list(ACTIVITY_LABELS_DICT[ANNOLABEL].keys())

# All evaluation artefacts are written below this folder (keyed by scheme and package version).
out_dir = f"outputs/evaluation/{ANNOLABEL}/actinet_bbaa_{convert_version(actinet_version)}"

## Evaluate actinet against accelerometer

First, we extract features from each of the Capture-24 files using the accelerometer package.

In [None]:
# Feature extraction is skipped only when exactly 151 epoch files are already on disk.
# NOTE(review): 151 is presumably the number of Capture-24 participants — confirm.
epoch_files = glob("data/capture24/bbaa/P[0-9][0-9][0-9]-epoch.csv.gz")
features_extracted = len(epoch_files) == 151

if not features_extracted:
    extract_accelerometer_features(n_jobs=N_JOBS)

Next, we prepare the participant accelerometer data into the expected arrays: X, Y, T and P.

In [None]:
# Accelerometer (baseline) feature data: reuse the cached arrays when the
# folder holds exactly four .npy files, otherwise rebuild them.
bbaa_dir = f"{SAVEFOLDER}/prepared/{ANNOLABEL}"

if len(glob(f"{bbaa_dir}/*.npy")) != 4:
    X_bbaa, Y_bbaa, T_bbaa, P_bbaa = prepare_accelerometer_data(ANNOFILE, ANNOLABEL, SAVEFOLDER, N_JOBS)
else:
    # Loaded in the order X, Y, T, pid.
    X_bbaa, Y_bbaa, T_bbaa, P_bbaa = (
        np.load(f"{bbaa_dir}/{stem}.npy") for stem in ("X", "Y", "T", "pid")
    )

In [None]:
# ActiNet windowed data: reuse the cached arrays when the folder holds exactly
# four .npy files, otherwise build the windows from the raw participant files.
# NOTE(review): the "30s" folder name is hard-coded while WINSEC is configurable — confirm they stay in sync.
actinet_dir = f"{SAVEFOLDER}/30s/{ANNOLABEL}"

if len(glob(f"{actinet_dir}/*.npy")) != 4:
    X_actinet, Y_actinet, T_actinet, P_actinet = load_all_and_make_windows(
        datafiles=glob(DATAFILES),
        annofile=ANNOFILE,
        out_dir=SAVEFOLDER,
        anno_label=ANNOLABEL,
        sample_rate=SAMPLE_RATE,
        winsec=WINSEC,
        n_jobs=N_JOBS,
        downsampling_method="linear",
        lowpass_hz=None,
        resample_rate=RESAMPLE_RATE,
    )
else:
    # Loaded in the order X, Y, T, pid.
    X_actinet, Y_actinet, T_actinet, P_actinet = (
        np.load(f"{actinet_dir}/{stem}.npy") for stem in ("X", "Y", "T", "pid")
    )

Evaluate the models using 5-fold stratified group cross-validation.

In [None]:
actinet_res_path = f"{out_dir}/actinet_results.pkl"
bbaa_res_path = f"{out_dir}/rf_results.pkl"

# The evaluation is only (re)run when at least one of the two result pickles is missing.
results_cached = os.path.exists(actinet_res_path) and os.path.exists(bbaa_res_path)

if not results_cached:
    os.makedirs(f"{out_dir}/models", exist_ok=True)

    # Settings for the random-forest baseline.
    rf_params = dict(
        winsec=WINSEC,
        labels=ACTIVITY_LABELS,
        hmm_handle_sleep_transitions=True,
        hmm_ignore_transition_gaps=False,
        n_estimators=1000,
        sampling_strategy="not minority",
        replacement=True,
        n_jobs=N_JOBS,
        verbose=1,
    )

    # Settings for the ActiNet classifier.
    actinet_params = dict(
        labels=ACTIVITY_LABELS,
        batch_size=1000,
        device="cuda:0",
        hmm_handle_sleep_transitions=True,
        hmm_ignore_transition_gaps=False,
        verbose=True,
    )

    # Full record of the run configuration.
    # NOTE(review): eval_params is not used anywhere else in the visible notebook — confirm whether it should be saved.
    data_params = dict(
        sample_rate=SAMPLE_RATE,
        resample_rate=RESAMPLE_RATE,
        winsec=WINSEC,
        n_jobs=N_JOBS,
        downsampling_method="linear",
        datafiles=DATAFILES,
        annofile=ANNOFILE,
        savefolder=SAVEFOLDER,
        anno_label=ANNOLABEL,
    )
    eval_params = dict(
        rf_params=rf_params,
        data_params=data_params,
        actinet_params=actinet_params,
    )

    bbaa_classifier = RFActivityClassifier(**rf_params)
    actinet_classifier = ActivityClassifier(**actinet_params)

    # Positional order is kept exactly: classifiers, then X, Y, P, T as (actinet, bbaa) pairs.
    res = evaluate_models(
        actinet_classifier, bbaa_classifier,
        X_actinet, X_bbaa,
        Y_actinet, Y_bbaa,
        P_actinet, P_bbaa,
        T_actinet, T_bbaa,
        weights_path=out_dir + "/models/actinet_fold_{}.pt",
        out_dir=out_dir,
        verbose=True,
    )

# Results are always read back from disk, whether freshly computed or cached.
results_bbaa = pd.read_pickle(bbaa_res_path)
results_actinet = pd.read_pickle(actinet_res_path)

In [None]:
# One row per fold (1-based), listing the distinct test participant IDs of that fold.
fold_rows = [
    {'Fold': fold + 1, "Test Participant IDs": ", ".join(sorted(set(group)))}
    for fold, group in results_actinet["group"].items()
]
fold_pid_df = pd.DataFrame(fold_rows).set_index("Fold")

# DataFrame.to_csv does not create missing parent directories, and nothing
# else in this notebook creates this folder, so make sure it exists first.
# NOTE(review): this path lies outside `out_dir`, unlike every other output — confirm it is intended.
fold_pid_path = "outputs/actinet_vs_bbaa/fold_pids.csv"
os.makedirs(os.path.dirname(fold_pid_path), exist_ok=True)
fold_pid_df.to_csv(fold_pid_path)
fold_pid_df

In [None]:
# Flatten the per-fold arrays of each model into single 1-D arrays of
# true labels, predicted labels and participant IDs.
data = {
    model_name: {
        'y': np.hstack(fold_results["Y_true"]),
        'y_pred': np.hstack(fold_results["Y_pred"]),
        'pid': np.hstack(fold_results["group"]),
    }
    for model_name, fold_results in (
        ('accelerometer', results_bbaa),
        ('actinet', results_actinet),
    )
}

In [None]:
# Build one record per (model, participant) with the performance metrics, the
# raw label arrays and the scaled label counts.
records = []

for model, model_data in data.items():
    for pid in np.unique(model_data['pid']):
        mask = model_data['pid'] == pid
        y_true = model_data['y'][mask]
        y_pred = model_data['y_pred'][mask]
        accuracy, f1, kappa, bacc = calculate_metrics(y_true, y_pred)

        # pd.Series(...).value_counts() replaces the deprecated top-level pd.value_counts().
        pred_counts = pd.Series(y_pred).value_counts().to_dict()
        true_counts = pd.Series(y_true).value_counts().to_dict()

        records.append({'Participant': pid, 'Model': model, "Balanced Accuracy": bacc,
                        'Accuracy': accuracy, 'Macro F1': f1, 'Cohen Kappa': kappa,
                        'Predicted': y_pred, 'True': y_true,
                        # NOTE(review): 120 presumably converts 30 s window counts to hours (3600 / 30) — confirm.
                        "Pred_dict": DivDict(pred_counts)/120,
                        "True_dict": DivDict(true_counts)/120,
                        "Len": len(y_true)})

results = pd.DataFrame(records)

In [None]:
# Per-model "mean ± std" (3 decimals) of each metric across participants.
summary_columns = ['Accuracy', 'Balanced Accuracy', 'Cohen Kappa', 'Macro F1']

summary = (
    results
    .groupby('Model')[summary_columns]
    .agg(lambda x: f"{np.mean(x):.3f} ± {np.std(x):.3f}")
)

summary

In [None]:
# Attach participant metadata and derive ordered categorical 'Sex' and
# 'Age Band' columns; the raw metadata columns are dropped afterwards.
metadata = pd.read_csv("data/capture24/metadata.csv")

sex_mapping = {'F': 'Female', 'M': 'Male'}
age_bands = ['18-29', '30-37', '38-52', '53+']

# Default (inner) merge: participants without a metadata row are not kept.
merged = results.merge(metadata, left_on="Participant", right_on="pid")

results_df = (
    merged
    .assign(**{
        'Sex': pd.Categorical(merged['sex'].map(sex_mapping), ordered=True,
                              categories=sex_mapping.values()),
        'Age Band': pd.Categorical(merged['age'], ordered=True,
                                   categories=age_bands),
    })
    .drop(columns=["age", "sex", "pid"])
)

Paired t-test

In [None]:
from scipy.stats import ttest_rel

metrics = ['Accuracy', 'Balanced Accuracy', 'Cohen Kappa', 'Macro F1']

baseline_rows = results_df.loc[results_df['Model'] == 'accelerometer']
actinet_rows = results_df.loc[results_df['Model'] == 'actinet']

# A paired test is only meaningful when element i of both samples belongs to
# the same participant. Check that explicitly instead of relying on row order.
if not np.array_equal(baseline_rows['Participant'].to_numpy(),
                      actinet_rows['Participant'].to_numpy()):
    raise ValueError(
        "Paired t-test requires both models to have the same participants in the same order"
    )

# Two-sided paired t-test p-value per metric (baseline vs ActiNet).
p_values = {}

for metric in metrics:
    acc_values = baseline_rows[metric].values
    actinet_values = actinet_rows[metric].values
    _, p_value = ttest_rel(acc_values, actinet_values)
    p_values[metric] = p_value

p_values

In [None]:
# Plot the 'Accuracy' column of the per-participant `results` table.
# NOTE(review): the meaning of `modulus` is defined by plot_model_performance, which is not visible here.
plot_model_performance(results, 'Accuracy', modulus=10)

In [None]:
# Plot the 'Macro F1' column of the per-participant `results` table.
# NOTE(review): the positional 10 is presumably `modulus`, as in the Accuracy cell — confirm against the helper's signature.
plot_model_performance(results, 'Macro F1', 10)

In [None]:
# Plot the 'Cohen Kappa' column of the per-participant `results` table.
# NOTE(review): the positional 10 is presumably `modulus`, as in the Accuracy cell — confirm against the helper's signature.
plot_model_performance(results, 'Cohen Kappa', 10)

In [None]:
def plot_boxplots(df, x, y='Macro F1', hue='Model'):
    """Draw boxplots of a performance metric, split by `x` and coloured by `hue`.

    Args:
        df: DataFrame holding the columns named by `x`, `y` and `hue`.
        x: Column used for the x-axis groups; also used as the x-axis label
            and in the title.
        y: Metric column to plot; the y-axis label is derived from it.
        hue: Column used to colour the boxes; also used as the legend title.

    The figure is shown with `plt.show()`; nothing is returned.
    """
    # NOTE(review): `sns` is not imported explicitly in this notebook; it is
    # presumably provided by `from actinet.utils.eval_utils import *` — confirm.

    # Display names for known hue levels and metrics; anything else is shown as-is.
    legend_names = {"accelerometer": "Baseline", "actinet": "ActiNet"}
    metric_names = {"Cohen Kappa": "Cohen's kappa"}

    _, ax = plt.subplots(figsize=(5, 3), dpi=1000)
    with sns.color_palette("Set1"):
        sns.boxplot(data=df, x=x, y=y, hue=hue, ax=ax)

    # Axis labels follow the plotted columns rather than being fixed to
    # "Age Band" / Cohen's kappa, which was wrong for any other x or y.
    ax.set_xlabel(x)
    ax.set_ylabel(f"{metric_names.get(y, y)} score")
    ax.set_title(f"Performance by {x}")

    # Rename legend entries by hue level instead of by position, so the
    # labels stay correct whatever order the levels are drawn in.
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles, [legend_names.get(label, label) for label in labels], title=hue)
    plt.show()

In [None]:
# Cohen's kappa of both models, split by age band.
plot_boxplots(results_df, x='Age Band', y="Cohen Kappa")

In [None]:
# Cohen's kappa of both models, split by sex.
plot_boxplots(results_df, x='Sex', y="Cohen Kappa")

In [None]:
# "mean ± std" of Cohen's kappa for every (age band, model) combination.
# 'Age Band' is categorical: observed=False is passed explicitly to keep the
# current behaviour and avoid the pandas FutureWarning about the changing default.
results_df.groupby(['Age Band', 'Model'], observed=False)[['Cohen Kappa']].agg(
    lambda x: f"{np.mean(x):.3f} ± {np.std(x):.3f}"
)

In [None]:
# "mean ± std" of Cohen's kappa for every (sex, model) combination.
# 'Sex' is categorical: observed=False is passed explicitly to keep the
# current behaviour and avoid the pandas FutureWarning about the changing default.
results_df.groupby(['Sex', 'Model'], observed=False)[['Cohen Kappa']].agg(
    lambda x: f"{np.mean(x):.3f} ± {np.std(x):.3f}"
)

Confusion matrices

In [None]:
# This is the first cell that saves into `<out_dir>/plots`, and nothing earlier
# in the notebook creates that folder, so make sure it exists before saving.
os.makedirs(out_dir + "/plots", exist_ok=True)

# Confusion matrices for the whole population, then split by sex and by age band.
generate_confusion_matrices(results_df, ACTIVITY_LABELS,
                            save_path=out_dir+"/plots/conf_full_population.png", fontsize=18)
generate_confusion_matrices(results_df, ACTIVITY_LABELS, group_by="Sex",
                            save_path=out_dir+"/plots/conf_by_sex.png", fontsize=18)
generate_confusion_matrices(results_df, ACTIVITY_LABELS, group_by="Age Band",
                            save_path=out_dir+"/plots/conf_by_age.png", fontsize=18)

Bland-Altman plots

In [None]:
# Bland-Altman plots: full population, each sex separately, then grouped by age band.
ba_dir = out_dir + "/plots"

generate_bland_altman_plots(
    results_df, ACTIVITY_LABELS, ANNOLABEL,
    save_path=ba_dir + "/ba_full_population.png",
)

for sex_value, subset_name, file_name in (
    ("Female", "female", "/ba_by_sex_female.png"),
    ("Male", "male", "/ba_by_sex_male.png"),
):
    generate_bland_altman_plots(
        results_df[results_df["Sex"] == sex_value], ACTIVITY_LABELS, ANNOLABEL,
        subset=subset_name,
        save_path=ba_dir + file_name,
    )

generate_bland_altman_plots(
    results_df, ACTIVITY_LABELS, ANNOLABEL,
    group_by="Age Band",
    save_path=ba_dir + "/ba_by_age.png",
)

In [None]:
# Bland-Altman plots with compare_to_true='actinet': full population, each sex
# separately, then grouped by age band.
ba_dir = out_dir + "/plots"

generate_bland_altman_plots(
    results_df, ACTIVITY_LABELS, ANNOLABEL,
    save_path=ba_dir + "/ba_true_actinet_full_population.png",
    compare_to_true='actinet',
)

for sex_value, subset_name, file_name in (
    ("Female", "female", "/ba_true_actinet_by_sex_female.png"),
    ("Male", "male", "/ba_true_actinet_by_sex_male.png"),
):
    generate_bland_altman_plots(
        results_df[results_df["Sex"] == sex_value], ACTIVITY_LABELS, ANNOLABEL,
        subset=subset_name,
        save_path=ba_dir + file_name,
        compare_to_true='actinet',
    )

generate_bland_altman_plots(
    results_df, ACTIVITY_LABELS, ANNOLABEL,
    group_by="Age Band",
    save_path=ba_dir + "/ba_true_actinet_by_age.png",
    compare_to_true='actinet',
)

In [None]:
# Bland-Altman plots with compare_to_true='bbaa': full population, each sex
# separately, then grouped by age band.
ba_dir = out_dir + "/plots"

generate_bland_altman_plots(
    results_df, ACTIVITY_LABELS, ANNOLABEL,
    save_path=ba_dir + "/ba_true_bbaa_full_population.png",
    compare_to_true='bbaa',
)

for sex_value, subset_name, file_name in (
    ("Female", "female", "/ba_true_bbaa_by_sex_female.png"),
    ("Male", "male", "/ba_true_bbaa_by_sex_male.png"),
):
    generate_bland_altman_plots(
        results_df[results_df["Sex"] == sex_value], ACTIVITY_LABELS, ANNOLABEL,
        subset=subset_name,
        save_path=ba_dir + file_name,
        compare_to_true='bbaa',
    )

generate_bland_altman_plots(
    results_df, ACTIVITY_LABELS, ANNOLABEL,
    group_by="Age Band",
    save_path=ba_dir + "/ba_true_bbaa_by_age.png",
    compare_to_true='bbaa',
)

Errors

In [None]:
# Display the table built by build_mae_table for the activity labels.
# NOTE(review): presumably mean absolute error per activity, judging by the helper's name — confirm in actinet.utils.eval_utils.
build_mae_table(results_df, ACTIVITY_LABELS)

In [None]:
# Error plot for the full population.
plot_errors(
    results_df, ACTIVITY_LABELS, ANNOLABEL,
    save_path=out_dir + "/plots/errors_full_population.png",
)

In [None]:
# Error plot grouped by age band.
plot_errors(
    results_df, ACTIVITY_LABELS, ANNOLABEL,
    group_by='Age Band',
    save_path=out_dir + "/plots/errors_by_age.png",
)

In [None]:
# Error plot grouped by sex.
plot_errors(
    results_df, ACTIVITY_LABELS, ANNOLABEL,
    group_by='Sex',
    save_path=out_dir + "/plots/errors_by_sex.png",
)