### A VAGAN and an MCI/AD classifier are trained separately. The classifier is then applied to both the raw samples and the VAGAN-preprocessed samples.

In [None]:
import os
# Switch the working directory to the project checkout. The relative
# "data/..." paths used below depend on it (and presumably the `src.*`
# imports as well).
# NOTE(review): machine-specific absolute path — adjust when running elsewhere.
os.chdir('/local/home/mhoerold/entrack')

In [None]:
import yaml
import copy
import pydoc
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

from src.test_retest.mri.supervised_features import SliceClassification
from src.data.streaming.base import Group

## Some constants

In [None]:
# Labels of the separately trained VAGAN and classifier runs; `clf_label`
# also names the classifier's folder under data/.
vagan_label = "20180907-084937"
clf_label = "20180907-102051"
# Meta-info key that is looked up per file id to obtain the label.
conversion_key = "mci_ad_conv_delta_3"
# Text file with one test file id per line.
test_file = "data/20180905-143554/test.txt"
clf_folder = os.path.join("data", clf_label)
with open(os.path.join(clf_folder, "config.yaml"), 'r') as f:
    # Pass the Loader explicitly: a bare `yaml.load(f)` warns on PyYAML 5.x
    # and raises TypeError on PyYAML >= 6. `yaml.Loader` keeps the loading
    # behaviour of the old default.
    # NOTE(review): `yaml.Loader` can construct arbitrary Python objects;
    # only use it on config files you trust (this one is a local artefact).
    clf_config = yaml.load(f, Loader=yaml.Loader)

## Load classifier only

In [None]:
# Classifier fed by the plain MRI streamer (no VAGAN preprocessing).
# Work on a deep copy so the loaded `clf_config` stays untouched.
clf_only_config = copy.deepcopy(clf_config)
only_streamer_cfg = clf_only_config["params"]["streamer"]

# The streamer class is specified as a dotted path; pydoc.locate turns it into
# the actual object (or None if it cannot be found).
only_streamer_cfg["class"] = pydoc.locate(
    "src.data.streaming.mri_streaming.MRIConversionSingleStream"
)
only_streamer_cfg["params"]["stream_config"].update({
    "conversion_delta": 3,
    # NOTE(review): "health_mci" next to "healthy_ad" looks like a possible
    # typo — confirm against the diagnosis keys the streamer expects.
    "use_diagnoses": ["health_mci", "healthy_ad"],
})

clf_only_obj = SliceClassification(**clf_only_config["params"])
# The estimator is pointed at the trained classifier's folder.
clf_only_est = tf.estimator.Estimator(
    model_fn=clf_only_obj.model_fn,
    model_dir=clf_folder,
    params=clf_only_config["params"]["params"],
)

## Load classifier with VAGAN preprocessing

In [None]:
# Same classifier, but fed by the VAGAN-preprocessing streamer.
# Work on a deep copy so the loaded `clf_config` stays untouched.
clf_vagan_config = copy.deepcopy(clf_config)
vagan_streamer_cfg = clf_vagan_config["params"]["streamer"]

# The streamer class is specified as a dotted path; pydoc.locate turns it into
# the actual object (or None if it cannot be found).
vagan_streamer_cfg["class"] = pydoc.locate(
    "src.data.streaming.vagan_preprocessing.VaganConversionFarPredictions"
)
vagan_streamer_cfg["params"]["stream_config"].update({
    "conversion_delta": 3,
    "vagan_steps": 6,
    "vagan_label": vagan_label,
    # NOTE(review): the VAGAN label string is also used as the value of
    # `cache_preprocessing` — confirm the streamer expects a label here.
    "cache_preprocessing": vagan_label,
    # NOTE(review): "health_mci" next to "healthy_ad" looks like a possible
    # typo — confirm against the diagnosis keys the streamer expects.
    "use_diagnoses": ["health_mci", "healthy_ad"],
})

clf_vagan_obj = SliceClassification(**clf_vagan_config["params"])
# Uses the same model_dir as the classifier-only estimator.
clf_vagan_est = tf.estimator.Estimator(
    model_fn=clf_vagan_obj.model_fn,
    model_dir=clf_folder,
    params=clf_vagan_config["params"]["params"],
)

## Load test data

In [None]:
# Read one file id per line, dropping surrounding whitespace.
with open(test_file, 'r') as f:
    file_ids = [line.strip() for line in f]

# Hand the ids to the streamer, which returns the ones to use.
file_ids = clf_only_obj.streamer.select_file_ids(file_ids)

In [None]:
# labels
raw_streamer = clf_only_obj.streamer

# Label of every test sample: the meta-info entry stored under `conversion_key`.
labels = [
    raw_streamer.get_meta_info_by_key(fid, conversion_key)
    for fid in file_ids
]
# One single-element Group per file id.
batches = [Group([fid]) for fid in file_ids]

# Input functions over the same groups: one from the raw streamer, one from
# the VAGAN streamer.
clf_input_fn = raw_streamer.get_input_fn_for_groups(batches)
vagan_input_fn = clf_vagan_obj.streamer.get_input_fn_for_groups(batches)

In [None]:
def predict_probabilities(est, input_fn):
    """Return entry 1 of "probs" for every prediction produced by `est`.

    Args:
        est: object with a `predict(input_fn, predict_keys)` method that
            yields dicts containing a "probs" entry.
        input_fn: input function forwarded to `est.predict`.

    Returns:
        np.ndarray with one value per prediction, in the order yielded.
    """
    ad_probs = [
        prediction["probs"][1]  # index 1: probability of being AD
        for prediction in est.predict(input_fn, ["probs"])
    ]
    return np.array(ad_probs)

In [None]:
# Classifier probabilities using the input function built from the
# classifier-only (raw) streamer.
t0_probs = predict_probabilities(clf_only_est, clf_input_fn)

In [None]:
# Classifier probabilities using the input function built from the VAGAN
# streamer.
vagan_probs = predict_probabilities(clf_vagan_est, vagan_input_fn)

In [None]:
from sklearn.metrics import recall_score, precision_score, accuracy_score, f1_score, roc_auc_score

def specificity_score(y_true, y_pred):
    """
    Compute the true negative rate, TN / (TN + FP).

    Only pairs whose true label equals 0 are considered: a prediction of 0
    counts as a true negative, a prediction of 1 as a false positive, and any
    other prediction is ignored. Returns 0 when no such pair is counted.
    """
    true_neg = 0
    false_pos = 0
    for actual, predicted in zip(y_true, y_pred):
        if actual != 0:
            continue  # positives do not contribute to specificity
        if predicted == 0:
            true_neg += 1
        elif predicted == 1:
            false_pos += 1

    negatives = true_neg + false_pos
    return true_neg / negatives if negatives != 0 else 0

def compute_scores(y_true, y_pred):
    """Evaluate accuracy, recall, precision, specificity and f1.

    Each metric is keyed by the part of its function name before the first
    underscore (e.g. "accuracy", "f1") and rounded to 5 decimals.

    Returns:
        (names, scores): the metric names in evaluation order, and a dict
        mapping each name to its rounded score.
    """
    metric_fns = [
        accuracy_score,
        recall_score,
        precision_score,
        specificity_score,
        f1_score,
    ]
    names = [fn.__name__.split("_")[0] for fn in metric_fns]
    scores = {
        name: round(fn(y_true, y_pred), 5)
        for name, fn in zip(names, metric_fns)
    }
    return names, scores

def _report_threshold_sweep(labels, decision_values):
    """Sweep a decision threshold over `decision_values` and report results.

    For each of 200 evenly spaced thresholds in [-1, 1], samples whose value
    is strictly greater than the threshold are predicted as 1, all others
    as 0. Prints the best accuracy with its threshold, the full score dict
    (from `compute_scores`) at the threshold that maximises each metric, and
    the ROC AUC of the unthresholded values; then plots accuracy against the
    threshold.

    Args:
        labels: expected label per sample.
        decision_values: array-like with one value per sample.
    """
    all_eps = np.linspace(-1, 1, 200)
    expected = np.array(labels)
    values = np.asarray(decision_values)

    accs = []
    best_score = {}  # metric name -> full score dict at that metric's best eps
    best_eps = {}    # metric name -> eps at which that metric was best
    for eps in all_eps:
        predicted_conv = (values > eps).astype(np.float32)
        accs.append(np.mean(predicted_conv == expected))

        score_names, scores = compute_scores(labels, predicted_conv)
        for name in score_names:
            # Strict '>' keeps the first eps encountered when scores tie.
            if name not in best_score or scores[name] > best_score[name][name]:
                best_score[name] = scores
                best_eps[name] = eps

    print("Max acc {} for eps {}".format(np.max(accs), all_eps[np.argmax(accs)]))
    for k, v in best_score.items():
        print("scores for best {} (eps={})".format(k, round(best_eps[k], 3)))
        print(v)

    print("AUC score")
    print(roc_auc_score(labels, values))
    plt.figure()
    plt.plot(all_eps, accs, marker='o')
    plt.show()


def threshold_diff(labels, t0_probs, vagan_probs):
    """Threshold the difference `vagan_probs - t0_probs` and report scores.

    A sample is predicted as 1 when the difference exceeds the threshold.
    Prints the best accuracy, per-metric best scores and the AUC, and plots
    accuracy against the threshold.

    Args:
        labels: expected label per sample.
        t0_probs: per-sample values subtracted from `vagan_probs`.
        vagan_probs: per-sample values, aligned with `t0_probs`.
    """
    diffs = np.asarray(vagan_probs) - np.asarray(t0_probs)
    _report_threshold_sweep(labels, diffs)


def threshold_vagan_prob(labels, vagan_probs):
    """Threshold `vagan_probs` directly and report scores.

    A sample is predicted as 1 when its value exceeds the threshold. Prints
    the best accuracy, per-metric best scores and the AUC, and plots accuracy
    against the threshold.

    The sweep covers [-1, 1]; if the values are probabilities in [0, 1], every
    negative threshold predicts all samples as 1.

    Args:
        labels: expected label per sample.
        vagan_probs: per-sample values to threshold.
    """
    _report_threshold_sweep(labels, vagan_probs)

In [None]:
# Threshold sweep on the difference between the VAGAN-streamer and
# raw-streamer probabilities.
threshold_diff(labels, t0_probs, vagan_probs)

In [None]:
# Threshold sweep on the VAGAN-streamer probabilities alone.
threshold_vagan_prob(labels, vagan_probs)

In [None]:
# Mean of the labels; assuming they are 0/1 this is the fraction of
# positive samples — TODO confirm the label encoding.
np.mean(labels)

In [None]:
# Number of labelled test samples.
len(labels)

## GT t1 images

In [None]:
# File ids of the t1 images as exposed by the raw streamer.
# NOTE(review): the analyses below pair these with `labels`, so they are
# presumably aligned one-to-one with `file_ids` — confirm in the streamer.
t1_fids = clf_only_obj.streamer.t1_fids

In [None]:
# One single-element Group per t1 file id.
t1_batches = [Group([fid]) for fid in t1_fids]

# Classifier probabilities on the t1 images, via the raw streamer.
clf_t1_input_fn = clf_only_obj.streamer.get_input_fn_for_groups(t1_batches)
t1_probs = predict_probabilities(clf_only_est, clf_t1_input_fn)

In [None]:
#for t0, t1, conv in zip(t0_probs, t1_probs, labels):
 #   print("{} {} {}".format(t0, t1, conv))

In [None]:
# Spot check: exact age for the t1 file id at index 4.
clf_only_obj.streamer.get_exact_age(t1_fids[4])

In [None]:
# Spot check: exact age for the test file id at index 4 (compare with the
# t1 value above).
clf_only_obj.streamer.get_exact_age(file_ids[4])

In [None]:
# Repeat both threshold sweeps with the t1 probabilities passed in place of
# the VAGAN-streamer probabilities.
# NOTE(review): assumes `t1_probs` is aligned one-to-one with `labels` and
# `t0_probs` — confirm the ordering of `t1_fids`.
threshold_diff(labels, t0_probs, t1_probs)
threshold_vagan_prob(labels, t1_probs)