In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

from utils import *

In [2]:
# Run indices to evaluate; each one selects a train/test split file and the
# matching prediction files.
run_list = list(range(5))

# Identifiers of the models whose saved predictions are evaluated.
model_list = [
    'estimator_A',
    'estimator_B',
    'linear_regression',
    'nn',
    'vit',
]

def get_prediction_path(run, model):
    """Return the path of the saved prediction array for one run/model pair.

    Args:
        run: Run index (the notebook iterates over ``run_list``).
        model: Model identifier (the notebook iterates over ``model_list``).

    Returns:
        str: ``./predictions/<run>_<model>.npy``.
    """
    file_name = "{}_{}.npy".format(run, model)
    return "./predictions/" + file_name

def get_train_test_indice_path(run):
    """Return the path of the train/test split index file for a run.

    Args:
        run: Run index (the notebook iterates over ``run_list``).

    Returns:
        str: ``./dataset/train_test_split_set_<run>.npy``.
    """
    return "./dataset/train_test_split_set_" + str(run) + ".npy"

In [3]:
# Column names for the evaluation summary table.
#
# The order MUST match the order in which the evaluation loop appends values
# to each `row`: model, run, and then -- for each level in turn (ion,
# precursor, col) -- the three detection metrics followed by the three
# distance metrics. Grouping all accuracy/sensitivity/specificity columns
# before all l1/mse/cosine columns silently mislabels the saved values,
# because both layouts have the same length and pandas raises no error.
col_list = [
    'model', 'run',
    'ion_accuracy', 'ion_sensitivity', 'ion_specificity',
    'ion_l1', 'ion_mse', 'ion_cosine',
    'precursor_accuracy', 'precursor_sensitivity', 'precursor_specificity',
    'precursor_l1', 'precursor_mse', 'precursor_cosine',
    'col_accuracy', 'col_sensitivity', 'col_specificity',
    'col_l1', 'col_mse', 'col_cosine',
]

# Accumulates one summary row per (model, run) pair.
data = []

In [4]:
def get_masked_spectral_distance(cosine):
    """Convert a cosine similarity into a normalized spectral-angle score.

    Computes ``1 - 2 * arccos(cosine) / pi``, so a cosine of 1 maps to 1,
    a cosine of 0 maps to 0 and a cosine of -1 maps to -1.

    The input is clipped to ``[-1, 1]`` before taking the arccosine:
    floating-point round-off can leave a computed cosine marginally outside
    that interval, where ``np.arccos`` returns NaN (with a runtime warning).
    NaN inputs are passed through unchanged.

    Args:
        cosine: Cosine similarity, scalar or array-like.

    Returns:
        The normalized spectral angle, same shape as the input.
    """
    bounded_cosine = np.clip(cosine, -1.0, 1.0)
    arccosine = np.arccos(bounded_cosine)
    return 1 - 2 * arccosine / np.pi

In [5]:
# Evaluate every (model, run) pair.
#
# For each pair this loads the reference matrix and the model's predictions
# for the run's split, thresholds both, and computes detection metrics
# (accuracy / sensitivity / specificity on the binarized values) and distance
# metrics (L1 / MSE / cosine, plus a spectral angle that is only printed) at
# three granularities:
#   1. all valid test entries flattened together ("frag ion level"),
#   2. per test row, averaged over rows ("precursor level"),
#   3. per column, averaged over columns ("averaged over col").
# One summary row per pair is appended to `data`.
#
# load_train_test_prob, get_accuracy_sensitivity_specificity,
# compute_l1_distance, compute_mse and compute_cosine are not defined in this
# notebook -- presumably they come from `from utils import *`; their exact
# semantics are not visible here.
#
# Leftover debugging overrides:
# run = 0
# model = "nn"
for model in model_list:
# for model in ['vit']:
    for run in run_list:
        print(f"\n{model} - {run}")
        # These paths do not depend on model/run; they are re-assigned on every
        # iteration. `output_dir` is read after the loop when saving results.
        precursor_info_path = './dataset/precursor_info.tsv'
        matrix_path = './dataset/matrix_mz_prob_mean_var.npy'
        output_dir = './evaluation_outputs'
        indices_path = get_train_test_indice_path(run)

        # Reference values for this run's split. Y_train and precursor_train_df
        # are unpacked but not used below.
        Y_train, Y_test, precursor_train_df, precursor_test_df = load_train_test_prob(precursor_info_path, indices_path, matrix_path, return_X=False)

        # Predictions are loaded through the same helper, pointing it at the
        # prediction file and passing use_peak_mask=False. Y_train_pred is not
        # used below.
        predict_path = get_prediction_path(run, model)
        Y_train_pred, Y_test_pred, _, _ = load_train_test_prob(precursor_info_path, indices_path, predict_path, return_X=False, use_peak_mask=False)
        # The mask is taken from the raw Y_test BEFORE thresholding; only
        # entries with Y_test >= 0 are evaluated.
        # presumably negative entries mark positions to exclude -- TODO confirm
        Y_test_mask = Y_test >= 0
        # Values below the threshold are zeroed in place in both arrays. For
        # Y_test this also zeroes negative entries, which is why the mask has
        # to be computed first.
        threshold = 0.001
        Y_test_pred[Y_test_pred < threshold] = 0
        Y_test[Y_test < threshold] = 0
        # Presence/absence labels: 1 where the (thresholded) value is > 0.
        Y_test_pred_binary = np.where(Y_test_pred > 0, 1, 0)
        Y_test_binary = np.where(Y_test > 0, 1, 0)

        # Per-column ("ion_*") and per-row ("precursor_*") metric values are
        # collected in these lists; they are not read again within this loop.
        ion_acc_list, ion_sen_list, ion_spec_list, ion_l1_list, ion_mse_list, ion_cosine_list = [], [], [], [], [], []
        precursor_acc_list, precursor_sen_list, precursor_spec_list, precursor_l1_list, precursor_mse_list, precursor_cosine_list = [], [], [], [], [], []
        precursor_sa_list = []

        # Summary row for this (model, run) pair. Values are appended in this
        # order: model, run, then for each level in turn (all entries
        # flattened, per precursor, per column): accuracy, sensitivity,
        # specificity, l1, mse, cosine. The spectral angle is printed but not
        # stored in the row.
        # NOTE(review): the column names given to pd.DataFrame must follow this
        # same order.
        row = [model, run]

        # frag ion level evaluation
        # Boolean-mask indexing already yields 1-D arrays of the valid entries.
        Y_test_flattened = Y_test[Y_test_mask].flatten()
        Y_test_pred_flattened = Y_test_pred[Y_test_mask].flatten()
        Y_test_flattened_binary = Y_test_binary[Y_test_mask].flatten()
        Y_test_pred_flattened_binary = Y_test_pred_binary[Y_test_mask].flatten()
        # NOTE(review): the helper receives already-binarized arrays together
        # with `threshold`; how it uses the threshold is not visible here.
        accuracy, sensitivity, specificity = get_accuracy_sensitivity_specificity(Y_test_flattened_binary, Y_test_pred_flattened_binary, threshold)
        row += [accuracy, sensitivity, specificity]
        print(f"frag ion level - Accuracy: {accuracy:.5f}, Sensitivity: {sensitivity:.5f}, Specificity: {specificity:.5f}")

        l1 = compute_l1_distance(Y_test_flattened, Y_test_pred_flattened)
        mse = compute_mse(Y_test_flattened, Y_test_pred_flattened)
        cosine = compute_cosine(Y_test_flattened, Y_test_pred_flattened)
        spectral_angle = get_masked_spectral_distance(cosine)
        row += [l1, mse, cosine]
        print(f"frag ion level - L1: {l1:.5f}, MSE: {mse:.5f}, Cosine: {cosine:.5f}, Spectral Angle: {spectral_angle:.5f}") 

        # precursor level evaluation
        # Metrics are computed per row i over that row's valid entries and
        # averaged by accumulating value / num_precursors.
        # assumes rows of Y_test align with rows of precursor_test_df -- TODO confirm
        # NOTE(review): if a helper returns NaN for a degenerate row, the NaN
        # would propagate into the running average -- verify helper behavior.
        num_precursors = precursor_test_df.shape[0]
        averaged_accuracy, averaged_sensitivity, averaged_specificity = 0, 0, 0
        averaged_l1, averaged_mse, averaged_cosine, averaged_spectral_angle = 0, 0, 0, 0
        for i in range(num_precursors):
            Y_test_flattened = Y_test[i][Y_test_mask[i]]
            Y_test_pred_flattened = Y_test_pred[i][Y_test_mask[i]]
            Y_test_flattened_binary = Y_test_binary[i][Y_test_mask[i]]
            Y_test_pred_flattened_binary = Y_test_pred_binary[i][Y_test_mask[i]]
            accuracy, sensitivity, specificity = get_accuracy_sensitivity_specificity(Y_test_flattened_binary, Y_test_pred_flattened_binary, threshold)
            averaged_accuracy += accuracy / num_precursors
            averaged_sensitivity += sensitivity / num_precursors
            averaged_specificity += specificity / num_precursors
            precursor_acc_list.append(accuracy)
            precursor_sen_list.append(sensitivity)
            precursor_spec_list.append(specificity)

            l1 = compute_l1_distance(Y_test_flattened, Y_test_pred_flattened)
            mse = compute_mse(Y_test_flattened, Y_test_pred_flattened)
            cosine = compute_cosine(Y_test_flattened, Y_test_pred_flattened)
            spectral_angle = get_masked_spectral_distance(cosine)
            averaged_l1 += l1 / num_precursors
            averaged_mse += mse / num_precursors
            averaged_cosine += cosine / num_precursors
            averaged_spectral_angle += spectral_angle / num_precursors
            precursor_l1_list.append(l1)
            precursor_mse_list.append(mse)
            precursor_cosine_list.append(cosine)
            precursor_sa_list.append(spectral_angle)
        row += [averaged_accuracy, averaged_sensitivity, averaged_specificity]
        print(f"precursor level - Accuracy: {averaged_accuracy:.5f}, Sensitivity: {averaged_sensitivity:.5f}, Specificity: {averaged_specificity:.5f}")
        row += [averaged_l1, averaged_mse, averaged_cosine]
        print(f"precursor level - L1: {averaged_l1:.5f}, MSE: {averaged_mse:.5f}, Cosine: {averaged_cosine:.5f}, Spectral Angle: {averaged_spectral_angle:.5f}")

        # per considered ion
        # Same procedure along axis 1: metrics are computed per column over
        # that column's valid entries and averaged over all columns.
        num_frag_ions = Y_test.shape[1]
        averaged_accuracy, averaged_sensitivity, averaged_specificity = 0, 0, 0
        averaged_l1, averaged_mse, averaged_cosine = 0, 0, 0
        averaged_spectral_angle = 0        # normalized spectral angle
        for i in range(num_frag_ions):
            y_test_col = Y_test[:, i][Y_test_mask[:, i]]
            y_test_pred_col = Y_test_pred[:, i][Y_test_mask[:, i]]
            y_test_col_binary = Y_test_binary[:, i][Y_test_mask[:, i]]
            y_test_pred_col_binary = Y_test_pred_binary[:, i][Y_test_mask[:, i]]
            accuracy, sensitivity, specificity = get_accuracy_sensitivity_specificity(y_test_col_binary, y_test_pred_col_binary, threshold)
            averaged_accuracy += accuracy / num_frag_ions
            averaged_sensitivity += sensitivity / num_frag_ions
            averaged_specificity += specificity / num_frag_ions
            ion_acc_list.append(accuracy)
            ion_sen_list.append(sensitivity)
            ion_spec_list.append(specificity)

            l1 = compute_l1_distance(y_test_col, y_test_pred_col)
            mse = compute_mse(y_test_col, y_test_pred_col)
            cosine = compute_cosine(y_test_col, y_test_pred_col)
            spectral_angle = get_masked_spectral_distance(cosine)
            averaged_l1 += l1 / num_frag_ions
            averaged_mse += mse / num_frag_ions
            averaged_cosine += cosine / num_frag_ions
            averaged_spectral_angle += spectral_angle / num_frag_ions
            # Unlike the precursor level, the per-column spectral angle is only
            # averaged, not collected in a list.
            ion_l1_list.append(l1)
            ion_mse_list.append(mse)
            ion_cosine_list.append(cosine)
        row += [averaged_accuracy, averaged_sensitivity, averaged_specificity]
        print(f"averaged over col - Accuracy: {averaged_accuracy:.5f}, Sensitivity: {averaged_sensitivity:.5f}, Specificity: {averaged_specificity:.5f}")
        row += [averaged_l1, averaged_mse, averaged_cosine]
        print(f"averaged over col - L1: {averaged_l1:.5f}, MSE: {averaged_mse:.5f}, Cosine: {averaged_cosine:.5f}, Spectral Angle: {averaged_spectral_angle:.5f}")

        data.append(row)



estimator_A - 0
matrix shape is 2d, convert it to 3d, shepe is:  (610117, 235, 1)
frag ion level - Accuracy: 0.65737, Sensitivity: 1.00000, Specificity: 0.00000
frag ion level - L1: 0.23976, MSE: 0.10040, Cosine: 0.73041, Spectral Angle: 0.52134
precursor level - Accuracy: 0.69918, Sensitivity: 1.00000, Specificity: 0.00000
precursor level - L1: 0.24360, MSE: 0.09915, Cosine: 0.75876, Spectral Angle: 0.55815
averaged over col - Accuracy: 0.44360, Sensitivity: 0.98723, Specificity: 0.00000
averaged over col - L1: 0.24018, MSE: 0.10620, Cosine: 0.39432, Spectral Angle: 0.27216

estimator_A - 1
matrix shape is 2d, convert it to 3d, shepe is:  (610117, 235, 1)
frag ion level - Accuracy: 0.65813, Sensitivity: 1.00000, Specificity: 0.00000
frag ion level - L1: 0.23964, MSE: 0.10044, Cosine: 0.73031, Spectral Angle: 0.52124
precursor level - Accuracy: 0.69994, Sensitivity: 1.00000, Specificity: 0.00000
precursor level - L1: 0.24354, MSE: 0.09925, Cosine: 0.75878, Spectral Angle: 0.55821
aver

In [6]:
# save the results
# Build the summary table from the rows collected in `data`
# (one row per (model, run) pair).
df = pd.DataFrame(data, columns=col_list)
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output directory exists before writing.
os.makedirs(output_dir, exist_ok=True)
df.to_csv(os.path.join(output_dir, "evaluation_results.csv"), index=False)