In [1]:
"""
The purpose of this Jupyter notebook is to evaluate the performance of
checkpoint 55 of SENSE-PPI trained from scratch on the two test sets
employed by the authors.
"""

'\nThe purpose of this Jupyter notebook is to evaluate the performance of\ncheckpoint 55 of SENSE-PPI trained from scratch on the two test sets\nemployed by the authors.\n'

In [2]:
import os

import sys
# Directory added to the module search path; presumably this is where the
# project-local `evaluation_utils` module imported below lives.
evaluation_utils_dir = (
    "/Users/jacobanter/Documents/Code/VACV_screen/HVIDB_pos_"
    "instances_with_nucleolus_neg_instances/Results"
)

# Only append once so that re-running this cell does not pile up
# duplicate entries in `sys.path`
if evaluation_utils_dir not in sys.path:
    sys.path.append(evaluation_utils_dir)

In [3]:
import pandas as pd

import evaluation_utils

In [4]:
# When three columns are provided in the input TSV file, i.e. the TSV
# file harbouring PPI pairs to predict, SENSE-PPI interprets the third
# column as `label` column and adopts the ground truth labels rather
# than determining predicted labels based on the probability.
# Therefore, the `label` column has to be removed from the TSV files
# and replaced with labels based on the predicted probabilities.

# Collect the results TSV files. Anything that is not a TSV file (e.g.
# hidden files such as `.DS_Store`) is skipped so that it is neither
# parsed nor overwritten; sorting makes the processing order
# deterministic
results_files = sorted(
    file_name
    for file_name in os.listdir("Results")
    if file_name.endswith(".tsv")
)

# Iterate over the results TSV files and remove the `label` column
for result_file in results_files:
    file_path = os.path.join("Results", result_file)
    current_df = pd.read_csv(file_path, sep="\t")
    # `errors="ignore"` keeps this cell re-runnable: a file whose
    # `label` column has already been removed is written back
    # unchanged instead of raising a `KeyError`
    current_df = current_df.drop(columns="label", errors="ignore")
    current_df.to_csv(file_path, sep="\t", index=False)

In [5]:
# Next, every results TSV file receives a fresh `label` column whose
# values are derived from the predicted probabilities stored in the
# `preds` column
for result_file in results_files:
    evaluation_utils.add_labels_based_on_probs(
        path_tsv_files=os.path.join("Results", result_file),
        pred_col_name="preds",
        n_fold=1,
    )

In [6]:
# Both ground truth files live in the same directory, so the common
# prefix is spelled out once and the file names are appended to it
ground_truth_dir = (
    "/Users/jacobanter/Documents/Code/VACV_screen/HVIDB_pos_instances_"
    "with_nucleolus_neg_instances/Fine-tuning/SENSE-PPI/Fine-tuning_"
    "without_freezing/evaluation_on_authors'_test_data_set/human-"
    "virus_test_data_set/"
)
ground_truth_paths = [
    ground_truth_dir + "hv_test_Epstein_c2_with_header.tsv",
    ground_truth_dir + "hv_test_Influenza_c2_with_header.tsv",
]

test_data_set_name = ["Epstein-Barr", "Influenza"]

# Restrict the evaluation to these two prediction files (per the
# original note, this filters out results files harbouring only
# positive interactions)
results_files = [
    "predictions_Epstein-Barr_virus.tsv",
    "predictions_Influenza_virus.tsv",
]

# The three lists are aligned by position: ground truth file, display
# name of the test set and prediction file belong together
for ground_truth_path, data_set, result_file in zip(
    ground_truth_paths, test_data_set_name, results_files
):
    evaluation_utils.evaluation_k_fold_cross_val(
        ground_truth_path=ground_truth_path,
        splits_path=os.path.join("Results", result_file),
        n_fold=1,
        probability_key="preds",
        model_name=f"SENSE-PPI from scratch on {data_set} test set",
        output_path=(
            "results_SENSE-PPI_from_scratch_performance_"
            f"on_{data_set}_test_set.txt"
        ),
    )

Using 1-fold cross-validation, the metrics for SENSE-PPI from scratch on Epstein-Barr test set are as follows:
Accuracy:      0.9043706293706294 ± 0.0
Precision:     0.37383177570093457 ± 0.0
Recall:        0.07692307692307693 ± 0.0
F1-score:      0.12759170653907495 ± 0.0
Specificity:   0.9871153846153846 ± 0.0
MCC:           0.13587977640572002 ± 0.0
ROC AUC score: 0.7728412074704143 ± 0.0
AUPRC score    0.2444381917447864 ± 0.0
Using 1-fold cross-validation, the metrics for SENSE-PPI from scratch on Influenza test set are as follows:
Accuracy:      0.9074319840743198 ± 0.0
Precision:     0.3026315789473684 ± 0.0
Recall:        0.013990267639902677 ± 0.0
F1-score:      0.026744186046511628 ± 0.0
Specificity:   0.9967761557177616 ± 0.0
MCC:           0.04784468826171578 ± 0.0
ROC AUC score: 0.6916172630253195 ± 0.0
AUPRC score    0.23106866340193852 ± 0.0
