def auc_u_test(labels, predictions):
    """
    Outputs the area under the ROC curve associated with a certain
    set of labels and the predictions given by the training model.
    Computed from the Mann-Whitney U statistic rather than by
    integrating the ROC curve directly.

    Parameters
    ----------
    labels : numpy.ndarray
        Known labels of values predicted by model. Must be one dimensional,
        with positives encoded as 1 and negatives as 0.
    predictions : numpy.ndarray
        Value predicted by user model. Must be one dimensional, with matching
        dimension to `labels`.

    Returns
    -------
    float
        AUC value of given label, prediction pairs (a NumPy floating
        scalar, which is a subclass of Python `float`).

    Raises
    ------
    ZeroDivisionError
        If `labels` contains only positives or only negatives
        (`len_pos * len_neg == 0`), the AUC is undefined.
    """
    len_pos = int(np.sum(labels))
    len_neg = len(labels) - len_pos
    # `rankdata` assigns average ranks to ties, which is exactly the
    # convention the Mann-Whitney U statistic requires for a correct AUC.
    rank_sum = np.sum(rankdata(predictions)[labels == 1])
    # Subtract the minimum possible positive rank-sum to obtain U.
    u_value = rank_sum - (len_pos * (len_pos + 1)) / 2
    # U normalized by the number of (positive, negative) pairs is the AUC.
    auc = u_value / (len_pos * len_neg)
    return auc