Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions selene_sdk/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from .performance_metrics import PerformanceMetrics
from .performance_metrics import visualize_roc_curves
from .performance_metrics import visualize_precision_recall_curves
from .performance_metrics import auc_u_test
from .config import load
from .config import load_path
from .config import instantiate
Expand Down
29 changes: 29 additions & 0 deletions selene_sdk/utils/performance_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from scipy.stats import rankdata

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

double check that there are 2 spaces between the import and the logger line?


logger = logging.getLogger("selene")
Expand Down Expand Up @@ -247,6 +248,34 @@ def get_feature_specific_scores(data, get_feature_from_index_fn):
return feature_score_dict


def auc_u_test(labels, predictions):
    """
    Outputs the area under the ROC curve associated with a certain
    set of labels and the predictions given by the training model.
    Computed from the U statistic (Mann-Whitney-Wilcoxon rank-sum),
    which is equivalent to the trapezoidal ROC AUC.

    Parameters
    ----------
    labels: numpy.ndarray
        Known labels of values predicted by model. Must be one dimensional,
        with positives encoded as 1 and negatives as 0.
    predictions: numpy.ndarray
        Value predicted by user model. Must be one dimensional, with matching
        dimension to `labels`

    Returns
    -------
    float
        AUC value of given label, prediction pairs

    Raises
    ------
    ValueError
        If `labels` contains only one class, for which the AUC is undefined
        (previously this surfaced as an opaque ``ZeroDivisionError``).

    """
    len_pos = int(np.sum(labels))
    len_neg = len(labels) - len_pos
    if len_pos == 0 or len_neg == 0:
        raise ValueError(
            "AUC is undefined when `labels` contains only one class.")
    # Sum of the ranks of the positive examples. `rankdata` assigns
    # average ranks to ties, which is what makes the U-based AUC agree
    # with the trapezoidal ROC AUC in the presence of tied predictions.
    rank_sum = np.sum(rankdata(predictions)[labels == 1])
    # Mann-Whitney U statistic for the positive class: subtract the
    # minimum possible rank sum for `len_pos` items.
    u_value = rank_sum - (len_pos * (len_pos + 1)) / 2
    auc = u_value / (len_pos * len_neg)
    return auc

Comment thread
bmacedo-lgtm marked this conversation as resolved.

class PerformanceMetrics(object):
"""
Tracks and calculates metrics to evaluate how closely a model's
Expand Down