In [17]:
import os
import numpy as np
from sklearn.metrics import confusion_matrix, precision_score, recall_score, accuracy_score, roc_auc_score


def load_vectors(proj_name, run_name, split):
    class_output_dir = '/scratch/users/austin.zane/ucsf_fast/data/class_outputs/'
    run_split_dir = os.path.join(class_output_dir, proj_name, run_name, split)
    label_pairs_path = os.path.join(run_split_dir, 'pred_and_true_labels.npy')
    
    label_pairs = np.load(label_pairs_path)
    
    pred_labels = label_pairs[:, 0]
    true_labels = label_pairs[:, 1]
        
    return true_labels, pred_labels, label_pairs
    


def calculate_statistics(true_labels, predicted_probs, threshold=0.5):
    """
    Calculate various statistics given true labels and predicted probabilities.

    Args:
        true_labels (numpy.ndarray): True binary labels.
        predicted_probs (numpy.ndarray): Predicted probabilities.
        threshold (float): Threshold for converting probabilities to binary predictions.

    Returns:
        dict: Dictionary containing the calculated statistics.
    """
    label_ratio = sum(true_labels) / len(true_labels)
    pred_ratio = sum(predicted_probs) / len(predicted_probs)
    
    
    
    # Convert predicted probabilities to binary predictions
    predicted_labels = (predicted_probs >= threshold).astype(int)

    # Calculate confusion matrix
    cm = confusion_matrix(true_labels, predicted_labels)
    tn, fp, fn, tp = cm.ravel()
    # print(cm)
    # print(f'TP: {tp}, FP: {fp}, FN: {fn}, TN: {tn}')

    # Calculate statistics
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    ppv = tp / (tp + fp)
    npv = tn / (tn + fn)
    precision = precision_score(true_labels, predicted_labels)
    recall = recall_score(true_labels, predicted_labels)
    accuracy = accuracy_score(true_labels, predicted_labels)
    # auc = roc_auc_score(true_labels, predicted_probs)

    # Create a dictionary to store the statistics
    statistics = {
        'Sensitivity': sensitivity,
        'Specificity': specificity,
        'Positive Predictive Value': ppv,
        'Negative Predictive Value': npv,
        # 'Precision': precision,
        # 'Recall': recall,
        'Accuracy': accuracy,
         # 'AUC': auc
        'True label ratio': label_ratio,
        'Pred label ratio': pred_ratio,
    }

    return statistics

In [21]:
proj_name = 'resnet_test_05_20_24'
run_name = 'crimson-brook-1-dbbh1lmz'
split = 'val'

true_labels, predicted_probs, label_pairs = load_vectors(proj_name, run_name, split)
stats = calculate_statistics(true_labels, predicted_probs, threshold=0.5)

# Print the statistics
for metric, value in stats.items():
    print(f'{metric}: {value:.3f}')

Sensitivity: 0.125
Specificity: 0.874
Positive Predictive Value: 0.538
Negative Predictive Value: 0.459
Accuracy: 0.469
True label ratio: 0.541
Pred label ratio: 0.125


In [22]:
proj_name = 'resnet_test_05_20_24'
run_name = 'stoic-surf-2-b2p4lr0g'
split = 'val'

true_labels, predicted_probs, label_pairs = load_vectors(proj_name, run_name, split)
stats = calculate_statistics(true_labels, predicted_probs, threshold=0.5)

# Print the statistics
for metric, value in stats.items():
    print(f'{metric}: {value:.3f}')

Sensitivity: 0.071
Specificity: 0.747
Positive Predictive Value: 0.250
Negative Predictive Value: 0.406
Accuracy: 0.382
True label ratio: 0.541
Pred label ratio: 0.153
