Ejercicio 5.C
Con modelo que has elegido, cárgalo desde disco en otro Jupyter notebook y realiza inferencia de 10 pacientes.

Para cada paciente:

Indica su prevalencia (te la tienes que inventar de forma aleatoria)
Indica la probabilidad de que la red haya acertado.

In [308]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import load_model
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

RANDOM_SEED = 42
tf.keras.utils.set_random_seed(RANDOM_SEED)

In [309]:
def get_matrix(y_true, y_score, threshold=.5):
    y_pred = y_score >= threshold

    tp = np.sum((y_true == 1) & (y_pred == 1))
    tn = np.sum((y_true == 0) & (y_pred == 0))
    fp = np.sum((y_true == 0) & (y_pred == 1))
    fn = np.sum((y_true == 1) & (y_pred == 0))

    return tp, tn, fp, fn

In [310]:
def get_metrics(tp, tn, fp, fn):
    prevalence = (tp + fn) / (tp + tn + fp + fn)
    sensibility = tp / (tp + fn)
    specificity = tn / (fp + tn)

    return prevalence, sensibility, specificity

In [311]:
def get_precision_bayes(sensibility, specificity, prevalence):
    return (sensibility * prevalence) / (sensibility * prevalence + (1 - specificity) * (1 - prevalence))

In [312]:
def get_vpn_bayes(sensibility, specificity, prevalence):
    return (specificity * (1 - prevalence)) / (specificity * (1 - prevalence) + (1 - sensibility) * prevalence)

In [313]:
def specificity(y_true, y_score):
    threshold = 0.5
    y_pred = tf.cast(tf.greater_equal(y_score, threshold), tf.float32)

    true_negatives = tf.logical_and(tf.equal(y_true, 0), tf.equal(y_pred, 0))
    num_true_negatives = tf.reduce_sum(tf.cast(true_negatives, tf.float32))

    negatives = tf.equal(y_true, 0)
    num_negatives = tf.reduce_sum(tf.cast(negatives, tf.float32))

    specificity = num_true_negatives / (num_negatives + tf.keras.backend.epsilon())

    return specificity

In [314]:
model = load_model('model.h5', custom_objects={'specificity': specificity})

data = load_breast_cancer()
X = data.data
y = data.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

In [316]:
y_score = model.predict(X_test)
tp, tn, fp, fn = get_matrix(y_test, y_score[:, 0])
prevalence, sensibility, specificity = get_metrics(tp, tn, fp, fn)
precision = get_precision_bayes(sensibility, specificity, prevalence)
vpn = get_vpn_bayes(sensibility, specificity, prevalence)

In [317]:
print("Global metrics")
print(f'TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}')
print(f'Prevalencia: {prevalence:.2f}')
print(f'Sensibilidad: {sensibility:.2f}')
print(f'Especificidad: {specificity:.2f}')
print(f'Precisión: {precision:.2f}')
print(f'VPN: {vpn:.2f}')

Global metrics
TP: 63, TN: 40, FP: 5, FN: 6
Prevalencia: 0.61
Sensibilidad: 0.91
Especificidad: 0.89
Precisión: 0.93
VPN: 0.87


In [334]:
# Get 10 random tests patients and make predictions
np.random.seed(12)
random_indices = np.random.choice(X_test.shape[0], 10, replace=False)

X_test_random = X_test[random_indices]
# y_test_random = y_test[random_indices]

for i in range(10):
    X_sample = X_test_random[i]
    X_sample = X_sample.reshape(1, -1)
    y_sample = model.predict(X_sample, verbose=False)

    sample_prevalence = np.random.uniform(.1, .9)
    sample_precision = get_precision_bayes(sensibility, specificity, sample_prevalence)
    sample_vpn = get_vpn_bayes(sensibility, specificity, sample_prevalence)
    sample_class = 1 if y_sample[0][0] >= 0.5 else 0
    print(
        f'Patient {i + 1} (id {random_indices[i]}) and the score is {y_sample[0][0]:.2f}. Prevalence: {sample_prevalence:.2f}. Precision: {sample_precision:.2f}. VPN: {sample_vpn:.2f}'
    )

Patient 1 (id 66) and the score is 0.25. Prevalence: 0.62. Precision: 0.93. VPN: 0.86
Patient 2 (id 69) and the score is 0.80. Prevalence: 0.33. Precision: 0.80. VPN: 0.95
Patient 3 (id 17) and the score is 0.96. Prevalence: 0.44. Precision: 0.87. VPN: 0.93
Patient 4 (id 71) and the score is 0.25. Prevalence: 0.40. Precision: 0.85. VPN: 0.94
Patient 5 (id 11) and the score is 0.88. Prevalence: 0.20. Precision: 0.68. VPN: 0.98
Patient 6 (id 8) and the score is 0.90. Prevalence: 0.58. Precision: 0.92. VPN: 0.88
Patient 7 (id 19) and the score is 0.25. Prevalence: 0.16. Precision: 0.61. VPN: 0.98
Patient 8 (id 21) and the score is 0.94. Prevalence: 0.39. Precision: 0.84. VPN: 0.94
Patient 9 (id 14) and the score is 0.92. Prevalence: 0.46. Precision: 0.88. VPN: 0.92
Patient 10 (id 26) and the score is 0.88. Prevalence: 0.82. Precision: 0.97. VPN: 0.69
