# Precisión y sensibilidad

A pesar de que se ha utilizado la precisión como medida principal del rendimiento de los modelos entrenados, la información que da puede ser un poco engañosa en el caso de bases de datos desequilibradas. En este notebook se intentará calcular la precisión y sensibilidad de los modelos de CNN entrenados en la base de datos original y en la aumentada, para ver si se puede hacer un análisis más en profundidad.

Un sistema con precisión y sensibilidad alta devuelve resultados precisos, y la mayoría de resultados son realmente positivos.

Un sistema con precisión baja y sensibilidad alta devuelve muchos resultados, pero la mayoría de las etiquetas predichas son incorrectas, al comparar con las eqtiquetas de entrenamiento. Un sistema con precisión alta y sensibilidad baja have lo contrario, devolviendo pocos resultados pero etiquetados correctamente generalmente.

https://scikit-learn.org/stable/auto_examples/model_selection/plot_precision_recall.html

In [2]:
import numpy as np
import pandas as pd
import sklearn
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import sys

sys.path.append('/home/jovyan/work/databases/HASY/')
from hasy_tools_updated import *

In [15]:
import sklearn.metrics as metrics

In [24]:
X_train, y_train, symbol_id2index_train, index2symbol_id_train = load_database(f'/home/jovyan/work/databases/HASY/benat-data/train.csv')
X_test, y_test, symbol_id2index_test, index2symbol_id_test = load_database(f'/home/jovyan/work/databases/HASY/benat-data/test.csv')

In [25]:
y_train_id = np.array([index2symbol_id_train[element] for element in y_train])
y_test_id = np.array([index2symbol_id_test[element] for element in y_test])

# Go back to indexes, but use the SAME indexing SYSTEM for all:
y_train = np.array([symbol_id2index_test[element] for element in y_train_id])
y_test = np.array([symbol_id2index_test[element] for element in y_test_id])    

# Data scaling
X_train, X_test = X_train / 255., X_test/ 255.
y_train, y_test = y_train.astype(int), y_test.astype(int)
print("Data scaled", end='\n\n')

# Reshape the data to 2D:
X_train = X_train.reshape(X_train.shape[0],32,32)[..., np.newaxis]
X_test = X_test.reshape(X_test.shape[0],32,32)[..., np.newaxis]

Data scaled



## Sistema entrenado en la base de datos original

In [26]:
# path_original = '/home/jovyan/work/models/04_CNN/00_MNIST_model/Test_performance/models/best_model.h5'
model_original_data = keras.models.load_model('04_CNN/00_MNIST_model/Test_performance/models/best_model.h5')

In [27]:
y_proba = model_original_data.predict(X_test, verbose=1)
y_pred = np.argmax(y_proba, axis=-1)
y_pred_id = np.array([index2symbol_id_test[element] for element in y_pred])



In [73]:
accuracy = metrics.accuracy_score(y_test, y_pred)
print(f"TOP1: {accuracy*100:.4f} %")
print(f"TOP3: {metrics.top_k_accuracy_score(y_test, y_proba, k=3)*100:.4f} %")
print(f"TOP5: {metrics.top_k_accuracy_score(y_test, y_proba, k=5)*100:.4f} %")
print(f"MER : {MER(y_test_id, y_pred_id)*100:.4f} %")


TOP1: 80.8036 %
TOP3: 95.1288 %
TOP5: 97.0012 %
MER : 89.5147 %


In [39]:
# Precisión

mean_precision = metrics.precision_score(y_test, y_pred, average='macro', zero_division=0)
weighted_mean_precision = metrics.precision_score(y_test, y_pred, average='weighted', zero_division=0)
print(f"Precisión media: {mean_precision*100:.4f} %")
print(f"Precisión media pesada: {weighted_mean_precision*100:.4f} %")

Precisión media: 72.2774 %
Precisión media pesada: 79.1704 %


In [42]:
# Sensibilidad

mean_recall = metrics.recall_score(y_test, y_pred, average='macro', zero_division='warn')
weighted_mean_recall = metrics.recall_score(y_test, y_pred, average='weighted', zero_division='warn')
print(f"Sensibilidad media: {mean_recall*100:.4f} %")
print(f"Sensibilidad media pesada: {weighted_mean_recall*100:.4f} %")

Sensibilidad media: 67.1164 %
Sensibilidad media pesada: 80.8036 %


In [82]:
# Valor F1

mean_F1 = metrics.f1_score(y_test, y_pred, average='macro', zero_division='warn')
weighted_mean_F1 = metrics.f1_score(y_test, y_pred, average='weighted', zero_division='warn')
print(f"F1 media: {mean_F1*100:.4f} %")
print(f"F1 media pesada: {weighted_mean_F1*100:.4f} %")

F1 media: 67.9726 %
F1 media pesada: 78.9267 %


## Sistema entrenado en la base de datos aumentada

Con 1000 muestras por clase.

In [52]:
# path_aug = '/home/jovyan/work/models/05_CNN_aug/aug_1000/models_aug/best_model.h5'
model_augmented_data = keras.models.load_model('05_CNN_aug/aug_1000/models_aug/best_model.h5')

In [53]:
y_proba_aug = model_augmented_data.predict(X_test, verbose=1)
y_pred_aug = np.argmax(y_proba_aug, axis=-1)
y_pred_id_aug = np.array([index2symbol_id_test[element] for element in y_pred_aug])



In [60]:
accuracy_aug = metrics.accuracy_score(y_test, y_pred_aug)
print(f"TOP1: {accuracy_aug*100:.4f} %")
print(f"TOP3: {metrics.top_k_accuracy_score(y_test_aug, y_proba_aug, k=3)*100:.4f} %")
print(f"TOP5: {metrics.top_k_accuracy_score(y_test_aug, y_proba_aug, k=5)*100:.4f} %")
print(f"MER : {MER(y_test_id, y_pred_id)*100:.4f} %")

TOP1: 79.5078 %
TOP3: 94.8881 %
TOP5: 96.9745 %
MER : 89.5147 %


In [55]:
# Precisión

mean_precision_aug = metrics.precision_score(y_test, y_pred_aug, average='macro', zero_division=0)
weighted_mean_precision_aug = metrics.precision_score(y_test, y_pred_aug, average='weighted', zero_division=0)
print(f"Precisión media: {mean_precision_aug*100:.4f} %")
print(f"Precisión media pesada: {weighted_mean_precision_aug*100:.4f} %")

Precisión media: 68.6724 %
Precisión media pesada: 79.6440 %


In [56]:
# Sensibilidad

mean_recall_aug = metrics.recall_score(y_test, y_pred_aug, average='macro', zero_division='warn')
weighted_mean_recall_aug = metrics.recall_score(y_test, y_pred_aug, average='weighted', zero_division='warn')
print(f"Sensibilidad media: {mean_recall_aug*100:.4f} %")
print(f"Sensibilidad media pesada: {weighted_mean_recall_aug*100:.4f} %")

Sensibilidad media: 70.8287 %
Sensibilidad media pesada: 79.5078 %


In [83]:
# Valor F1

mean_F1_aug = metrics.f1_score(y_test, y_pred_aug, average='macro', zero_division='warn')
weighted_mean_F1_aug = metrics.f1_score(y_test, y_pred_aug, average='weighted', zero_division='warn')
print(f"F1 media: {mean_F1_aug*100:.4f} %")
print(f"F1 media pesada: {weighted_mean_F1_aug*100:.4f} %")

F1 media: 68.7730 %
F1 media pesada: 78.8242 %


## Resumen de las puntuaciones sin pesar

In [84]:
import pandas as pd

resumen_dict = dict()
resumen_dict['Modelo desequilibrado'] = {'Exactitud': round(accuracy*100,1), 'Precisión':round(mean_precision*100,1), 'Sensibilidad':round(mean_recall*100,1), 'Valor F1':round(mean_F1*100,1)}
resumen_dict['Modelo equilibrado'] = {'Exactitud':round(accuracy_aug*100,1), 'Precisión':round(mean_precision_aug*100,1), 'Sensibilidad':round(mean_recall_aug*100,1), 'Valor F1':round(mean_F1_aug*100,1)}

tabla_datos = pd.DataFrame(resumen_dict).transpose()
tabla_datos

Unnamed: 0,Exactitud,Precisión,Sensibilidad,Valor F1
Modelo desequilibrado,80.8,72.3,67.1,68.0
Modelo equilibrado,79.5,68.7,70.8,68.8


In [85]:
print(tabla_datos.to_latex())

\begin{tabular}{lrrrr}
\toprule
{} &  Exactitud &  Precisión &  Sensibilidad &  Valor F1 \\
\midrule
Modelo desequilibrado &       80.8 &       72.3 &          67.1 &      68.0 \\
Modelo equilibrado    &       79.5 &       68.7 &          70.8 &      68.8 \\
\bottomrule
\end{tabular}



  print(tabla_datos.to_latex())
