# Ejercicio de Lab 5. Medidas de desempeño en ML

In [61]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [62]:
# Load the cleaned heart-disease dataset from a path relative to the notebook.
heart = pd.read_csv('HeartDisease_Dataset/dataset_heartClean.csv')
# Preview the first rows.
# NOTE(review): the preview shows an 'Unnamed: 0' column -- presumably the row
# index that was written out when the CSV was saved; confirm, and consider
# reading with index_col=0. It is not selected as a feature further down.
heart.head()

Unnamed: 0,age,sex,chest pain type,resting bp s,cholesterol,fasting blood sugar,resting ecg,max heart rate,exercise angina,oldpeak,ST slope,target
0,40,1,2,140,289,0,0,172,0,0.0,1,0
1,49,0,3,160,180,0,0,156,0,1.0,2,1
2,37,1,2,130,283,0,1,98,0,0.0,1,0
3,48,0,4,138,214,0,0,108,1,1.5,2,1
4,54,1,3,150,195,0,0,122,0,0.0,1,0


In [63]:
# Feature matrix: the six columns used as predictors.
X = heart[['chest pain type','cholesterol','max heart rate','exercise angina','oldpeak','ST slope']]
# Label vector: the 'target' column.
y = heart['target']
# Split the data into training (70%) and test (30%) sets; random_state fixes
# the shuffle so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Create the logistic regression model (library default hyperparameters)
log_reg = LogisticRegression()

# Train the model on the training data
log_reg.fit(X_train, y_train)

# Predict labels for the test data
y_pred = log_reg.predict(X_test)

# Show the true test labels followed by the predicted labels.
print(y_test)
print(y_pred)

668    0
30     1
377    1
535    1
807    0
      ..
133    1
813    0
734    1
360    1
875    0
Name: target, Length: 276, dtype: int64
[0 0 1 1 0 0 1 0 0 1 0 0 1 0 1 1 0 1 1 0 0 1 0 1 1 1 0 1 0 0 1 0 1 1 1 1 1
 0 1 1 1 1 1 0 0 1 0 1 1 0 0 0 1 1 0 0 0 1 1 1 0 0 0 1 0 1 1 1 1 0 0 0 1 1
 0 1 0 0 0 1 0 1 1 0 1 1 0 1 0 0 0 1 1 0 1 0 1 0 1 1 0 1 0 0 1 1 0 0 1 0 1
 1 0 0 1 0 1 0 1 0 1 1 0 1 0 0 0 0 0 0 0 1 1 0 1 0 0 1 0 1 0 0 1 1 1 1 0 0
 0 0 0 1 1 0 1 0 0 0 0 1 0 0 1 0 1 1 0 1 0 0 1 1 0 1 0 1 0 1 0 1 1 1 0 1 1
 0 1 0 1 1 1 1 1 0 1 0 0 0 1 1 1 0 1 1 0 1 0 1 1 0 0 0 0 1 1 1 1 1 0 0 0 1
 1 0 1 0 1 1 0 0 1 1 0 0 0 1 1 1 1 1 0 0 0 0 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1
 1 0 1 0 0 0 0 1 1 1 1 1 1 0 1 1 0]


## Matriz de confusión

In [64]:
def matrizConfusion(y_test,y_pred):
    """Count and print the binary confusion matrix for 0/1 labels.

    Args:
        y_test: Iterable of true labels; every value must equal 0 or 1.
        y_pred: Iterable of predicted labels, position-aligned with
            ``y_test``; every value must equal 0 or 1.

    Returns:
        list: The counts ``[TP, TN, FP, FN]`` (class 1 is the positive class).

    Raises:
        ValueError: If the two inputs differ in length, or if any label is
            not 0 or 1.
    """
    # Materialise both inputs so their lengths can be compared; a bare zip()
    # would silently drop the unmatched tail of the longer one.
    true_labels = list(y_test)
    predicted_labels = list(y_pred)
    if len(true_labels) != len(predicted_labels):
        raise ValueError(
            f'y_test and y_pred must have the same length '
            f'({len(true_labels)} != {len(predicted_labels)})'
        )

    TP = 0  # True Positive
    TN = 0  # True Negative
    FP = 0  # False Positive
    FN = 0  # False Negative

    for trueValue, predictedValue in zip(true_labels, predicted_labels):
        # Reject non-binary labels instead of letting them fall through to
        # the FN counter.
        if trueValue not in (0, 1) or predictedValue not in (0, 1):
            raise ValueError(
                f'labels must be 0 or 1, got true={trueValue!r}, '
                f'predicted={predictedValue!r}'
            )
        if trueValue == 1 and predictedValue == 1:
            TP += 1
        elif trueValue == 0 and predictedValue == 0:
            TN += 1
        elif predictedValue == 1:
            FP += 1  # predicted 1, actually 0
        else:
            FN += 1  # predicted 0, actually 1

    print('Matriz de Confusión:')
    print('               Predicción 0    Predicción 1')
    print(f'Valor Real 0    {TN}             {FP}')
    print(f'Valor Real 1    {FN}             {TP}')

    return [TP,TN,FP,FN]

## Medidas de desempeño

In [65]:
def accuracy(matriz):
    """Return the fraction of samples that were classified correctly.

    Args:
        matriz: Sequence of four counts ``[TP, TN, FP, FN]``.

    Returns:
        ``(TP + TN) / (TP + TN + FP + FN)``.
    """
    true_pos, true_neg, false_pos, false_neg = matriz
    correct = true_pos + true_neg
    total = true_pos + true_neg + false_pos + false_neg
    return correct / total

def error(matriz):
    """Return the fraction of samples that were misclassified.

    Args:
        matriz: Sequence of four counts ``[TP, TN, FP, FN]``.

    Returns:
        ``(FP + FN) / (TP + TN + FP + FN)``.
    """
    true_pos, true_neg, false_pos, false_neg = matriz
    wrong = false_pos + false_neg
    total = true_pos + true_neg + false_pos + false_neg
    return wrong / total

# Precision
def precision(matriz):
    """Return the share of positive predictions that were actually positive.

    Args:
        matriz: Sequence of four counts ``[TP, TN, FP, FN]``.

    Returns:
        ``TP / (TP + FP)``.
    """
    true_pos, _, false_pos, _ = matriz
    predicted_pos = true_pos + false_pos
    return true_pos / predicted_pos

# Recall
def recall(matriz):
    """Return the share of actual positives that were predicted positive.

    Args:
        matriz: Sequence of four counts ``[TP, TN, FP, FN]``.

    Returns:
        ``TP / (TP + FN)``.
    """
    true_pos, _, _, false_neg = matriz
    actual_pos = true_pos + false_neg
    return true_pos / actual_pos

# Positive Predictive Value
def PPV(matriz):
    """Return the Positive Predictive Value (same quantity as precision).

    The ratio is computed directly from the counts instead of delegating to
    the notebook-level ``precision`` name, so this function keeps working
    even if that name is rebound to a non-callable value.

    Args:
        matriz: Sequence of four counts ``[TP, TN, FP, FN]``.

    Returns:
        ``TP / (TP + FP)``.
    """
    TP, _, FP, _ = matriz

    return TP / (TP + FP)

# True Positive Rate
def TPR(matriz):
    """Return the True Positive Rate (same quantity as recall).

    The ratio is computed directly from the counts instead of delegating to
    the notebook-level ``recall`` name, so this function keeps working even
    if that name is rebound to a non-callable value.

    Args:
        matriz: Sequence of four counts ``[TP, TN, FP, FN]``.

    Returns:
        ``TP / (TP + FN)``.
    """
    TP, _, _, FN = matriz

    return TP / (TP + FN)

# True Negative Rate (Specificity)
def TNR(matriz):
    """Return the True Negative Rate (specificity).

    Args:
        matriz: Sequence of four counts ``[TP, TN, FP, FN]``.

    Returns:
        ``TN / (TN + FP)``.
    """
    _, true_neg, false_pos, _ = matriz
    actual_neg = true_neg + false_pos
    return true_neg / actual_neg

# False Positive Rate
def FPR(matriz):
    """Return the False Positive Rate.

    Args:
        matriz: Sequence of four counts ``[TP, TN, FP, FN]``.

    Returns:
        ``FP / (FP + TN)``.
    """
    _, true_neg, false_pos, _ = matriz
    actual_neg = false_pos + true_neg
    return false_pos / actual_neg

# False Negative Rate
def FNR(matriz):
    """Return the False Negative Rate.

    Args:
        matriz: Sequence of four counts ``[TP, TN, FP, FN]``.

    Returns:
        ``FN / (FN + TP)``.
    """
    true_pos, _, _, false_neg = matriz
    actual_pos = false_neg + true_pos
    return false_neg / actual_pos
    
# F1-Score
def F1score(precision,recall):
    """Return the F1 score, the harmonic mean of precision and recall.

    Args:
        precision: Precision value (a number, not the metric function).
        recall: Recall value (a number, not the metric function).

    Returns:
        ``2 * (precision * recall / (precision + recall))``.
    """
    product = precision * recall
    total = precision + recall
    harmonic_half = product / total
    return 2 * harmonic_half
    

In [66]:
# Confusion matrix and the metrics derived from it

matriz = matrizConfusion(y_test,y_pred)
print("\n")
print(f"Accuracy: {accuracy(matriz)}")
print(f"Error: {error(matriz)}")
print(f"Precision: {precision(matriz)}")
print(f"Recall: {recall(matriz)}")
print(f"Positive Predictive Value: {PPV(matriz)}")
print(f"True Positive Rate: {TPR(matriz)}")
print(f"True Negative Rate: {TNR(matriz)}")
print(f"False Positive Rate: {FPR(matriz)}")
print(f"False Negative Rate: {FNR(matriz)}")
# Keep the scores under their own names: assigning them to `precision` /
# `recall` would replace the metric functions with floats, so re-running this
# cell (or any later call to those functions) would raise TypeError.
precision_value = precision(matriz)
recall_value = recall(matriz)
print(f"F1-Score: {F1score(precision_value,recall_value)}")


Matriz de Confusión:
               Predicción 0    Predicción 1
Valor Real 0    99             13
Valor Real 1    30             134


Accuracy: 0.8442028985507246
Error: 0.15579710144927536
Precision: 0.9115646258503401
Recall: 0.8170731707317073
Positive Predictive Value: 0.9115646258503401
True Positive Rate: 0.8170731707317073
True Negative Rate: 0.8839285714285714
False Positive Rate: 0.11607142857142858
False Negative Rate: 0.18292682926829268
F1-Score: 0.8617363344051446
