In [1]:
# importar dataset
from sklearn.datasets import load_breast_cancer

X,y = load_breast_cancer(return_X_y=True)

In [2]:
# hold out para dividir dataset, 70% para entrenamiento y 30% para pruebas
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=42)
# se imprimen etiquetas de conjunto de pruebas
print(y_test)

[1 0 0 1 1 0 0 0 1 1 1 0 1 0 1 0 1 1 1 0 0 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0
 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 0 1 1 0 0 1 1 1 0 0 1 1 0 0 1 0
 1 1 1 0 1 1 0 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 1 0 0 1 0 0 1 1 1 0 1 1 0
 1 1 0 1 0 1 1 1 0 1 1 1 0 1 0 0 1 1 0 0 0 1 1 1 0 1 1 1 0 1 0 1 1 0 1 0 0
 0 1 0 1 1 1 1 0 0 1 1 1 1 1 1 1 0 1 1 1 1 0 1]


In [5]:
# entrenar algoritmo K-NN 
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
# se imprimen etiquetas predichas por el algoritmo
print(y_pred)

[1 0 0 1 1 0 0 0 1 1 1 0 1 1 1 0 1 1 1 0 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0
 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 0 0 1 1 1 0 0 1 1 1 0 1 1
 1 1 1 0 1 1 0 1 1 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 1 0 0 1 0 0 1 1 1 0 1 1 0
 1 1 0 1 0 1 1 1 0 1 1 1 0 1 0 0 1 1 0 0 0 1 1 0 0 1 1 1 0 1 0 1 1 0 1 0 0
 0 1 0 1 1 1 1 0 0 1 1 1 1 1 1 1 0 1 1 1 1 0 1]


In [6]:
# obtener medidas manualmente
TP = TN = FP = FN = 0
for actual, predicted in zip(y_test, y_pred):
    if actual == 1 and predicted == 1:
        TP += 1  # True Positive
    elif actual == 0 and predicted == 0:
        TN += 1  # True Negative
    elif actual == 0 and predicted == 1:
        FP += 1  # False Positive
    elif actual == 1 and predicted == 0:
        FN += 1  # False Negative

matriz = [["TP " + str(TP), "FN " + str(FN)], ["FP " + str(FP), "TN " + str(TN)]]
precision = TP / (TP + FP)
npv = TN / (TN + FN)
recall = TP / (TP + FN)
specificity = TN / (TN + FN)
accuracy = (TP + TN) / (TP + TN + FP + FN)
error = 1 - accuracy
F1s = 2 * ( (precision * recall) / ( precision + recall))
fpr = FP / (FP + TN)
fnr = FN / (FN + TP)

print("Matriz de confusión:")
for row in matriz:
    print(row)
print("Accuracy: " + str(accuracy))
print("Error: " + str(error))
print("Precision / Positive Predictive Value: " + str(precision))
print("Negative Predictive Value: " + str(npv))
print("Sensitivity / Recall / True Positive Rate : " + str(recall))
print("Specificity / True Negative Rate : " + str(specificity))
print("False Positive Rate: " + str(fpr))
print("False Negative Rate: " + str(fnr))
print("F1-Score: " + str(F1s))


Matriz de confusión:
['TP 107', 'FN 1']
['FP 6', 'TN 57']
Accuracy: 0.9590643274853801
Error: 0.040935672514619936
Precision / Positive Predictive Value: 0.9469026548672567
Negative Predictive Value: 0.9827586206896551
Sensitivity / Recall / True Positive Rate : 0.9907407407407407
Specificity / True Negative Rate : 0.9827586206896551
False Positive Rate: 0.09523809523809523
False Negative Rate: 0.009259259259259259
F1-Score: 0.9683257918552035


In [17]:
# obtener medidas con librerías
from sklearn.metrics import confusion_matrix, classification_report

conf_matrix = confusion_matrix(y_test, y_pred)

print("Matriz de confusión:")
print(conf_matrix)

print("\nInforme de clasificación:")
print(classification_report(y_test, y_pred))

Matriz de confusión:
[[ 57   6]
 [  1 107]]

Informe de clasificación:
              precision    recall  f1-score   support

           0       0.98      0.90      0.94        63
           1       0.95      0.99      0.97       108

    accuracy                           0.96       171
   macro avg       0.96      0.95      0.96       171
weighted avg       0.96      0.96      0.96       171

