In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [None]:
#Lectura de archivo gracias a la librería pandas
archivo = pd.read_csv("alzheimers_disease_data.csv")
archivo = archivo.drop("DoctorInCharge",axis=1)

### Modelo de Naive bayes con Gauss.

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, f1_score
from imblearn.over_sampling import RandomOverSampler
import matplotlib.pyplot as plt
# Separar variable objetivo de los demás atributos
X = archivo.drop('Diagnosis', axis=1)
y = archivo['Diagnosis']


#División de datos
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


#Balancear el conjunto de entrenamiento
ros = RandomOverSampler(random_state=0)
X_train_balanced, y_train_balanced = ros.fit_resample(X_train, y_train)

#Escalar datos
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_balanced) 
X_test_scaled = scaler.transform(X_test)

#Entrenamiento del modelo
nb_model = GaussianNB()
nb_model.fit(X_train_scaled, y_train_balanced) 

y_pred = nb_model.predict(X_test_scaled)

accuracy_NB = accuracy_score(y_test, y_pred)

conf_matrix = confusion_matrix(y_test, y_pred,labels=[1,0] )

#Reporte
class_report = classification_report(y_test, y_pred)
print("Reporte de clasificación:")
print(class_report)

#Matriz de confusión
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Oranges',
           xticklabels=['Positivo', 'Negativo'],
           yticklabels=['Positivo', 'Negativo'])
plt.title('Matriz de Confusión - GaussianNB')
plt.ylabel('Valor Real')
plt.xlabel('Valor Predicho')
plt.show()

precision_NB = precision_score(y_test, y_pred)
recall_NB = recall_score(y_test, y_pred)
f1_nb = f1_score(y_test, y_pred)
tn_nb, fp_nb, fn_nb, tp_nb = conf_matrix.ravel()


### Modelo de Naive Bayes con Bernoulli.

In [None]:
from sklearn.naive_bayes import BernoulliNB

# Separar variable objetivo de los demás atributos
X = archivo.drop('Diagnosis', axis=1)
y = archivo['Diagnosis']


#División de datos
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


#Balancear el conjunto de entrenamiento
ros = RandomOverSampler(random_state=0)
X_train_balanced, y_train_balanced = ros.fit_resample(X_train, y_train)

#Escalar datos
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_balanced) 
X_test_scaled = scaler.transform(X_test)

#Entrenar modelo.
Bernoulli_model = BernoulliNB()

Bernoulli_model.fit(X_train_scaled, y_train_balanced) 

y_pred = Bernoulli_model.predict(X_test_scaled)

accuracy_BNB = accuracy_score(y_test, y_pred)

conf_matrix = confusion_matrix(y_test, y_pred, labels=[1, 0])

#Reporte de clasificación
class_report = classification_report(y_test, y_pred)
print("Reporte de clasificación:")
print(class_report)

#Matriz de confusión con orden modificado
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Oranges',
            xticklabels=['Positivo', 'Negativo'],  
            yticklabels=['Positivo', 'Negativo'])  
plt.title('Matriz de Confusión - Bernoulli')
plt.ylabel('Valor Real')
plt.xlabel('Valor Predicho')
plt.show()

precision_BNB = precision_score(y_test, y_pred)
recall_BNB = recall_score(y_test, y_pred)
f1_bnb = f1_score(y_test, y_pred)
tn_bnb, fp_bnb, fn_bnb, tp_bnb = conf_matrix.ravel()

### Modelo de KNN.

In [None]:
#Importar librerías
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from imblearn.over_sampling import RandomOverSampler
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

#Separar variable objetivo de los demás atributos
X = archivo.drop('Diagnosis', axis=1)
y = archivo['Diagnosis']

#Separar datos de entrenamiento y de testeo
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Balancear conjunto de entrenamiento
ros = RandomOverSampler(random_state=0)
X_train_balanced, y_train_balanced = ros.fit_resample(X_train, y_train)

#Estandarizar data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_balanced)
X_test_scaled = scaler.transform(X_test)

k_range = range(1, 51)
scores = []

#Buscar mejor "n" y guardar sus valores para posteriormente graficarlos
for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train_scaled, y_train_balanced)
    y_pred_k = knn.predict(X_test_scaled)
    scores.append(accuracy_score(y_test, y_pred_k))

#Muestra en un gráfico el desempeño de KNN (MEJORADO)
plt.figure(figsize=(20, 8))  # Tamaño más manejable
plt.plot(k_range, scores, marker='o', linewidth=2, markersize=4)
plt.title('Exactitud del modelo para diferentes valores de k', fontsize=14)
plt.xlabel('Valor de k', fontsize=12)
plt.ylabel('Exactitud', fontsize=12)
plt.grid(True, alpha=0.3)
# Mostrar solo cada 5 valores para mejor legibilidad
plt.xticks(k_range)
plt.tight_layout()
plt.show()

best_score = max(scores)
best_k = k_range[scores.index(best_score)]

#Mostrar información del mejor k
print(f"Mejor k encontrado: {best_k}")
print(f"Exactitud correspondiente: {best_score:.4f}")

#Verificar si hay múltiples k con la misma exactitud máxima
best_k_candidates = [k for k, score in zip(k_range, scores) if score == best_score]
if len(best_k_candidates) > 1:
    print(f"Otros k con la misma exactitud: {best_k_candidates}")
    print(f"Se eligió k={best_k} (el más pequeño para mayor simplicidad)")

#Entrena al modelo con el mejor K posible
final_knn_model = KNeighborsClassifier(n_neighbors=best_k)
final_knn_model.fit(X_train_scaled, y_train_balanced)
final_y_pred = final_knn_model.predict(X_test_scaled)

#Métricas del modelo final
final_accuracy_KNN = accuracy_score(y_test, final_y_pred)
print(f"\nExactitud del modelo final: {final_accuracy_KNN:.4f}")

#Matriz de confusión para el modelo final.
final_conf_matrix = confusion_matrix(y_test, final_y_pred, labels=[1, 0])

#Visualizar la matriz de confusión final
plt.figure(figsize=(8, 6))
sns.heatmap(final_conf_matrix, annot=True, fmt='d', cmap='Oranges',
            xticklabels=['Positivo', 'Negativo'],
            yticklabels=['Positivo', 'Negativo'])
plt.title(f'Matriz de Confusión - KNN (k={best_k})')
plt.ylabel('Valor Real')
plt.xlabel('Valor Predicho')
plt.show()

# Reporte de clasificación final
print("\nReporte de clasificación final:")
print(classification_report(y_test, final_y_pred))

precision_KNN = precision_score(y_test, y_pred)
recall_KNN = recall_score(y_test, y_pred)
f1_knn = f1_score(y_test, y_pred)
tn_knn, fp_knn, fn_knn, tp_knn = final_conf_matrix.ravel()

### Modelo de árboles de decisión.

In [None]:
from sklearn.tree import DecisionTreeClassifier,plot_tree
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from imblearn.over_sampling import RandomOverSampler
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

#Separar variable objetivo de los demás atributos
X = archivo.drop('Diagnosis', axis=1)
y = archivo['Diagnosis']

#Separar datos de entrenamiento y de testeo
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Balancear conjunto de entrenamiento
ros = RandomOverSampler(random_state=0)
X_train_balanced, y_train_balanced = ros.fit_resample(X_train, y_train)

#Estandarizar data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_balanced)
X_test_scaled = scaler.transform(X_test)

#Optimizar max_depth (profundidad del árbol)
depth_range = range(1, 21)
depth_scores = []

for depth in depth_range:
    tree = DecisionTreeClassifier(max_depth=depth, random_state=42)
    tree.fit(X_train_scaled, y_train_balanced)
    y_pred = tree.predict(X_test_scaled)
    depth_scores.append(accuracy_score(y_test, y_pred))

#Gráfico para max_depth
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(depth_range, depth_scores, marker='o', linewidth=2, markersize=4)
plt.title('Exactitud vs Max Depth', fontsize=12)
plt.xlabel('Max Depth')
plt.ylabel('Exactitud')
plt.grid(True, alpha=0.3)
plt.xticks(depth_range)

#Optimizar min_samples_split (Valor mínimo para separar un nodo interno)
split_range = range(2, 21)
split_scores = []

for split in split_range:
    tree = DecisionTreeClassifier(min_samples_split=split, random_state=42)
    tree.fit(X_train_scaled, y_train_balanced)
    y_pred = tree.predict(X_test_scaled)
    split_scores.append(accuracy_score(y_test, y_pred))

#Gráfico para min_samples_split
plt.subplot(1, 2, 2)
plt.plot(split_range, split_scores, marker='o', linewidth=2, markersize=4, color='orange')
plt.title('Exactitud vs Min Samples Split', fontsize=12)
plt.xlabel('Min Samples Split')
plt.ylabel('Exactitud')
plt.grid(True, alpha=0.3)
plt.xticks(split_range)
plt.tight_layout()
plt.show()

#Optimizar min_samples_leaf (Valor mínimo para ser un nodo hoja)
leaf_range = range(1, 11)
leaf_scores = []

for leaf in leaf_range:
    tree = DecisionTreeClassifier(min_samples_leaf=leaf, random_state=42)
    tree.fit(X_train_scaled, y_train_balanced)
    y_pred = tree.predict(X_test_scaled)
    leaf_scores.append(accuracy_score(y_test, y_pred))

#Gráfico para min_samples_leaf
plt.figure(figsize=(10, 5))
plt.plot(leaf_range, leaf_scores, marker='o', linewidth=2, markersize=4, color='green')
plt.title('Exactitud vs Min Samples Leaf', fontsize=12)
plt.xlabel('Min Samples Leaf')
plt.ylabel('Exactitud')
plt.grid(True, alpha=0.3)
plt.xticks(leaf_range)
plt.tight_layout()
plt.show()

#Mejor max_depth
best_depth_score = max(depth_scores)
best_depth = depth_range[depth_scores.index(best_depth_score)]
#Mejor min_samples_split
best_split_score = max(split_scores)
best_split = split_range[split_scores.index(best_split_score)]
#Mejor min_samples_leaf
best_leaf_score = max(leaf_scores)
best_leaf = leaf_range[leaf_scores.index(best_leaf_score)]

#Modelo a entrenar
final_tree = DecisionTreeClassifier(
    max_depth=best_depth,
    min_samples_split=best_split,
    min_samples_leaf=best_leaf,
    random_state=42
)
final_tree.fit(X_train_scaled, y_train_balanced)
final_y_pred = final_tree.predict(X_test_scaled)

#Métricas del modelo
final_accuracy_TREE = accuracy_score(y_test, final_y_pred)
print(f"\nExactitud del modelo final: {final_accuracy_TREE:.4f}")

final_conf_matrix = confusion_matrix(y_test, final_y_pred, labels=[1, 0])
plt.figure(figsize=(8, 6))
sns.heatmap(final_conf_matrix, annot=True, fmt='d', cmap='Oranges',
            xticklabels=['Positivo', 'Negativo'],
            yticklabels=['Positivo', 'Negativo'])
plt.title(f'Matriz de Confusión - Árbol de Decisión\n(depth={best_depth}, split={best_split}, leaf={best_leaf})')
plt.ylabel('Valor Real')
plt.xlabel('Valor Predicho')
plt.show()

# Reporte de clasificación final
print("\nReporte de clasificación final:")
print(classification_report(y_test, final_y_pred, zero_division=0))

plot_tree(final_tree,filled=True)
plt.show()

precision_TREE = precision_score(y_test, y_pred)
recall_TREE = recall_score(y_test, y_pred)
f1_tree = f1_score(y_test, y_pred)
tn_tree, fp_tree, fn_tree, tp_tree = final_conf_matrix.ravel()

### Modelo de Random Forest.

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from imblearn.over_sampling import RandomOverSampler
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

#Separar variable objetivo de los demás atributos
X = archivo.drop('Diagnosis', axis=1)
y = archivo['Diagnosis']

#Separar datos de entrenamiento y de testeo
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Balancear conjunto de entrenamiento
ros = RandomOverSampler(random_state=0)
X_train_balanced, y_train_balanced = ros.fit_resample(X_train, y_train)

#Estandarizar data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_balanced)
X_test_scaled = scaler.transform(X_test)

#Optimizar n_estimators
range_estimators = range(10, 201, 20)
score_estimators = []

for estimator in range_estimators:
    forest = RandomForestClassifier(n_estimators=estimator, random_state=42)
    forest.fit(X_train_scaled, y_train_balanced) 
    y_pred = forest.predict(X_test_scaled)        
    score_estimators.append(accuracy_score(y_test, y_pred))

#Optimizar max_depth
depth_range = [None] + list(range(5, 21, 2))
depth_scores = []

for depth in depth_range:
    forest = RandomForestClassifier(max_depth=depth, random_state=42)
    forest.fit(X_train_scaled, y_train_balanced)
    y_pred = forest.predict(X_test_scaled)
    depth_scores.append(accuracy_score(y_test, y_pred))

#Optimizar min_samples_split
split_range = range(2, 11)
split_scores = []

for split in split_range:
    forest = RandomForestClassifier(min_samples_split=split, random_state=42)
    forest.fit(X_train_scaled, y_train_balanced)
    y_pred = forest.predict(X_test_scaled)
    split_scores.append(accuracy_score(y_test, y_pred))


plt.figure(figsize=(15, 5))
# Gráfico n_estimators
plt.subplot(1, 3, 1)
plt.plot(range_estimators, score_estimators, marker='o', linewidth=2, markersize=4)
plt.title('Exactitud vs N_Estimators', fontsize=12)
plt.xlabel('N_Estimators')
plt.ylabel('Exactitud')
plt.grid(True, alpha=0.3)
plt.xticks(range(10, 201, 40))  # Mostrar cada 40
#Gráfico max_depth
plt.subplot(1, 3, 2)
depth_labels = ['None'] + [str(d) for d in depth_range[1:]]
plt.plot(range(len(depth_range)), depth_scores, marker='o', linewidth=2, markersize=4, color='orange')
plt.title('Exactitud vs Max_Depth', fontsize=12)
plt.xlabel('Max_Depth')
plt.ylabel('Exactitud')
plt.grid(True, alpha=0.3)
plt.xticks(range(len(depth_range)), depth_labels, rotation=45)
#Gráfico min_samples_split
plt.subplot(1, 3, 3)
plt.plot(split_range, split_scores, marker='o', linewidth=2, markersize=4, color='green')
plt.title('Exactitud vs Min_Samples_Split', fontsize=12)
plt.xlabel('Min_Samples_Split')
plt.ylabel('Exactitud')
plt.grid(True, alpha=0.3)
plt.xticks(split_range)
plt.tight_layout()
plt.show()


#Mejor n_estimators
best_estimator_score = max(score_estimators)
best_estimator = list(range_estimators)[score_estimators.index(best_estimator_score)]
#Mejor max_depth
best_depth_score = max(depth_scores)
best_depth = depth_range[depth_scores.index(best_depth_score)]
#Mejor min_samples_split
best_split_score = max(split_scores)
best_split = split_range[split_scores.index(best_split_score)]

final_forest = RandomForestClassifier(
    n_estimators=best_estimator,
    max_depth=best_depth,
    min_samples_split=best_split,
    random_state=42
)

final_forest.fit(X_train_scaled, y_train_balanced)
final_y_pred = final_forest.predict(X_test_scaled)

#Métricas del modelo final
final_accuracy_RF = accuracy_score(y_test, final_y_pred)
print(f"\nExactitud del modelo final: {final_accuracy_RF:.4f}")

#Matriz de confusión
final_conf_matrix = confusion_matrix(y_test, final_y_pred, labels=[1, 0])

plt.figure(figsize=(8, 6))
sns.heatmap(final_conf_matrix, annot=True, fmt='d', cmap='Oranges',
            xticklabels=['Positivo', 'Negativo'],
            yticklabels=['Positivo', 'Negativo'])
plt.title(f'Matriz de Confusión - Random Forest\n(estimators={best_estimator}, depth={best_depth}, split={best_split})')
plt.ylabel('Valor Real')
plt.xlabel('Valor Predicho')
plt.show()

# Reporte de clasificación final
print("\nReporte de clasificación final:")
print(classification_report(y_test, final_y_pred, zero_division=0))

precision_RF = precision_score(y_test, y_pred)
recall_RF = recall_score(y_test, y_pred)
f1_rf = f1_score(y_test, y_pred)
tn_rf, fp_rf, fn_rf, tp_rf = final_conf_matrix.ravel()

### Curvas ROC de los modelos.

In [None]:
from sklearn.metrics import roc_curve,roc_auc_score
import matplotlib.pyplot as plt
#Crear la gráfica de la curva ROC de Gaussian Naive Bayes 
y_pred_proba = nb_model.predict_proba(X_test_scaled)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)
auc_score_NB = roc_auc_score(y_test, y_pred_proba)
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'Curva ROC (AUC = {auc_score_NB:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', label='Línea de referencia (AUC = 0.50)')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('Tasa de Falsos Positivos')
plt.ylabel('Tasa de Verdaderos Positivos')
plt.title('Curva ROC - Gaussian Naive Bayes')
plt.legend(loc="lower right")
plt.grid(True, alpha=0.3)
plt.show()

#Crear la gráfica de la curva ROC de Bernoulli Naive Bayes
y_pred_proba = Bernoulli_model.predict_proba(X_test_scaled)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)
auc_score_BBN = roc_auc_score(y_test, y_pred_proba)
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'Curva ROC (AUC = {auc_score_BBN:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', label='Línea de referencia (AUC = 0.50)')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('Tasa de Falsos Positivos')
plt.ylabel('Tasa de Verdaderos Positivos')
plt.title('Curva ROC - Bernoulli Naive Bayes')
plt.legend(loc="lower right")
plt.grid(True, alpha=0.3)
plt.show()

#Crear la gráfica de la curva ROC de KNN
y_pred_proba = final_knn_model.predict_proba(X_test_scaled)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)
auc_score_KNN = roc_auc_score(y_test, y_pred_proba)
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'Curva ROC (AUC = {auc_score_KNN:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', label='Línea de referencia (AUC = 0.50)')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('Tasa de Falsos Positivos')
plt.ylabel('Tasa de Verdaderos Positivos')
plt.title('Curva ROC - KNN')
plt.legend(loc="lower right")
plt.grid(True, alpha=0.3)
plt.show()

#Crear la gráfica de la curva ROC de Árbol de decisión
y_pred_proba = final_tree.predict_proba(X_test_scaled)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)
auc_score_TREE= roc_auc_score(y_test, y_pred_proba)
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'Curva ROC (AUC = {auc_score_TREE:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', label='Línea de referencia (AUC = 0.50)')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('Tasa de Falsos Positivos')
plt.ylabel('Tasa de Verdaderos Positivos')
plt.title('Curva ROC - Árbol de decisión')
plt.legend(loc="lower right")
plt.grid(True, alpha=0.3)
plt.show()

#Crear la gráfica de la curva ROC de Random Forest
y_pred_proba = final_forest.predict_proba(X_test_scaled)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)
auc_score_RF = roc_auc_score(y_test, y_pred_proba)
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'Curva ROC (AUC = {auc_score_RF:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', label='Línea de referencia (AUC = 0.50)')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('Tasa de Falsos Positivos')
plt.ylabel('Tasa de Verdaderos Positivos')
plt.title('Curva ROC - Random Forest')
plt.legend(loc="lower right")
plt.grid(True, alpha=0.3)
plt.show()


### Resumen de los valores importantes de los modelos.

In [137]:
Naive_Bayes_dict = {
    'Accuracy': accuracy_NB,
    'Precision': precision_NB,
    'Recall': recall_NB,
    'F1-Score': f1_nb,
    'True Positives': tp_nb,
    'True Negatives': tn_nb,
    'False Positives': fp_nb,
    'False Negatives': fn_nb,
    'AUC-score': auc_score_NB
}
Naive_Bayes_dict2 = {
    'Accuracy': accuracy_BNB,
    'Precision': precision_BNB,
    'Recall': recall_BNB,
    'F1-Score': f1_bnb,
    'True Positives': tp_bnb,
    'True Negatives': tn_bnb,
    'False Positives': fp_bnb,
    'False Negatives': fn_bnb,
    'AUC-score': auc_score_BBN
}

KNN = {
    'Accuracy': final_accuracy_KNN,
    'Precision': precision_KNN,
    'Recall': recall_KNN,
    'F1-Score': f1_knn,
    'True Positives': tp_knn,
    'True Negatives': tn_knn,
    'False Positives': fp_knn,
    'False Negatives': fn_knn,
    'AUC-score': auc_score_KNN
}
TREE = {
    'Accuracy': final_accuracy_TREE,
    'Precision': precision_TREE,
    'Recall': recall_TREE,
    'F1-Score': f1_tree,
    'True Positives': tp_tree,
    'True Negatives': tn_tree,
    'False Positives': fp_tree,
    'False Negatives': fn_tree,
    'AUC-score': auc_score_TREE,
}
RF = {
    'Accuracy': final_accuracy_RF,
    'Precision': precision_RF,
    'Recall': recall_RF,
    'F1-Score': f1_rf,
    'True Positives': tp_rf,
    'True Negatives': tn_rf,
    'False Positives': fp_rf,
    'False Negatives': fn_rf,
    'AUC-score': auc_score_RF
}

resume = pd.DataFrame({
    'Gaussian Naive Bayes': pd.Series(Naive_Bayes_dict),
    'Bernoulli Naive Bayes': pd.Series(Naive_Bayes_dict2),
    'KNN': pd.Series(KNN),
    'Decision Tree': pd.Series(TREE),
    'Random Forest': pd.Series(RF)
})

resume.T

Unnamed: 0,Accuracy,Precision,Recall,F1-Score,True Positives,True Negatives,False Positives,False Negatives,AUC-score
Gaussian Naive Bayes,0.839535,0.75,0.823529,0.785047,235.0,126.0,27.0,42.0,0.895
Bernoulli Naive Bayes,0.853488,0.764706,0.849673,0.804954,237.0,130.0,23.0,40.0,0.895189
KNN,0.790698,0.951389,0.895425,0.922559,237.0,103.0,50.0,40.0,0.859064
Decision Tree,0.972093,0.923567,0.947712,0.935484,270.0,148.0,5.0,7.0,0.976865
Random Forest,0.953488,0.951389,0.895425,0.922559,271.0,139.0,14.0,6.0,0.981029


### Gráficos por métrica.

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

def plot_grouped_metrics(df):
    """Crear un gráfico de barras separado para cada métrica"""
    metrics = df.index.tolist()
    
    for i, metric in enumerate(metrics):
        #Crear una nueva figura para cada métrica
        plt.figure(figsize=(10, 6))
        
        #Datos para la métrica actual
        values = df.loc[metric].values
        models = df.columns.tolist()
        
        bars = plt.bar(models, values, alpha=0.8)
        
        colors = plt.cm.Set3(np.linspace(0, 1, len(models)))
        for bar, color in zip(bars, colors):
            bar.set_color(color)

        for j, (model, value) in enumerate(zip(models, values)):
            if pd.notna(value):  # Solo si el valor no es NaN
                plt.text(j, value + max(values)*0.01, f'{value:.3f}',
                        ha='center', va='bottom', fontweight='bold')
        
        plt.title(f'{metric}', fontweight='bold', fontsize=16)
        plt.ylabel('Valor', fontsize=12)
        plt.xlabel('Modelos', fontsize=12)
        plt.ylim(0, max(values) * 1.1 if max(values) > 0 else 1)
        
        plt.xticks(rotation=45, ha='right')
        plt.grid(axis='y', alpha=0.3)
        
        plt.tight_layout()
        plt.show()

plot_grouped_metrics(resume)