<h1>Algoritmo Naïve Bayes</h1>

<h2>Breast Cancer</h2>

<h3>Hold-Out 70/30 estratificado</h3>

In [13]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix

# Breast cancer dataset: a two-class (binary) classification problem.
data_cancer = load_breast_cancer()
# Separate the feature matrix from the target vector.
X_cancer, y_cancer = data_cancer.data, data_cancer.target
# 70/30 hold-out split. `stratify=y_cancer` requests a stratified split on the
# labels, and the fixed `random_state` makes the partition reproducible.
X_cancer_train, X_cancer_test, y_cancer_train, y_cancer_test = train_test_split(X_cancer, y_cancer, test_size=0.3, stratify=y_cancer, random_state=42)

nb_model = GaussianNB()  # Gaussian Naive Bayes classifier
nb_model.fit(X_cancer_train, y_cancer_train)  # train on the 70% partition
y_cancer_pred_holdout = nb_model.predict(X_cancer_test)  # predict the held-out 30%
matriz_cancer_holdout = confusion_matrix(y_cancer_test, y_cancer_pred_holdout)  # confusion matrix
# Score the predictions produced in this cell. The previous code referenced
# `y_cancer_pred`, a name that is never defined in the notebook, so the cell
# failed with NameError on a fresh kernel (or silently scored stale predictions).
# Note: the value printed under the "Precisión" label is the accuracy.
accuracy_cancer_holdout = accuracy_score(y_cancer_test, y_cancer_pred_holdout)

print("Matriz de Confusión:\n", matriz_cancer_holdout)
print("Precisión:", accuracy_cancer_holdout)

Matriz de Confusión:
 [[ 57   7]
 [  2 105]]
Precisión: 0.9473684210526315


<h3>10-Fold Cross-Validation estratificado</h3>

In [18]:
from sklearn.model_selection import StratifiedKFold, cross_val_predict

# Stratified 10-fold splitter; shuffling with a fixed seed keeps the folds reproducible.
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Cross-validated predictions for the whole dataset, one prediction per sample.
y_cancer_pred_10fold = cross_val_predict(
    estimator=nb_model,
    X=X_cancer,
    y=y_cancer,
    cv=cv,
)

# The two metrics are independent of each other; both compare the true labels
# against the cross-validated predictions.
accuracy_cancer_10fold = accuracy_score(y_true=y_cancer, y_pred=y_cancer_pred_10fold)
matriz_cancer_10fold = confusion_matrix(y_true=y_cancer, y_pred=y_cancer_pred_10fold)

print("Matriz de Confusión:\n", matriz_cancer_10fold)
print("Precisión:", accuracy_cancer_10fold)  # value shown is the accuracy score

Matriz de Confusión:
 [[188  24]
 [ 12 345]]
Precisión: 0.9367311072056239


<h3>Leave-One-Out</h3>

In [20]:
from sklearn.model_selection import LeaveOneOut

# Leave-One-Out splitter, passed as the `cv` strategy below.
loo = LeaveOneOut()

# Cross-validated prediction for every sample using the Leave-One-Out splits.
y_cancer_pred_loo = cross_val_predict(
    estimator=nb_model,
    X=X_cancer,
    y=y_cancer,
    cv=loo,
)

# Accuracy and confusion matrix over the full set of cross-validated predictions.
accuracy_cancer_loo = accuracy_score(y_true=y_cancer, y_pred=y_cancer_pred_loo)
matriz_cancer_loo = confusion_matrix(y_true=y_cancer, y_pred=y_cancer_pred_loo)

print("Matriz de Confusión:\n", matriz_cancer_loo)
print("Precisión:", accuracy_cancer_loo)  # value shown is the accuracy score

Matriz de Confusión:
 [[189  23]
 [ 12 345]]
Precisión: 0.9384885764499121


<h2>Wine</h2>

<h3>Hold-Out 70/30 estratificado</h3>

In [28]:
from sklearn.datasets import load_wine

# Load the wine dataset and pull out features and labels.
data_wine = load_wine()
X_wine = data_wine.data
y_wine = data_wine.target

# Stratified 70/30 hold-out split with a fixed seed for reproducibility.
X_wine_train, X_wine_test, y_wine_train, y_wine_test = train_test_split(
    X_wine,
    y_wine,
    test_size=0.3,
    stratify=y_wine,
    random_state=42,
)

# Refit the shared classifier on the wine training partition and predict the
# held-out partition; fit() returns the estimator, so the calls can be chained.
y_wine_pred_holdout = nb_model.fit(X_wine_train, y_wine_train).predict(X_wine_test)

# Evaluate against the held-out labels.
accuracy_wine_holdout = accuracy_score(y_true=y_wine_test, y_pred=y_wine_pred_holdout)
matriz_wine_holdout = confusion_matrix(y_true=y_wine_test, y_pred=y_wine_pred_holdout)

print("Matriz de Confusión:\n", matriz_wine_holdout)
print("Precisión:", accuracy_wine_holdout)  # value shown is the accuracy score

Matriz de Confusión:
 [[18  0  0]
 [ 0 21  0]
 [ 0  0 15]]
Precisión: 1.0


<h3>10-Fold Cross-Validation estratificado</h3>

In [32]:
# Cross-validated predictions for the wine data, reusing the `cv` splitter
# object created in an earlier cell.
y_wine_pred_10fold = cross_val_predict(
    estimator=nb_model,
    X=X_wine,
    y=y_wine,
    cv=cv,
)

# Compare the true labels with the cross-validated predictions.
accuracy_wine_10fold = accuracy_score(y_true=y_wine, y_pred=y_wine_pred_10fold)
matriz_wine_10fold = confusion_matrix(y_true=y_wine, y_pred=y_wine_pred_10fold)

print("Matriz de Confusión:\n", matriz_wine_10fold)
print("Precisión:", accuracy_wine_10fold)  # value shown is the accuracy score

Matriz de Confusión:
 [[57  2  0]
 [ 0 69  2]
 [ 0  0 48]]
Precisión: 0.9775280898876404


<h3>Leave-One-Out</h3>

In [34]:
# Cross-validated predictions for the wine data, reusing the `loo` splitter
# object created in an earlier cell.
y_wine_pred_loo = cross_val_predict(
    estimator=nb_model,
    X=X_wine,
    y=y_wine,
    cv=loo,
)

# Compare the true labels with the cross-validated predictions.
accuracy_wine_loo = accuracy_score(y_true=y_wine, y_pred=y_wine_pred_loo)
matriz_wine_loo = confusion_matrix(y_true=y_wine, y_pred=y_wine_pred_loo)

print("Matriz de Confusión:\n", matriz_wine_loo)
print("Precisión:", accuracy_wine_loo)  # value shown is the accuracy score

Matriz de Confusión:
 [[57  2  0]
 [ 0 69  2]
 [ 0  0 48]]
Precisión: 0.9775280898876404


<h2>Iris</h2>

<h3>Hold-Out 70/30 estratificado</h3>

In [36]:
from sklearn.datasets import load_iris

# Load the iris dataset and pull out features and labels.
data_iris = load_iris()
X_iris = data_iris.data
y_iris = data_iris.target

# Stratified 70/30 hold-out split with a fixed seed for reproducibility.
X_iris_train, X_iris_test, y_iris_train, y_iris_test = train_test_split(
    X_iris,
    y_iris,
    test_size=0.3,
    stratify=y_iris,
    random_state=42,
)

# Refit the shared classifier on the iris training partition and predict the
# held-out partition; fit() returns the estimator, so the calls can be chained.
y_iris_pred_holdout = nb_model.fit(X_iris_train, y_iris_train).predict(X_iris_test)

# Evaluate against the held-out labels.
accuracy_iris_holdout = accuracy_score(y_true=y_iris_test, y_pred=y_iris_pred_holdout)
matriz_iris_holdout = confusion_matrix(y_true=y_iris_test, y_pred=y_iris_pred_holdout)

print("Matriz de Confusión:\n", matriz_iris_holdout)
print("Precisión:", accuracy_iris_holdout)  # value shown is the accuracy score

Matriz de Confusión:
 [[15  0  0]
 [ 0 14  1]
 [ 0  3 12]]
Precisión: 0.9111111111111111


<h3>10-Fold Cross-Validation estratificado</h3>

In [38]:
# Cross-validated predictions for the iris data, reusing the `cv` splitter
# object created in an earlier cell.
y_iris_pred_10fold = cross_val_predict(
    estimator=nb_model,
    X=X_iris,
    y=y_iris,
    cv=cv,
)

# Compare the true labels with the cross-validated predictions.
accuracy_iris_10fold = accuracy_score(y_true=y_iris, y_pred=y_iris_pred_10fold)
matriz_iris_10fold = confusion_matrix(y_true=y_iris, y_pred=y_iris_pred_10fold)

print("Matriz de Confusión:\n", matriz_iris_10fold)
print("Precisión:", accuracy_iris_10fold)  # value shown is the accuracy score

Matriz de Confusión:
 [[50  0  0]
 [ 0 47  3]
 [ 0  4 46]]
Precisión: 0.9533333333333334


<h3>Leave-One-Out</h3>

In [40]:
# Cross-validated predictions for the iris data, reusing the `loo` splitter
# object created in an earlier cell.
y_iris_pred_loo = cross_val_predict(
    estimator=nb_model,
    X=X_iris,
    y=y_iris,
    cv=loo,
)

# Compare the true labels with the cross-validated predictions.
accuracy_iris_loo = accuracy_score(y_true=y_iris, y_pred=y_iris_pred_loo)
matriz_iris_loo = confusion_matrix(y_true=y_iris, y_pred=y_iris_pred_loo)

print("Matriz de Confusión:\n", matriz_iris_loo)
print("Precisión:", accuracy_iris_loo)  # value shown is the accuracy score

Matriz de Confusión:
 [[50  0  0]
 [ 0 47  3]
 [ 0  4 46]]
Precisión: 0.9533333333333334
