In [5]:
from ucimlrepo import fetch_ucirepo
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score

# Load and preprocess data
breast_cancer = fetch_ucirepo(id=15)
X = breast_cancer.data.features.replace('?', np.nan).apply(pd.to_numeric).dropna()
y = breast_cancer.data.targets.loc[X.index].replace({2:0, 4:1})

# Split data (no scaling required for Na誰ve Bayes, but included for consistency)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42, stratify=y)

# Train Gaussian Na誰ve Bayes
nb = GaussianNB(var_smoothing=1e-9)
nb.fit(X_train, y_train.values.ravel())

# Get probability estimates
probabilities = nb.predict_proba(X_test)[:, 1]

# Predictions with 0.5 threshold
y_pred = nb.predict(X_test)

# Performance evaluation
conf_matrix = confusion_matrix(y_test, y_pred)
metrics = {
    'Accuracy': accuracy_score(y_test, y_pred),
    'Precision': precision_score(y_test, y_pred),
    'Recall': recall_score(y_test, y_pred),
    'F1-Score': f1_score(y_test, y_pred),
    'AUC-ROC': roc_auc_score(y_test, probabilities)
}

# Display results with Bayesian probabilities
print("Gaussian Na誰ve Bayes Performance:")
print(f"- Accuracy: {metrics['Accuracy']:.4f}")
print(f"- Precision: {metrics['Precision']:.3f} | Recall: {metrics['Recall']:.3f}")
print(f"- F1-Score: {metrics['F1-Score']:.3f} | AUC-ROC: {metrics['AUC-ROC']:.3f}")

print("\nClass Conditional Means (Malignant vs Benign):")
pd.DataFrame({
    'Feature': X.columns,
    'Mean (Benign)': nb.theta_[0],
    'Mean (Malignant)': nb.theta_[1]
})

pd.DataFrame(conf_matrix,
             index=['Actual Benign (TN/FP)', 'Actual Malignant (FN/TP)'],
             columns=['Predicted Benign', 'Predicted Malignant'])

Gaussian Na誰ve Bayes Performance:
- Accuracy: 0.9591
- Precision: 0.921 | Recall: 0.967
- F1-Score: 0.943 | AUC-ROC: 0.980

Class Conditional Means (Malignant vs Benign):


Unnamed: 0,Predicted Benign,Predicted Malignant
Actual Benign (TN/FP),106,5
Actual Malignant (FN/TP),2,58
