In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Gaussian Naive Bayes on the breast-cancer-wisconsin CSV:
# load -> clean -> 70/30 split -> fit -> evaluate -> print a text report.

# --- Configuration: everything a reader might want to tune ---
# NOTE(review): absolute, machine-specific path; point it at your local copy of the dataset.
DATA_PATH = "/Users/gyan/Documents/Knowledge Discovery and Data Mining/CSV Files/breast-cancer-wisconsin.csv"
ID_COLUMN = "Sample"      # identifier column, excluded from the features
TARGET_COLUMN = "Class"   # label column to predict
TEST_SIZE = 0.3           # fraction of rows held out for testing
RANDOM_STATE = 42         # fixed seed so the split is reproducible

# --- Load ---
raw_df = pd.read_csv(DATA_PATH)

# --- Clean ---
# '?' marks a missing value in the file: turn it into NaN and drop incomplete rows.
# The raw frame is left untouched so the cleaning step can be re-run or inspected.
df = raw_df.replace('?', np.nan).dropna()

# Select the features by name (everything except the id and the target) so that the
# numeric conversion below and the feature matrix X always cover the same columns.
feature_columns = df.columns.drop([ID_COLUMN, TARGET_COLUMN])

# Columns that contained '?' were parsed as strings; pd.to_numeric raises if any
# other non-numeric token is still present, which surfaces dirty data early.
df = df.assign(**{col: pd.to_numeric(df[col]) for col in feature_columns})

# --- Features and target ---
X = df[feature_columns]
y = df[TARGET_COLUMN]

# --- Split: 70% train / 30% test ---
# NOTE(review): the split is not stratified; passing stratify=y would keep class
# proportions equal across the two sets, but would change the reported numbers.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# --- Build the Naive Bayes model ---
model = GaussianNB()
model.fit(X_train, y_train)

# --- Predict on the held-out set and evaluate ---
y_prediction = model.predict(X_test)
accuracy = accuracy_score(y_test, y_prediction)
c_matrix = confusion_matrix(y_test, y_prediction)
c_report = classification_report(y_test, y_prediction)

# --- Report ---
print("Naive Bayes Classification Results")
print("----------------------------------")
print(f"Accuracy: {accuracy:.4f}")
print("Confusion Matrix:")
print(c_matrix)
print("\nClassification Report:")
print(c_report)

Naive Bayes Classification Results
----------------------------------
Accuracy: 0.9659
Confusion Matrix:
[[123   4]
 [  3  75]]

Classification Report:
              precision    recall  f1-score   support

           2       0.98      0.97      0.97       127
           4       0.95      0.96      0.96        78

    accuracy                           0.97       205
   macro avg       0.96      0.97      0.96       205
weighted avg       0.97      0.97      0.97       205

