In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
pip install ucimlrepo

In [None]:
from ucimlrepo import fetch_ucirepo 
  

# Download dataset id 15 from the UCI Machine Learning Repository.
breast_cancer_wisconsin_original = fetch_ucirepo(id=15)

# Pull the feature table (X) and the target table (y) out of the fetched object.
X, y = (
    breast_cancer_wisconsin_original.data.features,
    breast_cancer_wisconsin_original.data.targets,
)

# Print the dataset-level metadata first, then the variable information.
print(breast_cancer_wisconsin_original.metadata)

print(breast_cancer_wisconsin_original.variables)


In [None]:
# Shapes of the feature table and the target table, one per line.
print(X.shape, y.shape, sep="\n")



In [None]:
# Print pandas' concise summary of the feature table (columns, non-null counts, dtypes).
X.info()

In [None]:
# Display the first five rows of the feature table.
X.head(5)

In [None]:
# Display the column labels of the feature table.
X.columns

In [None]:
# Summary statistics of the non-object columns, rounded to two decimals and
# transposed so that each feature is a row and each statistic a column.
(
    X.select_dtypes(exclude=["object"])
    .describe()
    .round(2)
    .T
)

In [None]:
# Display the default (unrounded, untransposed) describe() summary of the feature table.
X.describe()

In [None]:
# One 20-bin histogram per feature column, drawn together on a 15x15-inch figure.
X.hist(bins=20, figsize=(15, 15))
# End with plt.show(), like the other plotting cells in this notebook, so the cell
# renders the figure without also echoing the array of Axes returned by hist().
plt.show()

In [None]:
# Recode the target values: 2 becomes 0 and 4 becomes 1.
# The raw y is left untouched; the recoded copy is stored in new_y.
new_y = y.replace(to_replace={2: 0, 4: 1})

# Preview the first 20 recoded rows.
print(new_y.head(20))

# List the distinct values that remain after recoding.
print('Unique Values in y:', np.unique(new_y))

In [None]:
# Histogram of the recoded target values on a 5x5-inch figure.
new_y.hist(bins=5, figsize=(5, 5))
# End with plt.show(), like the other plotting cells in this notebook, so the cell
# renders the figure without also echoing the array of Axes returned by hist().
plt.show()

In [None]:
# Missing values: count the nulls in every feature column and list the columns
# from most to fewest missing entries. to_string() prints the whole listing.
print(
    X.isna()
    .sum()
    .sort_values(ascending=False)
    .to_string()
)

In [None]:
#Removing Missing Values

# Keep only the rows of X that have no missing feature value.
X_clean = X.dropna()
# Take the matching rows from the recoded target new_y (2 -> 0, 4 -> 1) rather than
# from the raw y. Slicing the raw y left the recoding unused, so the model was
# trained on the labels 2/4 while later plots describe the classes as 0/1.
y_clean = new_y.loc[X_clean.index]

# Sanity check: every per-column null count printed here should be 0.
print(X_clean.isnull().sum())

In [None]:
# Pairwise correlation between the feature columns of X (the uncleaned table).
corr = X.corr()

# Draw the matrix on a 10x6-inch figure with the colour scale pinned to [-1, 1].
plt.figure(figsize=(10, 6))
heatmap = sns.heatmap(data=corr, cmap='BrBG', vmin=-1, vmax=1)

In [None]:
# Work on a copy so that adding the target column does not modify X_clean.
data_clean = X_clean.copy()
data_clean["y"] = y_clean

# Correlation of every column with every other column, target included.
corr_matrix = data_clean.corr()

# Annotated heatmap with two-decimal cell values on a 12x8-inch figure.
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(
    corr_matrix,
    annot=True,
    fmt=".2f",
    cmap="coolwarm",
    linewidths=0.5,
    ax=ax,
)
ax.set_title("Feature Correlation Heatmap (All Features)")
plt.show()

In [None]:
#Splitting Data

from sklearn.model_selection import train_test_split

# Hold out 25% of the cleaned rows for testing; random_state fixes the shuffle so
# the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X_clean, y_clean, test_size=0.25, random_state=7)

print('Shape of training data :',X_train.shape)
# Report the test feature matrix so this line is comparable with the training line
# above; it previously printed the shape of the test targets (y_test) instead.
print('Shape of testing data :',X_test.shape)

In [None]:
#Naive Bayes

from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Flatten both target sets to 1-D arrays before they reach the classifier and the metric.
y_train = np.asanyarray(y_train).ravel()
y_test = np.asanyarray(y_test).ravel()

# fit() returns the estimator itself, so construction and training are chained.
model = GaussianNB().fit(X_train, y_train)

# Training split: predictions first, then accuracy against the known targets.
predict_train = model.predict(X_train)
print('\nTarget on train data:\n',predict_train)
accuracy_train = accuracy_score(y_true=y_train, y_pred=predict_train)
print('\nAccuracy score on train dataset : ', accuracy_train)

# Test split: same two steps on the held-out rows.
predict_test = model.predict(X_test)
print('\nTarget on test data:\n',predict_test)
accuracy_test = accuracy_score(y_true=y_test, y_pred=predict_test)
print('\nAccuracy Score on test dataset : ', accuracy_test)

In [None]:
#Confusion Matrix
from sklearn.metrics import confusion_matrix

y_pred = model.predict(X_test)

# Take the label set and its order from the fitted model instead of hard-coding
# "Class 0"/"Class 1". Passing labels= fixes the row/column order of the matrix and
# keeps one row and one column per trained class even if a class is absent from the
# test split, so the tick labels always match the matrix.
class_labels = model.classes_
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
tick_labels = [f"Class {label}" for label in class_labels]

print(cm)
plt.figure(figsize=(6, 5))
# Rows are the actual classes, columns the predicted classes (see the axis labels below).
sns.heatmap(cm, annot=True, fmt='d', cmap="Blues", xticklabels=tick_labels, yticklabels=tick_labels)
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

In [None]:
import pickle

# Serialize the fitted classifier to naive_bayes_model.pkl in the current working directory.
with open("naive_bayes_model.pkl", mode="wb") as model_file:
    pickle.dump(model, model_file)

print("\nModel saved as naive_bayes_model.pkl!")