In [None]:
!pip install ucimlrepo

Collecting ucimlrepo
  Downloading ucimlrepo-0.0.7-py3-none-any.whl.metadata (5.5 kB)
Downloading ucimlrepo-0.0.7-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.7


In [None]:
from ucimlrepo import fetch_ucirepo
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.metrics import accuracy_score, confusion_matrix, recall_score, precision_score, f1_score, classification_report

In [None]:
# Download UCI repository entry 17 (going by the variable name, the
# Breast Cancer Wisconsin Diagnostic dataset).
breast_cancer_wisconsin_diagnostic = fetch_ucirepo(id=17)

# Pull the feature and target tables (pandas dataframes) out of the fetched bundle.
X, y = (
    breast_cancer_wisconsin_diagnostic.data.features,
    breast_cancer_wisconsin_diagnostic.data.targets,
)

In [None]:
def print_report(y_true, ypred, model_name):
  """Print a plain-text evaluation summary for one classifier.

  Writes, separated by 30-dash rules: the model name, the confusion matrix,
  the accuracy, the macro-averaged recall / precision / F1 scores, and the
  full classification report.

  Args:
    y_true: Ground-truth labels.
    ypred: Predicted labels, aligned with ``y_true``.
    model_name: Heading printed at the top of the report.

  Returns:
    None. Everything is written to stdout.
  """
  rule = '-' * 30
  # Recall, precision and F1 share the same call shape, so drive them from a table.
  macro_metrics = (
    ("Recall_score", recall_score),
    ("Precision_score", precision_score),
    ("F1_score", f1_score),
  )

  for heading in (rule, model_name, rule, "Confusion Matrix: "):
    print(heading)
  print(confusion_matrix(y_true, ypred))
  print(rule)

  print(f"Accuracy_score: {accuracy_score(y_true, ypred)}")
  print(rule)
  for label, scorer in macro_metrics:
    print(f"{label}: {scorer(y_true, ypred, average='macro')}")
    print(rule)

  print("Classification Report: ")
  print(classification_report(y_true, ypred))
  print(rule)

In [None]:
# Render every column when a frame is displayed instead of eliding the middle ones.
pd.options.display.max_columns = None

# Place the feature columns and the target column side by side in one frame.
df = pd.concat((X, y), axis="columns")

In [None]:
# Preview the first five rows of the combined frame.
df.head(n=5)

Unnamed: 0,radius1,texture1,perimeter1,area1,smoothness1,compactness1,concavity1,concave_points1,symmetry1,fractal_dimension1,radius2,texture2,perimeter2,area2,smoothness2,compactness2,concavity2,concave_points2,symmetry2,fractal_dimension2,radius3,texture3,perimeter3,area3,smoothness3,compactness3,concavity3,concave_points3,symmetry3,fractal_dimension3,Diagnosis
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,1.095,0.9053,8.589,153.4,0.006399,0.04904,0.05373,0.01587,0.03003,0.006193,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,M
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,0.5435,0.7339,3.398,74.08,0.005225,0.01308,0.0186,0.0134,0.01389,0.003532,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,M
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,0.7456,0.7869,4.585,94.03,0.00615,0.04006,0.03832,0.02058,0.0225,0.004571,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,M
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,0.4956,1.156,3.445,27.23,0.00911,0.07458,0.05661,0.01867,0.05963,0.009208,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,M
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,0.7572,0.7813,5.438,94.44,0.01149,0.02461,0.05688,0.01885,0.01756,0.005115,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,M


In [None]:
# Report the (rows, columns) dimensions of the combined frame.
print(df.shape)

(569, 31)


In [None]:
df["Diagnosis"].value_counts() # the target has two classes, so this is a binary classification problem

Unnamed: 0_level_0,count
Diagnosis,Unnamed: 1_level_1
B,357
M,212


In [None]:
# Total number of missing cells across the whole frame (per-column counts, then summed).
df.isna().sum().sum()

np.int64(0)

In [None]:
# Number of rows that exactly repeat an earlier row (the first occurrence is not counted).
df.duplicated(keep="first").sum()

np.int64(0)

In [None]:
# Feature matrix: every column except the label.
x = df.drop(columns=["Diagnosis"])

# Label vector taken from the "Diagnosis" column.
# NOTE: this rebinds `y`, which previously held the targets frame from the fetch cell.
y = df["Diagnosis"].values

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Gaussian naive Bayes with default settings, fitted on the training split.
gaussian_nb = GaussianNB()
gaussian_nb.fit(X=x_train, y=y_train)


In [None]:
# Predicted labels for the held-out rows.
ypred_gaussian = gaussian_nb.predict(X=x_test)

In [None]:
# Score the Gaussian model's predictions against the held-out labels.
print_report(y_true=y_test, ypred=ypred_gaussian, model_name="GaussianNB")

------------------------------
GaussianNB
------------------------------
Confusion Matrix: 
[[71  0]
 [ 3 40]]
------------------------------
Accuracy_score: 0.9736842105263158
------------------------------
Recall_score: 0.9651162790697674
------------------------------
Precision_score: 0.9797297297297297
------------------------------
F1_score: 0.9715828832571666
------------------------------
Classification Report: 
              precision    recall  f1-score   support

           B       0.96      1.00      0.98        71
           M       1.00      0.93      0.96        43

    accuracy                           0.97       114
   macro avg       0.98      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114

------------------------------


In [None]:
# Multinomial naive Bayes with smoothing parameter alpha=0.8, fitted on the same training split.
# NOTE(review): the columns shown by df.head() are continuous measurements, while
# MultinomialNB is documented for count-like features - confirm this model choice is intended.
multinomial_nb = MultinomialNB(alpha=0.8)
multinomial_nb.fit(X=x_train, y=y_train)

In [None]:
# Predicted labels for the held-out rows.
ypred_multinomial = multinomial_nb.predict(X=x_test)

In [None]:
# Score the multinomial model's predictions against the held-out labels.
print_report(y_true=y_test, ypred=ypred_multinomial, model_name="MultinomialNB")

------------------------------
MultinomialNB
------------------------------
Confusion Matrix: 
[[71  0]
 [ 7 36]]
------------------------------
Accuracy_score: 0.9385964912280702
------------------------------
Recall_score: 0.9186046511627908
------------------------------
Precision_score: 0.9551282051282051
------------------------------
F1_score: 0.9322062696457395
------------------------------
Classification Report: 
              precision    recall  f1-score   support

           B       0.91      1.00      0.95        71
           M       1.00      0.84      0.91        43

    accuracy                           0.94       114
   macro avg       0.96      0.92      0.93       114
weighted avg       0.94      0.94      0.94       114

------------------------------


In [None]:
# NOTE(review): this model gets its own split with a 40% hold-out, whereas the Gaussian and
# multinomial models were fitted and scored on the 20% hold-out split - the resulting
# metrics are therefore not directly comparable. Confirm this is intended.
xprime_train, xprime_test, yprime_train, yprime_test = train_test_split(
    x,
    y,
    test_size=0.4,
    random_state=42,
)

# binarize=0.45 is the threshold BernoulliNB uses to turn each feature into a 0/1 value.
bernoulli_nb = BernoulliNB(binarize=0.45)
bernoulli_nb.fit(X=xprime_train, y=yprime_train)

In [None]:
# Predicted labels for the rows held out by the Bernoulli-specific split.
ypred_bernoulli = bernoulli_nb.predict(X=xprime_test)

In [None]:
# Score the Bernoulli model's predictions against its own held-out labels.
print_report(y_true=yprime_test, ypred=ypred_bernoulli, model_name="BernoulliNB")

------------------------------
BernoulliNB
------------------------------
Confusion Matrix: 
[[134  14]
 [ 15  65]]
------------------------------
Accuracy_score: 0.8728070175438597
------------------------------
Recall_score: 0.8589527027027026
------------------------------
Precision_score: 0.8610568345934926
------------------------------
F1_score: 0.8599834826249921
------------------------------
Classification Report: 
              precision    recall  f1-score   support

           B       0.90      0.91      0.90       148
           M       0.82      0.81      0.82        80

    accuracy                           0.87       228
   macro avg       0.86      0.86      0.86       228
weighted avg       0.87      0.87      0.87       228

------------------------------


In [None]:
# NOTE(review): bare literal with no effect beyond echoing its value - looks like a
# leftover scratch cell; confirm and delete.
4