# Naive Bayes

In [48]:
import sklearn
from sklearn.datasets import load_breast_cancer

# Fetch the breast-cancer dataset that ships with scikit-learn
data = load_breast_cancer()

# Pull out the pieces used by the rest of the notebook:
# the feature matrix and its column names, then the targets and their class names
features = data['data']
feature_names = data['feature_names']
labels = data['target']
label_names = data['target_names']

print(label_names)

['malignant' 'benign']


In [49]:
# Show the name of the first feature column as a quick sanity check
print(feature_names[0])

mean radius


In [50]:
# Show the feature values of the first sample (one row of the feature matrix)
print(features[0])

[1.799e+01 1.038e+01 1.228e+02 1.001e+03 1.184e-01 2.776e-01 3.001e-01
 1.471e-01 2.419e-01 7.871e-02 1.095e+00 9.053e-01 8.589e+00 1.534e+02
 6.399e-03 4.904e-02 5.373e-02 1.587e-02 3.003e-02 6.193e-03 2.538e+01
 1.733e+01 1.846e+02 2.019e+03 1.622e-01 6.656e-01 7.119e-01 2.654e-01
 4.601e-01 1.189e-01]


In [51]:
from sklearn.model_selection import train_test_split # for splitting dataset into testing and training data
from sklearn.naive_bayes import GaussianNB # Naive Bayes algorithm for building the model
from sklearn.metrics import accuracy_score # for accuracy

# Hold out 40% of the samples for evaluation; the fixed random_state keeps
# the split identical across re-runs
train, test, train_labels, test_labels = train_test_split(
    features,
    labels,
    test_size=0.40,
    random_state=42,
)

In [52]:
# Create an untrained Gaussian Naive Bayes classifier with default settings
gnb = GaussianNB()

In [53]:
# Train the classifier by fitting it to the training features and labels.
# NOTE(review): `model` is not referenced again in the visible cells; later cells call `gnb` directly.
model = gnb.fit(train, train_labels)

#### The output is a series of 0s and 1s: the predicted class for each test sample (0 = malignant, 1 = benign)

In [54]:
# Predict a class label for every sample in the held-out test split
preds = gnb.predict(test)
# Print the full array of predicted labels
print(preds)

[1 0 0 1 1 0 0 0 1 1 1 0 1 0 1 0 1 1 1 0 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0
 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 0 1 1 0 0 1 1 1 0 0 1 1 0 0 1 0
 1 1 1 1 1 1 0 1 1 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 1 0 0 1 0 0 1 1 1 0 1 1 0
 1 1 0 0 0 1 1 1 0 0 1 1 0 1 0 0 1 1 0 0 0 1 1 1 0 1 1 0 0 1 0 1 1 0 1 0 0
 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 0 1 1 1 1 1 1 0 0
 0 1 1 0 1 0 1 1 1 1 0 1 1 0 1 1 1 0 1 0 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1
 0 0 1 1 0 1]


#### The result below shows that the Naive Bayes classifier is 95.175% accurate on the test set,
#### which can change if a different value of 'test_size' is passed to train_test_split

In [55]:
# Compare the predictions against the true test labels
acc = accuracy_score(test_labels, preds)

# Report the score as a percentage rounded to three decimals
acc_percent = round(acc * 100, 3)
print(f"Accuracy: {acc_percent} %")

Accuracy: 95.175 %
