In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [None]:
X, y = load_iris(return_X_y=True)  # return_X_y=True returns just the feature matrix X and the target vector y

In [None]:
# Split into train and test: 30% of the samples are held out for evaluation,
# and random_state is fixed so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [None]:
# as the data is continuous, we use GaussianNB
from sklearn.naive_bayes import GaussianNB

In [None]:
gnb = GaussianNB()

In [None]:
gnb.fit(X_train, y_train)

In [None]:
y_pred = gnb.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [None]:
# Report the GaussianNB results on the iris test split: overall accuracy,
# then the confusion matrix, then the per-class report, each on its own line.
print(
    accuracy_score(y_test, y_pred),
    confusion_matrix(y_test, y_pred),
    classification_report(y_test, y_pred),
    sep="\n",
)

1.0
[[16  0  0]
 [ 0 18  0]
 [ 0  0 11]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       1.00      1.00      1.00        18
           2       1.00      1.00      1.00        11

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



In [None]:
import seaborn as sns

In [None]:
# seaborn's example `tips` table; columns are total_bill, tip, sex, smoker, day, time, size
data = sns.load_dataset('tips')

In [None]:
data.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [None]:
# The `smoker` column is the label to predict; every other column is a feature.
# Note: this rebinds X and y, which previously held the iris arrays.
y = data['smoker']
X = data.drop(columns=['smoker'])

In [None]:
# Hold out 25% of the tips rows for testing; random_state is fixed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=10)

In [None]:
from sklearn.preprocessing import LabelEncoder

# Work on explicit copies: the frames returned by train_test_split are derived
# from `X`, and assigning columns onto them directly may trigger pandas'
# chained-assignment (SettingWithCopy) warning.
X_train = X_train.copy()
X_test = X_test.copy()

# One encoder per categorical feature. Each is fitted on the training split
# only and then reused on the test split, so both splits are guaranteed to
# share the same string -> integer mapping. An unseen test category raises
# instead of being silently re-numbered. The fitted encoders are kept so the
# mapping can be inspected or inverted later.
feature_encoders = {}
for column in ['sex', 'time']:
    encoder = LabelEncoder()
    X_train[column] = encoder.fit_transform(X_train[column])
    X_test[column] = encoder.transform(X_test[column])
    feature_encoders[column] = encoder

# Encode the target the same way: learn the classes from y_train, apply them to y_test.
le1 = LabelEncoder()
y_train = le1.fit_transform(y_train)
y_test = le1.transform(y_test)

# Backward-compatible alias: `le2` used to be a second encoder fitted on the
# test split; it now refers to the single train-fitted target encoder.
le2 = le1

In [None]:
y_train

array([1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0,
       0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1,
       0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1,
       1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
       0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1,
       1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 0])

In [None]:
y_test

array([0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1,
       1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0])

In [None]:
X.head()

Unnamed: 0,total_bill,tip,sex,day,time,size
0,16.99,1.01,Female,Sun,Dinner,2
1,10.34,1.66,Male,Sun,Dinner,3
2,21.01,3.5,Male,Sun,Dinner,3
3,23.68,3.31,Male,Sun,Dinner,2
4,24.59,3.61,Female,Sun,Dinner,4


In [None]:
# One-hot encode `day`, the only feature that is still a string column at this
# point (`sex` and `time` were label-encoded in the cells above). Every other
# column is passed through unchanged, after the one-hot columns.
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# Select the column by name instead of by position (it was index 3) so the
# encoder keeps targeting `day` even if the column order of X changes.
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), ['day'])], remainder='passthrough')
X_train = ct.fit_transform(X_train)  # learn the day categories from the training split only
X_test = ct.transform(X_test)  # reuse the categories learned from the training split


In [None]:
X_train

array([[0., 1., 0., ..., 1., 0., 2.],
       [0., 0., 1., ..., 1., 0., 3.],
       [0., 0., 1., ..., 1., 0., 3.],
       ...,
       [0., 1., 0., ..., 1., 0., 2.],
       [0., 0., 0., ..., 0., 1., 6.],
       [0., 0., 1., ..., 1., 0., 2.]])

In [None]:
from sklearn.naive_bayes import BernoulliNB

# NOTE(review): BernoulliNB models binary features and, per the scikit-learn
# docs, thresholds its inputs at binarize=0.0 by default. The positive-valued
# total_bill / tip / size columns presumably all collapse to 1 and carry no
# signal here - confirm this is intended before relying on the scores below.
bnb = BernoulliNB()

In [None]:
bnb.fit(X_train, y_train)

In [None]:
# Predict on the held-out tips rows; this reuses the name y_pred, replacing the iris predictions made earlier.
y_pred = bnb.predict(X_test)

In [None]:
# Report the BernoulliNB results on the tips test split: overall accuracy,
# then the confusion matrix, then the per-class report, each on its own line.
print(
    accuracy_score(y_test, y_pred),
    confusion_matrix(y_test, y_pred),
    classification_report(y_test, y_pred),
    sep="\n",
)

0.5737704918032787
[[32  1]
 [25  3]]
              precision    recall  f1-score   support

           0       0.56      0.97      0.71        33
           1       0.75      0.11      0.19        28

    accuracy                           0.57        61
   macro avg       0.66      0.54      0.45        61
weighted avg       0.65      0.57      0.47        61



In [None]:
# A fresh GaussianNB (separate from `gnb`, which was fitted on iris) to compare
# against BernoulliNB on the same encoded tips features.
gnb2 = GaussianNB()

In [None]:
gnb2.fit(X_train, y_train)

In [None]:
# Overwrites the BernoulliNB predictions held in y_pred with the GaussianNB ones.
y_pred = gnb2.predict(X_test)

In [None]:
# Report the GaussianNB results on the tips test split: overall accuracy,
# then the confusion matrix, then the per-class report, each on its own line.
print(
    accuracy_score(y_test, y_pred),
    confusion_matrix(y_test, y_pred),
    classification_report(y_test, y_pred),
    sep="\n",
)

0.5737704918032787
[[31  2]
 [24  4]]
              precision    recall  f1-score   support

           0       0.56      0.94      0.70        33
           1       0.67      0.14      0.24        28

    accuracy                           0.57        61
   macro avg       0.62      0.54      0.47        61
weighted avg       0.61      0.57      0.49        61

