In [15]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score

In [16]:
x, y = datasets.load_iris(return_X_y = True)

In [17]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=45, stratify=y)
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

(120, 4) (120,) (30, 4) (30,)


# K Nearest Neighbor Classifier

In [18]:
knn = KNeighborsClassifier(n_neighbors=11, weights='distance')
knn.fit(x_train, y_train)

pred_knn = knn.predict(x_test)
print(accuracy_score(y_test, pred_knn))
print(confusion_matrix(y_test, pred_knn))

0.9333333333333333
[[10  0  0]
 [ 0  9  1]
 [ 0  1  9]]


# Gaussian Naive Bayes

In [19]:
gnb = GaussianNB()
gnb.fit(x_train, y_train)

pred_gnb = gnb.predict(x_test)
print(accuracy_score(y_test, pred_gnb))
print(confusion_matrix(y_test, pred_gnb))

0.9
[[10  0  0]
 [ 0  9  1]
 [ 0  2  8]]


# Multinomial Naive Bayes

In [20]:
import pandas as pd
import re
from sklearn.feature_extraction.text import CountVectorizer
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

In [21]:
dataset = pd.DataFrame(dataset)
dataset.columns = ["Text", "Reviews"]
 
nltk.download('stopwords')
 
corpus = []
 
for i in range(0, 5):
    text = re.sub('[^a-zA-Z]', '', dataset['Text'][i])
    text = text.lower()
    text = text.split()
    ps = PorterStemmer()
    text = ''.join(text)
    corpus.append(text)

# creating bag of words model
cv = CountVectorizer(max_features = 1500)
 
X = cv.fit_transform(corpus).toarray()
y = dataset.iloc[:, 1].values
 
X_train, X_test, y_train, y_test = train_test_split(
           X, y, test_size = 0.25, random_state = 42)

from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import confusion_matrix
 
classifier = MultinomialNB()
classifier.fit(X_train, y_train)
 
# predicting test set results
y_pred = classifier.predict(X_test)
 
# making the confusion matrix
cm = confusion_matrix(y_test, y_pred)
cm

ValueError: Length mismatch: Expected axis has 0 elements, new values have 2 elements

# Bernoulli Naive Bayes

In [None]:
from sklearn.naive_bayes import BernoulliNB

classifier = BernoulliNB()
classifier.fit(X_train, y_train)
 
# predicting test set results
y_pred = classifier.predict(X_test)
 
# making the confusion matrix
cm = confusion_matrix(y_test, y_pred)
cm

array([[0, 1],
       [0, 1]], dtype=int64)