# Naive Bayes Algorithm 

## Importing the libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
import warnings

# Install a blanket "ignore" filter for all warning categories so library
# warnings do not clutter the cell outputs below.
warnings.simplefilter('ignore')

## Importing the Training & Testing datasets

In [3]:
# Read the training and test splits from CSV files in the working directory.
train_path, test_path = "example_train.csv", "example_test.csv"
df1 = pd.read_csv(train_path)
df2 = pd.read_csv(test_path)

In [4]:
df1

Unnamed: 0,Document,Class
0,Upgrad is a great educational institution.,education
1,Educational greatness depends on ethics,education
2,A story of great ethics and educational greatness,education
3,Sholey is a great cinema,cinema
4,good movie depends on good story,cinema


In [5]:
df2

Unnamed: 0,Document,Class
0,very good educational institution,education


## Pre-processing the datasets

In [6]:
# Encode the text labels as integers (education -> 1, cinema -> 0).
# Values that are not keys of the mapping are passed through unchanged, so
# re-running this cell on the already-encoded column is a no-op instead of
# turning every label into NaN (which a plain .map(dict) would do).
class_to_id = {'education': 1, 'cinema': 0}
df1['Class'] = df1['Class'].map(lambda label: class_to_id.get(label, label))

In [7]:
df1

Unnamed: 0,Document,Class
0,Upgrad is a great educational institution.,1
1,Educational greatness depends on ethics,1
2,A story of great ethics and educational greatness,1
3,Sholey is a great cinema,0
4,good movie depends on good story,0


In [8]:
# Select the documents and labels by column name rather than by position, so
# the split stays correct even if the CSV gains or reorders columns, and so
# the mixed-dtype frame is not materialised as an object matrix twice.
X_train = df1['Document'].to_numpy()
y_train = df1['Class'].to_numpy().astype('int')

In [9]:
# Encode the text labels as integers (education -> 1, cinema -> 0).
# Values that are not keys of the mapping are passed through unchanged, so
# re-running this cell on the already-encoded column is a no-op instead of
# turning every label into NaN (which a plain .map(dict) would do).
class_to_id = {'education': 1, 'cinema': 0}
df2['Class'] = df2['Class'].map(lambda label: class_to_id.get(label, label))

In [10]:
df2

Unnamed: 0,Document,Class
0,very good educational institution,1


In [11]:
# Select the documents and labels by column name rather than by position, so
# the split stays correct even if the CSV gains or reorders columns, and so
# the mixed-dtype frame is not materialised as an object matrix twice.
X_test = df2['Document'].to_numpy()
y_test = df2['Class'].to_numpy().astype('int')

## Feature Extraction Using Bag-of-Words

In [12]:
from sklearn.feature_extraction.text import CountVectorizer

# Learn the vocabulary from the training documents only, using the
# vectorizer's built-in English stop-word list.
vc = CountVectorizer(stop_words='english').fit(X_train)

# Mapping of each learned term to its column index.
vc.vocabulary_

{'upgrad': 11,
 'great': 5,
 'educational': 2,
 'institution': 7,
 'greatness': 6,
 'depends': 1,
 'ethics': 3,
 'story': 10,
 'sholey': 9,
 'cinema': 0,
 'good': 4,
 'movie': 8}

In [13]:
# Show the learned vocabulary terms, then how many there are.
feature_names = vc.get_feature_names_out()
print(feature_names)
print(len(feature_names))

['cinema' 'depends' 'educational' 'ethics' 'good' 'great' 'greatness'
 'institution' 'movie' 'sholey' 'story' 'upgrad']
12


## Encoding

In [14]:
# Encode the training documents as term counts using the fitted vectorizer.
X_train_transformed = vc.transform(X_train)

In [15]:
# Encode the test documents with the same vectorizer, which was fitted on
# X_train only, so both matrices share the same columns.
X_test_transformed = vc.transform(X_test)

In [16]:
# Convert the encoded training matrix to a dense NumPy array and display it
# (one row per training document, one column per vocabulary term).
X_train_transformed1 = X_train_transformed.toarray()
X_train_transformed1

array([[0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1],
       [0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0],
       [1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0],
       [0, 1, 0, 0, 2, 0, 0, 0, 1, 0, 1, 0]], dtype=int64)

In [17]:
# Convert the encoded test matrix to a dense NumPy array and display it.
X_test_transformed1 = X_test_transformed.toarray()
X_test_transformed1

array([[0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0]], dtype=int64)

## Multinomial Naive Bayes Classifier

In [18]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# plot_confusion_matrix was deprecated in scikit-learn 1.0 and removed in 1.2.
# Importing it unconditionally makes this whole cell raise ImportError on
# current releases (leaving accuracy_score undefined for the cells below).
# Fall back to ConfusionMatrixDisplay.from_estimator, the replacement API,
# which accepts the same (estimator, X, y) arguments.
try:
    from sklearn.metrics import plot_confusion_matrix
except ImportError:
    from sklearn.metrics import ConfusionMatrixDisplay
    plot_confusion_matrix = ConfusionMatrixDisplay.from_estimator

In [19]:
from sklearn.naive_bayes import MultinomialNB

# Fit a multinomial Naive Bayes model on the dense term-count matrix and
# predict the class of the test document(s).
mnb=MultinomialNB()
mnb.fit(X_train_transformed1, y_train)
y_predMNB = mnb.predict(X_test_transformed1)

# Accuracies as percentages. accuracy_score takes (y_true, y_pred) in that
# order, so the ground-truth labels go first. The second figure is computed
# on the test split (y_test), even though the printed label says "Validation".
acc_mnbTrain = mnb.score(X_train_transformed1, y_train) * 100
acc_mnbTest = accuracy_score(y_test, y_predMNB) * 100
print("Training Accuracy: ", acc_mnbTrain)
print("Validation Accuracy: ", acc_mnbTest)

Training Accuracy:  100.0
Validation Accuracy:  100.0


## Bernoulli Naive Bayes Classifier

In [20]:
from sklearn.naive_bayes import BernoulliNB

# Fit a Bernoulli Naive Bayes model on the same dense term-count matrix and
# predict the class of the test document(s).
bnb = BernoulliNB()
bnb.fit(X_train_transformed1, y_train)
y_predBNB = bnb.predict(X_test_transformed1)

# Accuracies as percentages. accuracy_score takes (y_true, y_pred) in that
# order, so the ground-truth labels go first. The second figure is computed
# on the test split (y_test), even though the printed label says "Validation".
acc_bnbTrain = bnb.score(X_train_transformed1, y_train) * 100
acc_bnbTest = accuracy_score(y_test, y_predBNB) * 100
print("Training Accuracy: ", acc_bnbTrain)
print("Validation Accuracy: ", acc_bnbTest)

Training Accuracy:  100.0
Validation Accuracy:  100.0
