<hr>

# **Naive Bayes Classifier**

<hr>

In [None]:
import warnings
warnings.filterwarnings("ignore")

**Mandatory Libraries**

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

**Machine Learning Libraries**

In [None]:
from sklearn.metrics import *
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

**Mounting Google Drive**

In [None]:
# Colab-specific: make Google Drive available under the mount point below.
from google.colab import drive

DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


<hr>

# **Data Loading Phase**

**Data Loading**

In [None]:
DATA_PATH = "/content/drive/MyDrive/secondary_data.csv"

# The file is read with ";" as the field separator instead of the default comma.
df = pd.read_csv(DATA_PATH, sep=";")

**Data Inspection**

In [None]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,class,cap-diameter,cap-shape,cap-surface,cap-color,does-bruise-or-bleed,gill-attachment,gill-spacing,gill-color,stem-height,...,stem-root,stem-surface,stem-color,veil-type,veil-color,has-ring,ring-type,spore-print-color,habitat,season
0,p,15.26,x,g,o,f,e,,w,16.95,...,s,y,w,u,w,t,g,,d,w
1,p,16.6,x,g,o,f,e,,w,17.99,...,s,y,w,u,w,t,g,,d,u
2,p,14.07,x,g,o,f,e,,w,17.8,...,s,y,w,u,w,t,g,,d,w
3,p,14.17,f,h,e,f,e,,w,15.77,...,s,y,w,u,w,t,p,,d,w
4,p,14.64,x,h,o,f,e,,w,16.53,...,s,y,w,u,w,t,p,,d,w


**Class Balance**

In [None]:
# Relative frequency of each label in the target column.
class_proportions = df["class"].value_counts(normalize=True)
class_proportions

Unnamed: 0_level_0,proportion
class,Unnamed: 1_level_1
p,0.554913
e,0.445087


**The classes are nearly balanced (about 55% `p` vs. 45% `e`).**

**Shape Inspection**

In [None]:
# Report the dimensions of the loaded frame.
n_rows, n_columns = df.shape
print(f"Rows: {n_rows} & Columns: {n_columns}")

Rows: 61069 & Columns: 21


<hr>

# **Data Pre-processing**

In [None]:
# Count the missing values in every column.
df.isna().sum()

Unnamed: 0,0
class,0
cap-diameter,0
cap-shape,0
cap-surface,14120
cap-color,0
does-bruise-or-bleed,0
gill-attachment,9884
gill-spacing,25063
gill-color,0
stem-height,0


In [None]:
# Columns excluded from modelling; the null count above shows thousands of
# missing values for several of them (e.g. cap-surface, gill-spacing).
columns_to_drop = [
  "cap-surface",
  "gill-attachment",
  "spore-print-color",
  "ring-type",
  "veil-color",
  "veil-type",
  "gill-spacing",
  "stem-root",
  "stem-surface",
]
df = df.drop(columns=columns_to_drop)

In [None]:
# Re-count the missing values per column on the current frame.
df.isna().sum()

Unnamed: 0,0
class,0
cap-diameter,0
cap-shape,0
cap-color,0
does-bruise-or-bleed,0
gill-color,0
stem-height,0
stem-width,0
stem-color,0
has-ring,0


In [None]:
# Number of rows that repeat an earlier row across all columns.
duplicate_rows = df.duplicated()
duplicate_rows.sum()

166

In [None]:
# Keep only the first occurrence of each repeated row (original index labels
# are preserved, not renumbered).
df = df.loc[~df.duplicated()]

<hr>

# **Feature Engineering**

**Encoding**

In [None]:
# Integer-encode every non-numeric column (this includes the target, "class").
# A separate LabelEncoder is fitted per column and stored in `encoders`, so each
# mapping stays available afterwards (e.g. encoders["class"].inverse_transform)
# instead of being overwritten when the next column is fitted.
# NOTE(review): LabelEncoder assigns codes in sorted-label order, which carries
# no real meaning for nominal features; GaussianNB will treat these codes as
# continuous values — consider one-hot encoding or CategoricalNB.
encoders = {}

# `column` (rather than `x`) keeps the loop variable from colliding with the
# feature matrix that is named `x` later in the notebook.
for column in df.columns:
  # Checking "not numeric" instead of dtype == "object" also catches pandas'
  # dedicated string dtype, which does not compare equal to "object".
  if not pd.api.types.is_numeric_dtype(df[column]):
    encoder = LabelEncoder()
    df[column] = encoder.fit_transform(df[column])
    encoders[column] = encoder

<hr>

# **Modelling Phase**

**Feature Division**

In [None]:
# Target vector: the "class" column on its own.
y = df["class"]

# Feature matrix: every column except the target.
x = df.drop(columns=["class"])

**Data Division**

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
x_train, x_test, y_train, y_test = train_test_split(
  x,
  y,
  test_size=0.30,
  random_state=42,
)

**Modelling**

In [None]:
# Create an instance of the Gaussian Naive Bayes classifier, using its
# default settings (no arguments are passed).
model = GaussianNB()

In [None]:
# Fit the classifier on the training features and their labels.
model.fit(x_train, y_train)

In [None]:
# Predict labels for the held-out test features.
pred = model.predict(x_test)

In [None]:
# Share of test rows whose predicted label matches the true label.
test_accuracy = accuracy_score(y_true=y_test, y_pred=pred)
print(f"Accuracy Score: {test_accuracy}")

Accuracy Score: 0.5866673964205572


In [None]:
# Cross-tabulate true labels against predicted labels on the test split.
test_confusion = confusion_matrix(y_test, pred)
print(f"Confusion Matrix: \n{test_confusion}")

Confusion Matrix: 
[[2394 5697]
 [1855 8325]]


In [None]:
# Per-class precision, recall, F1 and support on the test split.
test_report = classification_report(y_test, pred)
print(f"Classification Report: \n{test_report}")

Classification Report: 
              precision    recall  f1-score   support

           0       0.56      0.30      0.39      8091
           1       0.59      0.82      0.69     10180

    accuracy                           0.59     18271
   macro avg       0.58      0.56      0.54     18271
weighted avg       0.58      0.59      0.56     18271



In [None]:
# Score the model on the rows it was trained on, so the result can be
# compared with the test-split accuracy.
train_predictions = model.predict(x_train)
train_accuracy = accuracy_score(y_true=y_train, y_pred=train_predictions)

In [None]:
# Report the accuracy measured on the training split.
print(f"Train Accuracy: {train_accuracy}")

Train Accuracy: 0.5852645899793583


In [None]:
# Accuracy on the held-out test split, shown next to the training figure.
held_out_accuracy = accuracy_score(y_test, pred)
print(f"Testing Data: {held_out_accuracy}")

Testing Data: 0.5866673964205572
