In [1]:
import pandas as pd
import math
import numpy as np

In [12]:
from sklearn.model_selection import train_test_split

# Load the data
data = pd.read_csv('mushrooms.csv')

# Preprocess the data: the first column holds the class label ('p' is mapped
# to 1 = poisonous, everything else to 0 = edible); the remaining columns are
# categorical features.
X = data.iloc[:, 1:]
y = data.iloc[:, 0]
# One-hot encode every feature and cast explicitly to int. pandas >= 2.0
# returns boolean dummy columns, and NumPy interprets a boolean scalar used as
# an index as a mask instead of position 0/1, so the likelihood lookup below
# would no longer return a single probability.
X = pd.get_dummies(X, columns=X.columns).values.astype(int)
y = np.where(y == 'p', 1, 0)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Naive Bayes classifier
# minlength=2 keeps both class slots present even if one class is missing
# from the training split.
class_counts = np.bincount(y_train, minlength=2)
priors = class_counts / float(len(y_train))
# likelihoods[c, f, v] = fraction of class-c training rows whose binary
# feature f equals v (v in {0, 1}). No smoothing is applied, so a value never
# seen for a class gets probability 0 and rules that class out.
likelihoods = np.zeros((2, X.shape[1], 2))
for j in range(2):
    ones_per_feature = X_train[y_train == j].sum(axis=0)
    zeros_per_feature = class_counts[j] - ones_per_feature
    likelihoods[j, :, 0] = zeros_per_feature / float(class_counts[j])
    likelihoods[j, :, 1] = ones_per_feature / float(class_counts[j])


def predict_naive_bayes(features, class_priors, cond_probs):
    """Predict the class (0 = edible, 1 = poisonous) for every row of `features`.

    Parameters
    ----------
    features : array-like of shape (n_samples, n_features)
        Binary (0/1) feature matrix; it is cast to int so its values can be
        used as positions into `cond_probs`.
    class_priors : array of shape (2,)
        Prior probability of each class.
    cond_probs : array of shape (2, n_features, 2)
        cond_probs[c, f, v] is the probability that feature f equals v given
        class c.

    Returns
    -------
    numpy.ndarray of shape (n_samples,)
        0 where class 0 has the strictly larger posterior score, otherwise 1
        (ties, including "both classes impossible", resolve to 1).

    Notes
    -----
    Scores are accumulated as sums of logs rather than products of
    probabilities. This gives the same comparison while avoiding floating
    point underflow when many features are multiplied together.
    """
    features = np.asarray(features, dtype=int)
    feature_idx = np.arange(features.shape[1])
    # log(0) = -inf is the intended encoding of an impossible event, so the
    # divide-by-zero warning is silenced on purpose.
    with np.errstate(divide='ignore'):
        log_priors = np.log(class_priors)
        log_cond = np.log(cond_probs)
    # log_cond[c, feature_idx, features] broadcasts to (n_samples, n_features):
    # for each sample and feature it selects the log-probability of the value
    # that was actually observed.
    score_edible = log_priors[0] + log_cond[0, feature_idx, features].sum(axis=1)
    score_poisonous = log_priors[1] + log_cond[1, feature_idx, features].sum(axis=1)
    return np.where(score_edible > score_poisonous, 0, 1)


# Test the classifier on training data
y_train_pred = predict_naive_bayes(X_train, priors, likelihoods)
train_accuracy = np.mean(y_train_pred == y_train)
print('Training accuracy:', train_accuracy)

# Test the classifier on testing data
y_test_pred = predict_naive_bayes(X_test, priors, likelihoods)
test_accuracy = np.mean(y_test_pred == y_test)
print('Testing accuracy:', test_accuracy)

Training accuracy: 0.9943068164332974
Testing accuracy: 0.9938461538461538


## Accuracy using the built-in library

In [3]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

In [4]:
# Work on an independent copy of the raw frame. `data[:]` is only a row slice
# of `data`, so assigning columns on it writes into a slice-derived frame
# (older pandas versions flag this with SettingWithCopyWarning).
indata = data.copy()

In [5]:
# Replace every column of `indata` (the target column included) with integer
# codes. A single encoder object is reused and refit for each column, so after
# the loop `le` holds the fit for the last column only.
le = LabelEncoder()
for col in indata.columns:
    encoded_values = le.fit_transform(indata[col])
    indata[col] = encoded_values


In [6]:
# Separate the encoded target column from the predictor columns.
y = indata['class']
x = indata.drop(columns='class')

In [7]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=101,
)

In [8]:
# Library baseline classifier.
# NOTE(review): GaussianNB models each feature as a continuous Gaussian, while
# the inputs here are label-encoded categories - confirm this is the intended
# baseline (CategoricalNB may be a closer match).
model = GaussianNB()

In [9]:
# Fit on the training split; as the cell's last expression, the fitted
# estimator is also displayed.
model.fit(x_train, y_train)

GaussianNB()

In [10]:
# Predict on both splits (training first, then testing) so the two accuracies
# can be compared.
y_predict_train, y_predict_test = (
    model.predict(split) for split in (x_train, x_test)
)

In [11]:
# Training and testing accuracy of the built-in classifier.
# accuracy_score is documented as accuracy_score(y_true, y_pred); the arguments
# are passed in that order. Accuracy is symmetric, so the printed numbers are
# the same as with the arguments swapped.
print("Percentage accuracy on training data using inbuilt library : ",accuracy_score(y_train,y_predict_train)*100,'%')
print("Percentage accuracy on testing data using inbuilt library : ",accuracy_score(y_test,y_predict_test)*100,'%')


Percentage accuracy on training data using inbuilt library :  92.29686950404502 %
Percentage accuracy on testing data using inbuilt library :  91.42739950779327 %
