In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Importing Libraries

In [None]:
from sklearn.preprocessing import LabelEncoder
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB 
from sklearn.metrics import r2_score, accuracy_score, mean_absolute_error, mean_squared_error, roc_curve, auc, confusion_matrix, classification_report

# Loading Dataset

In [None]:
# List the dataset file to confirm it exists at the path used for loading.
!ls ../input/mushroom-classification/mushrooms.csv

# Data Preprocessing

In [None]:
# Read the mushroom CSV into a DataFrame and display it.
data = pd.read_csv("../input/mushroom-classification/mushrooms.csv")
data

In [None]:
# Column labels of the loaded frame.
data.columns

In [None]:
# Print the frame summary: per-column dtype and non-null count.
data.info()

In [None]:
# Per-column summary statistics.
data.describe()

In [None]:
# (rows, columns) of the dataset.
data.shape

In [None]:
# Number of missing values in each column.
data.isnull().sum()

Converting string categories to integer codes

In [None]:
# Replace every column's values with integer codes.
#
# A separate LabelEncoder is fitted per column and kept in `encoders`, keyed by
# column name, so the code -> original-category mapping of any column
# (including the target 'class') can still be recovered later with
# encoders[col].classes_ or encoders[col].inverse_transform(...).
# Reusing a single encoder would refit it on each column and keep only the
# mapping of the last one.
#
# NOTE: re-running this cell on the already-encoded frame refits the encoders
# on the integer codes and loses the original categories; reload `data` first.
encoders = {}
for i in data.columns:
    la = LabelEncoder()
    data[i] = la.fit_transform(data[i])
    encoders[i] = la

Checking for class imbalance

In [None]:
# Number of rows for each value of the target column 'class'.
data['class'].value_counts()

In [None]:
# Pairwise correlation matrix of all columns in the frame.
cor = data.corr()

# Correlation of each column with the target, largest value first.
rela = cor.loc[:, 'class'].sort_values(ascending=False)
rela

In [None]:
# Annotated heatmap of the correlation matrix, drawn on an explicit 12x12 axes.
fig, ax = plt.subplots(figsize=(12, 12))
sns.heatmap(cor, annot=True, ax=ax)

In [None]:
# Inspect the 'veil-type' column (it is dropped from the frame in the next cell).
data['veil-type']

In [None]:
# Remove the 'veil-type' column from the working frame.
# errors='ignore' makes the cell safe to re-run: once the column is gone a
# plain drop would raise KeyError. Rebinding `data` instead of inplace=True
# keeps the step free of in-place mutation.
data = data.drop(columns='veil-type', errors='ignore')

# Designing Model

In [None]:
# Names of the columns whose correlation with 'class' is strictly positive,
# in the same (descending) order as `rela`.
#
# A boolean mask is used instead of `rela[i]` with an integer i: `rela` is
# indexed by column name, and integer keys on a label-indexed Series rely on
# a positional fallback that pandas has deprecated.
# Entries with NaN correlation compare False and are left out.
# 'class' itself (correlation with itself) is part of this list; it is
# removed when the feature matrix is built.
x = rela[rela > 0].index.tolist()
x

In [None]:
# Build the feature matrix from the selected column names, excluding the target.
#
# Filtering the names before indexing avoids an in-place drop on a frame
# derived from `data`, and keeps the cell re-runnable: on a second run `x` is
# already a DataFrame, and iterating a DataFrame yields its column labels, so
# the same columns are selected again.
feature_cols = [col for col in x if col != 'class']
x = data[feature_cols]
x

In [None]:
# Target vector: the 'class' column of the frame.
y = data['class']
y

In [None]:
# Hold out 25% of the rows for testing; the fixed seed makes the split repeatable.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=42,
)

In [None]:
# Parallel accumulators: each evaluated model appends one entry to both lists.
Name, Accuracy = [], []

# Empty frame; presumably filled with the comparison table later (it is not
# populated in the cells visible here).
results = pd.DataFrame()

# Applying Logistic Regression

In [None]:
# Fit a logistic-regression classifier with default settings on the training
# split; fit() returns the estimator, which is then shown as the cell output.
lr = LogisticRegression().fit(xtrain, ytrain)
lr

In [None]:
# Class predictions of the fitted model for the held-out test features.
predicted = lr.predict(xtest)
predicted

In [None]:
# Test-set accuracy of the logistic-regression predictions, as a percentage.
lr_accuracy_pct = accuracy_score(ytest, predicted) * 100
print(f"Accuracy score using Logistic Regression is: {lr_accuracy_pct}%")

In [None]:
# Record this model in the comparison lists.
# NOTE(review): `Name` receives the fitted estimator object itself, not a
# string label — confirm that is what the results table expects.
# Re-running this cell appends a duplicate entry.
lr_accuracy_pct = accuracy_score(ytest, predicted) * 100
Name.append(lr)
Accuracy.append(lr_accuracy_pct)

In [None]:
# ROC curve and AUC for the logistic-regression model on the test split.
#
# roc_curve needs a continuous score per sample. Passing the thresholded 0/1
# labels in `predicted` gives it only two distinct score values, so the
# "curve" collapses to one operating point and the AUC describes that single
# threshold rather than the model's ranking. Use the predicted probability of
# the positive class (label 1) instead.
positive_col = list(lr.classes_).index(1)  # predict_proba column for label 1
positive_scores = lr.predict_proba(xtest)[:, positive_col]

fpr, tpr, threshold = roc_curve(ytest, positive_scores, pos_label=1)
plt.plot(fpr, tpr)
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC curve")
plt.show()
print("AUC value is {} ".format(auc(fpr, tpr)))

In [None]:
# Confusion matrix with rows/columns fixed to label order [0, 1],
# shown as a heatmap with integer cell annotations.
confu = confusion_matrix(ytest, predicted, labels=[0, 1])
ax = sns.heatmap(confu, annot=True, fmt='d')
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")

In [None]:
# Header line followed by the per-class precision/recall/F1 report.
print(
    "Classification Report for our model is ",
    classification_report(ytest, predicted),
    sep="\n",
)

In [None]:
# Run the fitted model on both splits so their accuracies can be compared.
train_pred, test_pred = (lr.predict(split) for split in (xtrain, xtest))

In [None]:
# Fraction of correctly classified samples on each split (values in [0, 1]).
train_accuracy = accuracy_score(y_true=ytrain, y_pred=train_pred)
test_accuracy = accuracy_score(y_true=ytest, y_pred=test_pred)

In [None]:
# Report both accuracies as percentages with two decimals.
print("Training Accuracy: {:.2f}%".format(train_accuracy * 100))
print("Test Accuracy: {:.2f}%".format(test_accuracy * 100))

In [None]:
# Data
labels = ['Training Accuracy', 'Test Accuracy']
values = [train_accuracy, test_accuracy]

x = np.arange(len(labels))

In [None]:
# Plotting
plt.figure(figsize=(8, 6))
plt.bar(x, values, color=['blue', 'orange'])
plt.xlabel('Accuracy')
plt.ylabel('Score')
plt.title('Training vs Test Accuracy')
plt.xticks(x, labels)
plt.ylim(0, 1)  # Limit y-axis to [0, 1] for accuracy scores
plt.show()