# **IMPORTING THE NECESSARY LIBRARIES**

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,confusion_matrix,roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder

**IMPORTING THE DATASET**

In [3]:
mushroomds = pd.read_csv("/Users/muralik/Desktop/Datasets/mushrooms.csv")
mushroomds.head()

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,...,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,...,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,...,s,w,w,p,w,o,e,n,a,g


In [4]:
#Dropping the duplicate values 
mushroomds.drop_duplicates()

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,...,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,...,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,...,s,w,w,p,w,o,e,n,a,g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,e,k,s,n,f,n,a,c,b,y,...,s,o,o,p,o,o,p,b,c,l
8120,e,x,s,n,f,n,a,c,b,y,...,s,o,o,p,n,o,p,b,v,l
8121,e,f,s,n,f,n,a,c,b,n,...,s,o,o,p,o,o,p,b,c,l
8122,p,k,y,n,f,y,f,c,n,b,...,k,w,w,p,w,o,e,w,v,l


In [5]:
#Getting the shape of the dataset
mushroomds.shape

(8124, 23)

**EXPLORATORY DATA ANALYSIS**

In [6]:
#Encoding the field Class
mushroomds["class"].unique()
mushroomds["class"].replace(["p","e"],[0,1],inplace=True)

In [7]:
#Encoding the field cap-shape
mushroomds["cap-shape"].unique()
mushroomds["cap-shape"].replace(["x","b","s","f","k","c"],[0,1,2,3,4,5],inplace=True)

In [8]:
#Encoding the field cap-surface
mushroomds["cap-surface"].unique()
mushroomds["cap-surface"].replace(["s","y","f","g"],[0,1,2,3],inplace=True)

In [9]:
#Encoding the field cap-color
mushroomds["cap-color"].unique()
mushroomds["cap-color"].replace(['n', 'y', 'w', 'g', 'e', 'p', 'b', 'u', 'c', 'r'],
                               [0,1,2,3,4,5,6,7,8,9],inplace=True)

In [10]:
#Encoding the field bruises
mushroomds["bruises"].unique()
mushroomds["bruises"].replace(["t","f"],[0,1],inplace=True)

In [11]:
#Encoding the field odor
mushroomds["odor"].unique()
mushroomds["odor"].replace(['p', 'a', 'l', 'n', 'f', 'c', 'y', 's', 'm'],
                          [0,1,2,3,4,5,6,7,8],inplace=True)

In [12]:
#Encoding the field gill-attachment
mushroomds["gill-attachment"].unique()
mushroomds["gill-attachment"].replace(["f","a"],[1,0],inplace=True)

In [13]:
labelencode = LabelEncoder()
for i in mushroomds.columns:
    mushroomds[i] = labelencode.fit_transform(mushroomds[i])
mushroomds

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,0,0,0,0,0,0,1,0,1,4,...,2,7,7,0,2,1,4,2,3,5
1,1,0,0,1,0,1,1,0,0,4,...,2,7,7,0,2,1,4,3,2,1
2,1,1,0,2,0,2,1,0,0,5,...,2,7,7,0,2,1,4,3,2,3
3,0,0,1,2,0,0,1,0,1,5,...,2,7,7,0,2,1,4,2,3,5
4,1,0,0,3,1,3,1,1,0,4,...,2,7,7,0,2,1,0,3,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,1,4,0,0,1,3,0,0,0,11,...,2,5,5,0,1,1,4,0,1,2
8120,1,0,0,0,1,3,0,0,0,11,...,2,5,5,0,0,1,4,0,4,2
8121,1,3,0,0,1,3,0,0,0,5,...,2,5,5,0,1,1,4,0,1,2
8122,0,4,1,0,1,6,1,0,1,0,...,1,7,7,0,2,1,0,7,4,2


**SPLITTING THE DATA**

In [14]:
X_log = mushroomds.drop(["class"],axis=1)
Y_log = mushroomds[["class"]]
X_log_train,X_log_test,Y_log_train,Y_log_test = train_test_split(X_log,Y_log,test_size=0.3,random_state=42)
logisticregressor = LogisticRegression(max_iter=500)
logisticregressor.fit(X_log_train,Y_log_train)
pred = logisticregressor.predict(X_log_test)

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


**CHECKING THE AUROC SCORE FOR THE MODEL**

In [16]:
Auc = roc_auc_score(Y_log_test,pred)
print(f"The accuracy of the machine learning model is {Auc * 100} %")

The accuracy of the machine learning model is 95.61584677036369 %
