---
$Import$

---

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, BaggingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score
from sklearn.model_selection import cross_val_score

---
$Data$

---

In [None]:
# Location of the mushroom CSV, named once so it is easy to repoint.
DATA_PATH = "mushrooms_data.csv"
df = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first five rows to inspect the column names and raw values.
df.head(n=5)

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,stalk-root,stalk-surface-above-ring,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,e,c,s,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,e,c,s,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,n,a,g


---
Oh no! The data is categorical, so let's convert it to numeric codes.

---

In [None]:
# Convert the target letters into integer labels. LabelEncoder orders the
# classes alphabetically, so "e" becomes 0 and "p" becomes 1.
le = LabelEncoder()
le.fit(df["class"])
df["class"] = le.transform(df["class"])

In [None]:
# Keep a handle on the column labels; the following cell iterates over them
# to encode every column of the frame.
columns = df.columns

In [None]:
# Replace every column's category letters with integer codes, using a fresh
# encoder per column so the codes of one column never leak into another.
# NOTE(review): integer codes impose an arbitrary order on nominal categories;
# tree models do not mind, but linear/SVM models may do better with one-hot
# encoding -- worth confirming.
for column_name in columns:
    le = LabelEncoder()
    column_values = df[column_name]
    df[column_name] = le.fit(column_values).transform(column_values)
df.head()

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,stalk-root,stalk-surface-above-ring,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,1,5,2,4,1,6,1,0,1,4,0,3,2,2,7,7,0,2,1,4,2,3,5
1,0,5,2,9,1,0,1,0,0,4,0,2,2,2,7,7,0,2,1,4,3,2,1
2,0,0,2,8,1,3,1,0,0,5,0,2,2,2,7,7,0,2,1,4,3,2,3
3,1,5,3,8,1,6,1,0,1,5,0,3,2,2,7,7,0,2,1,4,2,3,5
4,0,5,2,3,0,5,1,1,0,4,1,3,2,2,7,7,0,2,1,0,3,0,1


---
Yup, done!

---

In [None]:
# Work on a copy so that popping the target does not alter `df`.
data = df.copy()
Y_data = data.pop("class")  # target: encoded mushroom class
X_data = data               # features: every remaining column

# Stratify on the target so both splits keep the same class balance, and fix
# the seed so the split (and every score reported below) is reproducible
# after a kernel restart.
X_train, X_test, Y_train, Y_test = train_test_split(
    X_data,
    Y_data,
    stratify=Y_data,
    test_size=0.2,
    random_state=42,
)

---
Starting with:

* $LogisticRegression$

---

In [None]:
# Baseline linear model. The default iteration cap (100) is too low for these
# unscaled integer-coded features -- the solver stops early with a
# ConvergenceWarning -- so raise max_iter to let the optimisation finish.
Lr = LogisticRegression(C=0.1, max_iter=5000).fit(X_train, Y_train)
print(f"Accuracy : {Lr.score(X_test,Y_test)}")

Accuracy : 0.9298461538461539


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


---
In search of better performance.

---

In [None]:
# 10-fold cross-validated search over the regularisation strength C.
# The search clones its estimator for every fold, so the estimator handed in
# must carry a max_iter high enough to converge; otherwise each fold fit stops
# at the default iteration limit and the comparison between C values is
# made on half-optimised models.
gs_lr = GridSearchCV(
    LogisticRegression(max_iter=5000),
    param_grid={"C": [0.001, 0.01, 0.1, 1.0, 10.0, 100.0]},
    cv=10,
    scoring="accuracy",
)
gs_lr.fit(X_train, Y_train)

In [None]:
# Value of C that obtained the best mean cross-validated accuracy in the search.
gs_lr.best_params_

{'C': 100.0}

In [None]:
# GridSearchCV (refit=True by default) has already refitted the winning
# configuration on the whole training set, so best_estimator_ is ready to use;
# fitting it again would only repeat the same work.
Lr_best = gs_lr.best_estimator_
print(f"Accuracy : {Lr_best.score(X_test,Y_test)}")

Accuracy : 0.9575384615384616


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


---
Yeah, quite improved. But we have more.

* $DecisionTreeClassifier$

---

In [None]:
# Single decision tree, depth-limited to 10. A fixed seed makes the tie-breaking
# between equally good splits reproducible across runs.
DTC = DecisionTreeClassifier(max_depth=10, random_state=42).fit(X_train, Y_train)

# Predict once and reuse the result for both the confusion matrix and F1.
dtc_pred = DTC.predict(X_test)
print(f"Accuracy : {DTC.score(X_test, Y_test)}")
print(f"CM  : {confusion_matrix(Y_test,dtc_pred)}")
print(f"F1 Score  : {f1_score(Y_test,dtc_pred)}")

Accuracy : 1.0
CM  : [[842   0]
 [  0 783]]
F1 Score  : 1.0


---
* $RandomForestClassifier$

---

In [None]:
# Random forest of depth-10 trees. Bootstrapping and feature sub-sampling are
# random, so seed the forest to get the same model on every run.
RFC = RandomForestClassifier(max_depth=10, random_state=42).fit(X_train, Y_train)

# Predict once and reuse the result for both the confusion matrix and F1.
rfc_pred = RFC.predict(X_test)
print(f"Accuracy : {RFC.score(X_test, Y_test)}")
print(f"CM  : {confusion_matrix(Y_test,rfc_pred)}")
print(f"F1 Score  : {f1_score(Y_test,rfc_pred)}")

Accuracy : 1.0
CM  : [[842   0]
 [  0 783]]
F1 Score  : 1.0


---
* $AdaBoostClassifier$

---

In [None]:
# AdaBoost over depth-10 decision trees.
# The base learner is passed positionally: the keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases, and the
# positional form works with both the old and the new name.
# The seed is forwarded to the base learners, making the ensemble reproducible.
AdaBoost = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=10),
    random_state=42,
).fit(X_train, Y_train)

# Predict once and reuse the result for both the confusion matrix and F1.
adaboost_pred = AdaBoost.predict(X_test)
print(f"Accuracy : {AdaBoost.score(X_test, Y_test)}")
print(f"CM  : {confusion_matrix(Y_test,adaboost_pred)}")
print(f"F1 Score  : {f1_score(Y_test,adaboost_pred)}")

Accuracy : 1.0
CM  : [[842   0]
 [  0 783]]
F1 Score  : 1.0


---
* $BaggingClassifier$

---

In [None]:
# Bagging over depth-10 decision trees.
# The base learner is passed positionally: the keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases, and the
# positional form works with both the old and the new name.
# Bootstrap sampling is random, so seed the ensemble for reproducible results.
Bagging = BaggingClassifier(
    DecisionTreeClassifier(max_depth=10),
    random_state=42,
).fit(X_train, Y_train)

# Predict once and reuse the result for both the confusion matrix and F1.
bagging_pred = Bagging.predict(X_test)
print(f"Accuracy : {Bagging.score(X_test, Y_test)}")
print(f"CM  : {confusion_matrix(Y_test,bagging_pred)}")
print(f"F1 Score  : {f1_score(Y_test,bagging_pred)}")

Accuracy : 1.0
CM  : [[842   0]
 [  0 783]]
F1 Score  : 1.0


---
* $SupportVectorMachineClassifier$

---

In [None]:
# Support vector classifier with fairly strong regularisation (C=0.1).
svc = SVC(C=0.1)
svc.fit(X_train, Y_train)

# Score the held-out set from a single batch of predictions.
svc_pred = svc.predict(X_test)
svc_accuracy = accuracy_score(Y_test, svc_pred)
svc_cm = confusion_matrix(Y_test, svc_pred)
svc_f1 = f1_score(Y_test, svc_pred)
print(f"Accuracy : {svc_accuracy}")
print(f"CM  : {svc_cm}")
print(f"F1 Score  : {svc_f1}")

Accuracy : 0.9403076923076923
CM  : [[839   3]
 [ 94 689]]
F1 Score  : 0.9342372881355931


---
Tuning time!!!

---

In [None]:
# 10-fold cross-validated search over the SVC regularisation strength C.
gs_svc = GridSearchCV(
    svc,
    param_grid={"C": [0.001, 0.01, 0.1, 1.0, 10.0, 100.0]},
    cv=10,
    scoring="accuracy",
).fit(X_train, Y_train)

# With the default refit=True the best configuration has already been refitted
# on the full training set, so best_estimator_ is used as-is instead of being
# trained a second time.
svc_best = gs_svc.best_estimator_

# Predict once and reuse the result for both the confusion matrix and F1.
svc_best_pred = svc_best.predict(X_test)
print(f"Accuracy : {svc_best.score(X_test, Y_test)}")
print(f"CM  : {confusion_matrix(Y_test,svc_best_pred)}")
print(f"F1 Score  : {f1_score(Y_test,svc_best_pred)}")

Accuracy : 1.0
CM  : [[842   0]
 [  0 783]]
F1 Score  : 1.0


---
There isn't much more to do with this dataset: the tree-based models and the tuned SVC already reach perfect accuracy on the test split. Thanks.
See you next time.

---