# Importing the Required Libraries

In [116]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

In [117]:
# Load the dataset
data = pd.read_csv('mushroom_trn_data.csv')

In [118]:
pd.set_option('display.max_columns', None)

# Head, Tail and other Info about the Dataset

In [119]:
data.head()

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,stalk-root,stalk-surface-above-ring,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,e,f,y,g,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,n,y,d
1,e,f,f,e,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,k,v,d
2,e,f,s,g,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,k,a,g
3,p,x,s,w,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,n,v,d
4,p,x,s,n,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p


In [120]:
data.tail()

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,stalk-root,stalk-surface-above-ring,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
7306,e,x,f,e,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,n,v,d
7307,e,x,y,n,f,n,f,w,n,w,e,b,f,f,w,n,p,w,o,e,w,v,l
7308,e,f,f,w,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,k,a,g
7309,e,x,f,g,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,n,y,d
7310,e,f,f,n,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,n,v,d


In [121]:
#  Mushroom Data: Attribute Information: (classes: edible=e, poisonous=p)

#  cap-shape: bell=b,conical=c,convex=x,flat=f, knobbed=k,sunken=s

#  cap-surface: fibrous=f,grooves=g,scaly=y,smooth=s

#  cap-color: brown=n,buff=b,cinnamon=c,gray=g,green=r,pink=p,purple=u,red=e,white=w,yellow=y

#  bruises: bruises=t,no=f

#  odor: almond=a,anise=l,creosote=c,fishy=y,foul=f,musty=m,none=n,pungent=p,spicy=s

#  gill-attachment: attached=a,descending=d,free=f,notched=n

#  gill-spacing: close=c,crowded=w,distant=d

#  gill-size: broad=b,narrow=n

#  gill-color: black=k,brown=n,buff=b,chocolate=h,gray=g, green=r,orange=o,pink=p,purple=u,red=e,white=w,yellow=y

#  stalk-shape: enlarging=e,tapering=t

#  stalk-root: bulbous=b,club=c,cup=u,equal=e,rhizomorphs=z,rooted=r,missing=?

#  stalk-surface-above-ring: fibrous=f,scaly=y,silky=k,smooth=s

#  stalk-surface-below-ring: fibrous=f,scaly=y,silky=k,smooth=s

#  stalk-color-above-ring: brown=n,buff=b,cinnamon=c,gray=g,orange=o,pink=p,red=e,white=w,yellow=y

#  stalk-color-below-ring: brown=n,buff=b,cinnamon=c,gray=g,orange=o,pink=p,red=e,white=w,yellow=y

#  veil-type: partial=p,universal=u

#  veil-color: brown=n,orange=o,white=w,yellow=y

#  ring-number: none=n,one=o,two=t

#  ring-type: cobwebby=c,evanescent=e,flaring=f,large=l,none=n,pendant=p,sheathing=s,zone=z

#  spore-print-color: black=k,brown=n,buff=b,chocolate=h,green=r,orange=o,purple=u,white=w,yellow=y

#  population: abundant=a,clustered=c,numerous=n,scattered=s,several=v,solitary=y

#  habitat: grasses=g,leaves=l,meadows=m,paths=p,urban=u,waste=w,woods=d

In [122]:
print("Number of Rows", data.shape[0])
print("Number of Columns", data.shape[1])

Number of Rows 7311
Number of Columns 23


# Checking Null Values in the Dataset

In [123]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7311 entries, 0 to 7310
Data columns (total 23 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   class                     7311 non-null   object
 1   cap-shape                 7311 non-null   object
 2   cap-surface               7311 non-null   object
 3   cap-color                 7311 non-null   object
 4   bruises                   7311 non-null   object
 5   odor                      7311 non-null   object
 6   gill-attachment           7311 non-null   object
 7   gill-spacing              7311 non-null   object
 8   gill-size                 7311 non-null   object
 9   gill-color                7311 non-null   object
 10  stalk-shape               7311 non-null   object
 11  stalk-root                7311 non-null   object
 12  stalk-surface-above-ring  7311 non-null   object
 13  stalk-surface-below-ring  7311 non-null   object
 14  stalk-color-above-ring  

In [124]:
data.isnull().sum()

class                       0
cap-shape                   0
cap-surface                 0
cap-color                   0
bruises                     0
odor                        0
gill-attachment             0
gill-spacing                0
gill-size                   0
gill-color                  0
stalk-shape                 0
stalk-root                  0
stalk-surface-above-ring    0
stalk-surface-below-ring    0
stalk-color-above-ring      0
stalk-color-below-ring      0
veil-type                   0
veil-color                  0
ring-number                 0
ring-type                   0
spore-print-color           0
population                  0
habitat                     0
dtype: int64

In [125]:
data.describe() 

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,stalk-root,stalk-surface-above-ring,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
count,7311,7311,7311,7311,7311,7311,7311,7311,7311,7311,7311,7311,7311,7311,7311,7311,7311,7311,7311,7311,7311,7311,7311
unique,2,6,4,10,2,9,2,2,2,12,2,5,4,4,9,9,1,4,3,5,9,6,7
top,e,x,y,n,f,n,f,c,b,b,t,b,s,s,w,w,p,w,o,p,w,v,d
freq,3787,3287,2899,2039,4268,3177,7129,6124,5056,1557,4156,3401,4661,4430,4024,3968,7311,7139,6730,3563,2157,3639,2839


# Storing Feature Matrix in 'X' and Response (Target) in 'Y'

In [126]:
X = data.drop('class', axis=1)
y = data['class']

In [127]:
X

Unnamed: 0,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,stalk-root,stalk-surface-above-ring,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,f,y,g,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,n,y,d
1,f,f,e,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,k,v,d
2,f,s,g,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,k,a,g
3,x,s,w,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,n,v,d
4,x,s,n,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7306,x,f,e,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,n,v,d
7307,x,y,n,f,n,f,w,n,w,e,b,f,f,w,n,p,w,o,e,w,v,l
7308,f,f,w,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,k,a,g
7309,x,f,g,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,n,y,d


In [128]:
y

0       e
1       e
2       e
3       p
4       p
       ..
7306    e
7307    e
7308    e
7309    e
7310    e
Name: class, Length: 7311, dtype: object

# Performing One-Hot Encoding on Categorical Features

In [129]:
encoder = OneHotEncoder()
X_encoded = encoder.fit_transform(X)

In [130]:
feature_names = encoder.get_feature_names_out(X.columns)
print("\nOne-Hot Encoded Feature Names:\n", feature_names)


One-Hot Encoded Feature Names:
 ['cap-shape_b' 'cap-shape_c' 'cap-shape_f' 'cap-shape_k' 'cap-shape_s'
 'cap-shape_x' 'cap-surface_f' 'cap-surface_g' 'cap-surface_s'
 'cap-surface_y' 'cap-color_b' 'cap-color_c' 'cap-color_e' 'cap-color_g'
 'cap-color_n' 'cap-color_p' 'cap-color_r' 'cap-color_u' 'cap-color_w'
 'cap-color_y' 'bruises_f' 'bruises_t' 'odor_a' 'odor_c' 'odor_f' 'odor_l'
 'odor_m' 'odor_n' 'odor_p' 'odor_s' 'odor_y' 'gill-attachment_a'
 'gill-attachment_f' 'gill-spacing_c' 'gill-spacing_w' 'gill-size_b'
 'gill-size_n' 'gill-color_b' 'gill-color_e' 'gill-color_g' 'gill-color_h'
 'gill-color_k' 'gill-color_n' 'gill-color_o' 'gill-color_p'
 'gill-color_r' 'gill-color_u' 'gill-color_w' 'gill-color_y'
 'stalk-shape_e' 'stalk-shape_t' 'stalk-root_?' 'stalk-root_b'
 'stalk-root_c' 'stalk-root_e' 'stalk-root_r' 'stalk-surface-above-ring_f'
 'stalk-surface-above-ring_k' 'stalk-surface-above-ring_s'
 'stalk-surface-above-ring_y' 'stalk-surface-below-ring_f'
 'stalk-surface-below-ring

In [131]:
# Converting the one-hot encoded sparse matrix to a DataFrame
X_encoded_df = pd.DataFrame(X_encoded.toarray(), columns=feature_names)

# Displaying the one-hot encoded DataFrame
print("\nOne-Hot Encoded Dataset:\n", X_encoded_df.head())


One-Hot Encoded Dataset:
    cap-shape_b  cap-shape_c  cap-shape_f  cap-shape_k  cap-shape_s  \
0          0.0          0.0          1.0          0.0          0.0   
1          0.0          0.0          1.0          0.0          0.0   
2          0.0          0.0          1.0          0.0          0.0   
3          0.0          0.0          0.0          0.0          0.0   
4          0.0          0.0          0.0          0.0          0.0   

   cap-shape_x  cap-surface_f  cap-surface_g  cap-surface_s  cap-surface_y  \
0          0.0            0.0            0.0            0.0            1.0   
1          0.0            1.0            0.0            0.0            0.0   
2          0.0            0.0            0.0            1.0            0.0   
3          1.0            0.0            0.0            1.0            0.0   
4          1.0            0.0            0.0            1.0            0.0   

   cap-color_b  cap-color_c  cap-color_e  cap-color_g  cap-color_n  \
0          0.

# Splitting the Dataset Into the Training Set and Testing Set

In [132]:
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.3, random_state=42)

In [133]:
# Converting X_train from sparse matrix to DataFrame
X_train_df = pd.DataFrame(X_train.toarray(), columns=feature_names)

# Displaying X_train DataFrame
print("X_train Dataset:")
print(X_train_df.head())

X_train Dataset:
   cap-shape_b  cap-shape_c  cap-shape_f  cap-shape_k  cap-shape_s  \
0          0.0          0.0          1.0          0.0          0.0   
1          0.0          0.0          0.0          0.0          0.0   
2          0.0          0.0          1.0          0.0          0.0   
3          0.0          0.0          0.0          0.0          0.0   
4          0.0          0.0          0.0          0.0          0.0   

   cap-shape_x  cap-surface_f  cap-surface_g  cap-surface_s  cap-surface_y  \
0          0.0            1.0            0.0            0.0            0.0   
1          1.0            0.0            0.0            1.0            0.0   
2          0.0            0.0            0.0            0.0            1.0   
3          1.0            0.0            0.0            0.0            1.0   
4          1.0            0.0            0.0            0.0            1.0   

   cap-color_b  cap-color_c  cap-color_e  cap-color_g  cap-color_n  \
0          0.0         

In [134]:
# Displaying y_train
print("\ny_train Dataset:")
print(y_train.head())


y_train Dataset:
613     e
2705    p
3929    p
2840    e
4060    p
Name: class, dtype: object


# Importing the Models

In [135]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

# Training the Model

In [136]:
# With Random Forest Classifier
rfc = RandomForestClassifier()
rfc.fit(X_train, y_train)

# With KNeighbors Classifier
knc = KNeighborsClassifier()
knc.fit(X_train, y_train)

# With Decision Tree Classifier
dt = DecisionTreeClassifier()
dt.fit(X_train, y_train)

# With SVC
svc = SVC()
svc.fit(X_train, y_train)

# Prediction on the Testing Data

In [137]:
y_pred2 = rfc.predict(X_test)
y_pred3 = knc.predict(X_test)
y_pred4 = dt.predict(X_test)
y_pred5 = svc.predict(X_test)

# Evaluating the Algorithm

In [138]:
from sklearn.metrics import accuracy_score

In [139]:
print ("ACC RFC", accuracy_score(y_test, y_pred2))
print ("ACC KNC", accuracy_score(y_test, y_pred3))
print ("ACC DT", accuracy_score(y_test, y_pred4))
print ("ACC SVC", accuracy_score(y_test, y_pred5))

ACC RFC 1.0
ACC KNC 1.0
ACC DT 1.0
ACC SVC 1.0


# Hyperparameter Tuning using GridSearchCV

* Decision Tree Classifier

In [140]:
dt_classifier = DecisionTreeClassifier()
param_grid_dt = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
grid_search_dt = GridSearchCV(estimator=dt_classifier, param_grid=param_grid_dt, cv=5, scoring='accuracy', n_jobs=-1)
grid_search_dt.fit(X_train, y_train)
best_params_dt = grid_search_dt.best_params_
best_dt_model = grid_search_dt.best_estimator_

In [141]:
grid_search_dt.best_estimator_

In [142]:
grid_search_dt.best_score_

0.9998044965786901

* SVM Classifier

In [143]:
svm_classifier = SVC()
param_grid_svm = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid']
}
grid_search_svm = GridSearchCV(estimator=svm_classifier, param_grid=param_grid_svm, cv=5, scoring='accuracy', n_jobs=-1)
grid_search_svm.fit(X_train, y_train)
best_params_svm = grid_search_svm.best_params_
best_svm_model = grid_search_svm.best_estimator_

In [144]:
grid_search_svm.best_estimator_

In [145]:
grid_search_svm.best_score_

1.0

* KNN Classifier

In [146]:
knn_classifier = KNeighborsClassifier()
param_grid_knn = {
    'n_neighbors': [3, 5, 7],
    'weights': ['uniform', 'distance'],
    'p': [1, 2]
}
grid_search_knn = GridSearchCV(estimator=knn_classifier, param_grid=param_grid_knn, cv=5, scoring='accuracy', n_jobs=-1)
grid_search_knn.fit(X_train, y_train)
best_params_knn = grid_search_knn.best_params_
best_knn_model = grid_search_knn.best_estimator_

In [147]:
grid_search_knn.best_estimator_

In [148]:
grid_search_knn.best_score_

1.0

* Random Forest Classifier

In [149]:
rf_classifier = RandomForestClassifier()
param_grid_rf = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
grid_search_rf = GridSearchCV(estimator=rf_classifier, param_grid=param_grid_rf, cv=5, scoring='accuracy', n_jobs=-1)
grid_search_rf.fit(X_train, y_train)
best_params_rf = grid_search_rf.best_params_
best_rf_model = grid_search_rf.best_estimator_

In [150]:
grid_search_rf.best_estimator_

In [151]:
grid_search_rf.best_score_

1.0

# Displaying the best parameters for each classifier

In [152]:
print("\nBest Hyperparameters for Decision Tree:\n", best_params_dt)
print("\nBest Hyperparameters for SVM:\n", best_params_svm)
print("\nBest Hyperparameters for KNN:\n", best_params_knn)
print("\nBest Hyperparameters for Random Forest:\n", best_params_rf)



Best Hyperparameters for Decision Tree:
 {'criterion': 'gini', 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2}

Best Hyperparameters for SVM:
 {'C': 1, 'kernel': 'linear'}

Best Hyperparameters for KNN:
 {'n_neighbors': 3, 'p': 1, 'weights': 'uniform'}

Best Hyperparameters for Random Forest:
 {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}


# Evaluating the models on the test set

In [153]:
y_pred_dt = best_dt_model.predict(X_test)
y_pred_svm = best_svm_model.predict(X_test)
y_pred_knn = best_knn_model.predict(X_test)
y_pred_rf = best_rf_model.predict(X_test)

# K-fold Cross-Validation for Each Classifier

In [154]:
# K-fold cross-validation for Decision Tree
cross_val_scores_dt = cross_val_score(best_dt_model, X_encoded, y, cv=5)
print("\nCross-Validation Scores for Decision Tree:", cross_val_scores_dt)
print("Mean Accuracy:", cross_val_scores_dt.mean())

# K-fold cross-validation for SVM
cross_val_scores_svm = cross_val_score(best_svm_model, X_encoded, y, cv=5)
print("\nCross-Validation Scores for SVM:", cross_val_scores_svm)
print("Mean Accuracy:", cross_val_scores_svm.mean())

# K-fold cross-validation for KNN
cross_val_scores_knn = cross_val_score(best_knn_model, X_encoded, y, cv=5)
print("\nCross-Validation Scores for KNN:", cross_val_scores_knn)
print("Mean Accuracy:", cross_val_scores_knn.mean())

# K-fold cross-validation for Random Forest
cross_val_scores_rf = cross_val_score(best_rf_model, X_encoded, y, cv=5)
print("\nCross-Validation Scores for Random Forest:", cross_val_scores_rf)
print("Mean Accuracy:", cross_val_scores_rf.mean())



Cross-Validation Scores for Decision Tree: [1.         1.         0.99931601 1.         1.        ]
Mean Accuracy: 0.9998632010943911

Cross-Validation Scores for SVM: [1. 1. 1. 1. 1.]
Mean Accuracy: 1.0

Cross-Validation Scores for KNN: [1. 1. 1. 1. 1.]
Mean Accuracy: 1.0

Cross-Validation Scores for Random Forest: [1. 1. 1. 1. 1.]
Mean Accuracy: 1.0


# Confusion Matrices for Each Classifier

In [155]:
print("\nConfusion Matrix for Decision Tree:\n", confusion_matrix(y_test, y_pred_dt))
print("\nConfusion Matrix for SVM:\n", confusion_matrix(y_test, y_pred_svm))
print("\nConfusion Matrix for KNN:\n", confusion_matrix(y_test, y_pred_knn))
print("\nConfusion Matrix for Random Forest:\n", confusion_matrix(y_test, y_pred_rf))


Confusion Matrix for Decision Tree:
 [[1114    0]
 [   0 1080]]

Confusion Matrix for SVM:
 [[1114    0]
 [   0 1080]]

Confusion Matrix for KNN:
 [[1114    0]
 [   0 1080]]

Confusion Matrix for Random Forest:
 [[1114    0]
 [   0 1080]]


# Classification Reports for Each Classifier

In [156]:
print("\nClassification Report for Decision Tree:\n", classification_report(y_test, y_pred_dt))
print("\nClassification Report for SVM:\n", classification_report(y_test, y_pred_svm))
print("\nClassification Report for KNN:\n", classification_report(y_test, y_pred_knn))
print("\nClassification Report for Random Forest:\n", classification_report(y_test, y_pred_rf))


Classification Report for Decision Tree:
               precision    recall  f1-score   support

           e       1.00      1.00      1.00      1114
           p       1.00      1.00      1.00      1080

    accuracy                           1.00      2194
   macro avg       1.00      1.00      1.00      2194
weighted avg       1.00      1.00      1.00      2194


Classification Report for SVM:
               precision    recall  f1-score   support

           e       1.00      1.00      1.00      1114
           p       1.00      1.00      1.00      1080

    accuracy                           1.00      2194
   macro avg       1.00      1.00      1.00      2194
weighted avg       1.00      1.00      1.00      2194


Classification Report for KNN:
               precision    recall  f1-score   support

           e       1.00      1.00      1.00      1114
           p       1.00      1.00      1.00      1080

    accuracy                           1.00      2194
   macro avg       

# Executing the Best Framework on the Test Data

In [157]:
# Load the test data
test_data = pd.read_csv('mushroom_tst_data.csv')


In [158]:
pd.set_option('display.max_columns', None)

In [159]:
test_data.head()

Unnamed: 0,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,stalk-root,stalk-surface-above-ring,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,x,s,g,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,k,v,d
1,k,y,c,f,n,f,w,n,w,e,b,f,s,w,n,p,w,o,e,w,v,l
2,x,s,w,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,n,s,d
3,x,y,n,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,k,v,d
4,f,s,e,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p


In [160]:
encoder = OneHotEncoder()
test_data_encoded = encoder.fit_transform(test_data)

In [161]:
feature_names = encoder.get_feature_names_out(test_data.columns)
print("\nOne-Hot Encoded Feature Names:\n", feature_names)


One-Hot Encoded Feature Names:
 ['cap-shape_b' 'cap-shape_f' 'cap-shape_k' 'cap-shape_s' 'cap-shape_x'
 'cap-surface_f' 'cap-surface_s' 'cap-surface_y' 'cap-color_b'
 'cap-color_c' 'cap-color_e' 'cap-color_g' 'cap-color_n' 'cap-color_p'
 'cap-color_r' 'cap-color_u' 'cap-color_w' 'cap-color_y' 'bruises_f'
 'bruises_t' 'odor_a' 'odor_c' 'odor_f' 'odor_l' 'odor_m' 'odor_n'
 'odor_p' 'odor_s' 'odor_y' 'gill-attachment_a' 'gill-attachment_f'
 'gill-spacing_c' 'gill-spacing_w' 'gill-size_b' 'gill-size_n'
 'gill-color_b' 'gill-color_e' 'gill-color_g' 'gill-color_h'
 'gill-color_k' 'gill-color_n' 'gill-color_o' 'gill-color_p'
 'gill-color_r' 'gill-color_u' 'gill-color_w' 'gill-color_y'
 'stalk-shape_e' 'stalk-shape_t' 'stalk-root_?' 'stalk-root_b'
 'stalk-root_c' 'stalk-root_e' 'stalk-root_r' 'stalk-surface-above-ring_f'
 'stalk-surface-above-ring_k' 'stalk-surface-above-ring_s'
 'stalk-surface-above-ring_y' 'stalk-surface-below-ring_f'
 'stalk-surface-below-ring_k' 'stalk-surface-below-ring_

In [162]:
# Converting the one-hot encoded sparse matrix to a DataFrame
test_data_encoded_df = pd.DataFrame(test_data_encoded.toarray(), columns=feature_names)

# Displaying the one-hot encoded DataFrame
print("\nOne-Hot Encoded Dataset:\n", test_data_encoded_df.head())


One-Hot Encoded Dataset:
    cap-shape_b  cap-shape_f  cap-shape_k  cap-shape_s  cap-shape_x  \
0          0.0          0.0          0.0          0.0          1.0   
1          0.0          0.0          1.0          0.0          0.0   
2          0.0          0.0          0.0          0.0          1.0   
3          0.0          0.0          0.0          0.0          1.0   
4          0.0          1.0          0.0          0.0          0.0   

   cap-surface_f  cap-surface_s  cap-surface_y  cap-color_b  cap-color_c  \
0            0.0            1.0            0.0          0.0          0.0   
1            0.0            0.0            1.0          0.0          1.0   
2            0.0            1.0            0.0          0.0          0.0   
3            0.0            0.0            1.0          0.0          0.0   
4            0.0            1.0            0.0          0.0          0.0   

   cap-color_e  cap-color_g  cap-color_n  cap-color_p  cap-color_r  \
0          0.0          1

In [163]:
# Using the best framework/model to predict class labels for the test data
test_predictions = best_rf_model.predict(X_test)


In [164]:
# Create a DataFrame with test predictions
predictions_df = pd.DataFrame({'Predicted_Label': test_predictions})

# Save the predictions to a text file
predictions_df.to_csv('predictions.txt', index=False)
