In [26]:
import pandas as pd
import pandas_gbq as pdq
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,classification_report

sql = "SELECT * FROM `cloud-midterm-ml.MidtermMLDataset.maintable`"
mushroomData = pdq.read_gbq(sql)

mushroomData

Downloading: 100%|[32m██████████[0m|


Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,y,e,False,m,f,c,b,w,...,y,c,c,p,w,n,n,w,c,d
1,p,k,y,c,False,m,a,c,b,w,...,y,c,c,p,w,n,n,w,c,d
2,p,k,y,n,False,m,f,c,b,w,...,y,c,c,p,w,n,n,w,c,d
3,p,f,y,e,False,m,a,c,b,w,...,y,c,c,p,w,n,n,w,c,d
4,p,k,y,e,False,m,f,c,b,y,...,y,c,c,p,w,n,n,w,c,d
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,e,f,y,n,True,n,f,c,b,w,...,s,e,e,p,w,t,e,w,c,w
8120,e,k,s,e,True,n,f,c,b,e,...,s,e,e,p,w,t,e,w,c,w
8121,e,x,y,n,True,n,f,c,b,w,...,s,e,w,p,w,t,e,w,c,w
8122,e,k,s,e,True,n,f,c,b,e,...,s,w,w,p,w,t,e,w,c,w


In [27]:
selectedCols = ['class','bruises','gill-attachment','gill-spacing','gill-size','stalk-shape']
labelEncoder = LabelEncoder()

for col in selectedCols:
    mushroomData[col] = labelEncoder.fit_transform(mushroomData[col])
    print(f"\nEncoding for column: {col}")
    for i, class_ in enumerate(labelEncoder.classes_):
        print(f"{class_} => {i}")



Encoding for column: class
e => 0
p => 1

Encoding for column: bruises
0.0 => 0
1.0 => 1

Encoding for column: gill-attachment
a => 0
f => 1

Encoding for column: gill-spacing
c => 0
w => 1

Encoding for column: gill-size
b => 0
n => 1

Encoding for column: stalk-shape
e => 0
t => 1


In [6]:
X = mushroomData[['bruises', 'gill-attachment', 'gill-spacing', 'gill-size', 'stalk-shape']]
y = mushroomData['class']
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(x_train, y_train)

y_pred = knn.predict(x_test)

print("\nKNN Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


KNN Accuracy: 0.9098966026587888

Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.94      0.92      2123
           1       0.93      0.88      0.90      1939

    accuracy                           0.91      4062
   macro avg       0.91      0.91      0.91      4062
weighted avg       0.91      0.91      0.91      4062



In [10]:
bruises = int(input("Bruises (1 for Yes, 0 for No): "))
gillA = int(input("Gill Attachment: (1 for Free, 0 for Attached): "))
gillSP = int(input("Gill Spacing: (1 for Crowded, 0 for Close): "))
gillSZ = int(input("Gill Size: (1 for Narrow, 0 for Broad): ")) 
stalk = int(input("Stalk Shape: (1 for Tapering, 0 for Enlarging): "))

mushroomInput = pd.DataFrame({'bruises':[bruises],'gill-attachment':[gillA],'gill-spacing':[gillSP],
                             'gill-size':[gillSZ],'stalk-shape':[stalk]})

pred = knn.predict(mushroomInput)

if pred == 0:
    print("This mushroom is poisonous")
else:
    print("This mushroom is edible")

Bruises (1 for Yes, 0 for No):  1
Gill Attachment: (1 for Free, 0 for Attached):  0
Gill Spacing: (1 for Crowded, 0 for Close):  0
Gill Size: (1 for Narrow, 0 for Broad):  1
Stalk Shape: (1 for Tapering, 0 for Enlarging):  0


This mushroom is edible
