In [40]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import math

In [29]:
# Load the mushroom dataset from 'mushrooms.csv', resolved relative to the
# kernel's working directory. Judging by the preview rendered further down,
# every column holds single-character category codes - TODO confirm for the full file.
data = pd.read_csv('mushrooms.csv')

In [30]:
# Display the loaded frame as this cell's output
data

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,stalk-root,stalk-surface-above-ring,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,e,c,s,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,e,c,s,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,n,a,g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,e,k,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,b,c,l
8120,e,x,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,b,v,l
8121,e,f,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,b,c,l
8122,p,k,y,n,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l


In [31]:
# Encode every single-character category code as its integer code point (ord).
#
# This is done column-wise with Series.map instead of the element-by-element
# chained assignment `data[i][j] = ...`: chained assignment writes through an
# intermediate object, raises SettingWithCopyWarning, and does not modify the
# frame at all when pandas copy-on-write is active. Mapping whole columns also
# yields integer-dtype columns rather than object columns holding Python ints.
#
# Columns that are already numeric are passed through unchanged, so running
# this cell a second time does not fail with ord() being applied to an int.
data = data.apply(
  lambda column: column if pd.api.types.is_numeric_dtype(column) else column.map(ord)
)

In [42]:
# Min-max normalization: rescale each column using its own minimum and maximum
data_min = data.min(axis=0)
data_max = data.max(axis=0)

# 'veil-type' has only one distinct value, so its max - min would be zero.
# Forcing its minimum to 0 keeps the denominator usable for that column
# and prevents the constant column from hindering the normalization.
data_min.loc['veil-type'] = 0

norm_data = data.sub(data_min).div(data_max - data_min)
norm_data

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,stalk-root,stalk-surface-above-ring,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,1,1,0.684211,0.521739,1,0.625,1,0,1,0.391304,0,0.745098,0.684211,0.684211,0.913043,0.913043,1,0.818182,0.166667,1,0.391304,0.75,0.894737
1,0,1,0.684211,1,1,0,1,0,0,0.391304,0,0.705882,0.684211,0.684211,0.913043,0.913043,1,0.818182,0.166667,1,0.521739,0.541667,0.157895
2,0,0,0.684211,0.913043,1,0.458333,1,0,0,0.521739,0,0.705882,0.684211,0.684211,0.913043,0.913043,1,0.818182,0.166667,1,0.521739,0.541667,0.473684
3,1,1,1,0.913043,1,0.625,1,0,1,0.521739,0,0.745098,0.684211,0.684211,0.913043,0.913043,1,0.818182,0.166667,1,0.391304,0.75,0.894737
4,0,1,0.684211,0.217391,0,0.541667,1,1,0,0.391304,1,0.745098,0.684211,0.684211,0.913043,0.913043,1,0.818182,0.166667,0,0.521739,0,0.157895
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,0,0.409091,0.684211,0.521739,0,0.541667,0,0,0,1,0,0,0.684211,0.684211,0.565217,0.565217,1,0.0909091,0.166667,1,0,0.0833333,0.421053
8120,0,1,0.684211,0.521739,0,0.541667,0,0,0,1,0,0,0.684211,0.684211,0.565217,0.565217,1,0,0.166667,1,0,0.875,0.421053
8121,0,0.181818,0.684211,0.521739,0,0.541667,0,0,0,0.521739,0,0,0.684211,0.684211,0.565217,0.565217,1,0.0909091,0.166667,1,0,0.0833333,0.421053
8122,1,0.409091,1,0.521739,0,1,1,0,1,0,1,0,0.684211,0.263158,0.913043,0.913043,1,0.818182,0.166667,0,0.913043,0.875,0.421053


In [58]:
# Shuffle the dataset once, then cut it into disjoint 80% / 10% / 10% partitions.
#
# Drawing train, dev and test with three separate .sample() calls would pick
# each partition independently from the full frame, so the same rows could land
# in several partitions and evaluation data would leak into training.
# Slicing a single shuffled copy guarantees the partitions never overlap.
SPLIT_SEED = 42  # fixed seed so the split is reproducible across kernel restarts
shuffled = norm_data.sample(frac=1, random_state=SPLIT_SEED).reset_index(drop=True)

train_end = int(0.8 * len(shuffled))
dev_end = train_end + int(0.1 * len(shuffled))

train = shuffled.iloc[:train_end].reset_index(drop=True)
dev = shuffled.iloc[train_end:dev_end].reset_index(drop=True)
# The test partition takes whatever remains, so no row is dropped by rounding.
test = shuffled.iloc[dev_end:].reset_index(drop=True)

In [57]:
# For splitting target vector from feature frame
def split_target(df, target_col='class'):
  """Separate the label column from the feature columns of ``df``.

  Args:
    df: DataFrame holding the feature columns and, normally, the label column.
    target_col: Name of the label column. Defaults to 'class'.

  Returns:
    A ``(features, target)`` tuple. ``features`` is a new DataFrame containing
    every column of ``df`` except ``target_col``; ``target`` is the
    ``target_col`` column as a Series. When ``df`` has no ``target_col``
    column, ``features`` contains all columns and ``target`` is an empty
    DataFrame.
  """
  # Features must come from the frame passed in. Reading them from the
  # notebook-level `test` frame would hand back test-set features for every
  # partition. DataFrame.drop also avoids DataFrame.append, which is no longer
  # available in pandas 2.x, and the row-append-then-transpose round trip.
  features = df.drop(columns=[target_col], errors='ignore')
  target = df[target_col] if target_col in df.columns else pd.DataFrame()
  return features, target

In [59]:
# Separate features from labels for the train, dev and test partitions
(x_train, y_train), (x_dev, y_dev), (x_test, y_test) = [
  split_target(subset) for subset in (train, dev, test)
]

In [64]:
# Feature selection