In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import precision_recall_fscore_support
from sklearn.tree import export_graphviz
from IPython.display import SVG
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

import warnings
# Suppress every warning for the rest of the session.
warnings.filterwarnings('ignore')

# read in dataframe
# NOTE(review): only 22 column names are supplied. If mushrooms.csv has one more
# column than that, pandas uses the first column as the row index and every name
# below applies to the column one position to its right. The split cell
# (`all.drop('class')`, `df.index[1:]`) appears to rely on exactly that, with the
# file's header row kept as the first data row -- confirm against the file
# before changing this call.
df = pd.read_csv('mushrooms.csv', names=['Class', 'cap_shape', 'cap_surface', 'cap_color', 'bruises', 'odor', 'gill_attachment', 
                                         'gill_spacing', 'gill_size', 'gill_color', 'stalk_shape', 'stalk_root', 
                                         'stalk_surface', 'stalk_surface_above_ring', 'stalk_surface_below_ring', 
                                         'viel_type', 'veil_color', 'ring_number', 'ring_type', 'spore_print_color', 
                                         'population', 'habitat'])

# quantify variables in the dataframe
# Each selected feature is one-hot encoded and its indicator columns are labelled
# with consecutive zero-padded codes ('01', '02', ...). The codes are generated
# from the actual number of categories, so a feature with more or fewer distinct
# values no longer triggers a length-mismatch error when the labels are assigned.
encoded_frames = []
next_code = 1
for feature in ('cap_shape', 'cap_surface', 'cap_color', 'bruises'):
    dummies = pd.get_dummies(df[feature], dtype=int)
    width = dummies.shape[1]
    dummies.columns = [f'{code:02d}' for code in range(next_code, next_code + width)]
    next_code += width
    encoded_frames.append(dummies)

# Keep the per-feature frames available under their original names.
cap_shape, cap_surface, cap_color, bruises = encoded_frames

# The name `all` shadows the builtin; it is kept because the split cell reads it.
all = pd.concat(encoded_frames, axis=1)
display(all)

### Split data

In [None]:
# Features: every encoded row except the one whose index label is 'class'.
# NOTE(review): that row looks like the CSV header read in as data, and the index
# looks like it holds the class labels -- confirm against how `df` is loaded.
X = all.drop('class')
# Take the labels from X's own index so features and labels stay aligned even if
# the dropped row is not the first one.
y = X.index

# Fixed seed so the split (and everything trained on it) is reproducible;
# 1 matches the random_state used for the decision trees.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# scale all data
# The scaler is fitted on the training split only and then applied to both
# splits, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

### Neural nets

In [None]:
# do neural net training
# Train several independently seeded networks and record the F1 score each one
# achieves on the held-out test split; `spread` ends up with one score per run.
iterations = 30     # number of networks to train
mlp_max_iter = 30   # optimiser iteration cap per network (independent of the run count)

# Pin the label order so the confusion matrix always has the same layout.
class_labels = sorted(set(y_train))

spread = []
for i in range(iterations):
    # neural net info
    # A different but fixed seed per run: runs differ from each other, yet the
    # whole experiment is reproducible.
    mlp = MLPClassifier(hidden_layer_sizes=(2, 1), max_iter=mlp_max_iter, random_state=i)
    mlp.fit(X_train, y_train)

    # A fitted model's predictions on a fixed test set never change, so one
    # evaluation per run is enough (re-predicting in a loop only repeats the
    # same number, and can never terminate if it waits for a different result).
    predictions = mlp.predict(X_test)
    matrix = confusion_matrix(y_test, predictions, labels=class_labels)

    # confusion matrix info
    # Rows are true labels and columns are predicted labels; scores are computed
    # for the first label in `class_labels`.
    true_pos = matrix[0][0]
    predicted_pos = matrix[:, 0].sum()   # everything predicted as that label
    actual_pos = matrix[0, :].sum()      # everything that truly has that label

    # Undefined ratios (0/0) are scored as 0.0 instead of NaN so a degenerate
    # network that never predicts the label does not poison the histogram.
    precision = true_pos / predicted_pos if predicted_pos else 0.0
    recall = true_pos / actual_pos if actual_pos else 0.0
    if precision + recall:
        f1 = (2 * precision * recall) / (precision + recall)
    else:
        f1 = 0.0

    spread.append(f1)

# Show the collected scores once, after every run has been recorded.
display(spread)



In [None]:
import matplotlib.pyplot as plt

# plotting labelled histogram
# Distribution of the per-run F1 scores collected in `spread`.
fig, ax = plt.subplots()
ax.hist(spread)
ax.set_title('F1 scores across neural-net runs')
ax.set_xlabel('f1 Scores')
ax.set_ylabel('Occurrence')
plt.show()

### Decision Tree

In [None]:
pip install graphviz # run this if you get an error in hte next block

In [None]:

# You may need to install the Python graphviz library. At the command line:
#   pip install graphviz
# You will also need to install the graphviz executables. You can use apt,
# macports, or other installer for your system.
from graphviz import Source


In [None]:
# get fresh data
#df = pd.read_csv('mushrooms.csv')
#X = df.loc[:, df.columns != 'class']
#y = df.loc[:, df.columns == 'class']

#X_train, X_test, y_train, y_test = train_test_split(X, y)


In [None]:
# Decision tree limited to depth 3: fit on the training split, then predict
# labels for the held-out split. The seed is fixed at 1.
treeclf = DecisionTreeClassifier(random_state=1, max_depth=3).fit(X_train, y_train)
y_pred = treeclf.predict(X_test)

In [None]:
# feature_cols = ['cap_shape', 'cap_surface', 'cap_color', 'bruises', 'odor', 'gill_attachment', 'gill_spacing', 'gill_size', 'gill_color', 'stalk_shape', 'stalk_root', 'stalk_surface', 'stalk_surface_above_ring', 'stalk_surface_below_ring', 'viel_type', 'veil_color', 'ring_number', 'ring_type', 'spore_print_color', 'population', 'habitat']
# Export the depth-3 tree, wrap the export in a graphviz Source and show it
# inline as an SVG image.
dot_source = tree.export_graphviz(
    treeclf,
    out_file=None,
    class_names=['e', 'p'],
    filled=True,
)
graph = Source(dot_source)
svg_image = graph.pipe(format='svg')
display(SVG(svg_image))

In [None]:
# Precision, recall, F-score and support for the depth-3 predictions, requested
# for the labels 'p' and 'e' in that order; support is computed but not printed.
scores = precision_recall_fscore_support(y_test.tolist(), y_pred, labels=['p', 'e'])
p, r, f, s = scores
print(p, r, f)

### max depth 6

In [None]:
# Deeper tree (max depth 6, same fixed seed): fit on the training split and
# overwrite `y_pred` with its predictions for the held-out split.
treeclfsev = DecisionTreeClassifier(random_state=1, max_depth=6).fit(X_train, y_train)
y_pred = treeclfsev.predict(X_test)

In [None]:
# Export the depth-6 tree, wrap the export in a graphviz Source and show it
# inline as an SVG image.
export_options = dict(out_file=None, class_names=['e', 'p'], filled=True)
graph = Source(tree.export_graphviz(treeclfsev, **export_options))
rendered = graph.pipe(format='svg')
display(SVG(rendered))

In [None]:
# Same report for the depth-6 predictions: per-label precision, recall and
# F-score for 'p' then 'e' (support is unpacked but not shown).
true_labels = y_test.tolist()
p, r, f, s = precision_recall_fscore_support(true_labels, y_pred, labels=['p', 'e'])
print(p, r, f)