# Neural Networks and Learning Course
----------------------------------------------------------
# Mushroom Recommender Project
----------------------------------------------------------
# By Begum SARIGUZEL

#### Importing libraries

In [33]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV

%matplotlib inline

In [11]:
# Read the raw mushroom table from the CSV file (path is relative to the
# notebook's working directory).
mushroom = pd.read_csv(filepath_or_buffer="mushrooms.csv")

In [12]:
# Remove three feature columns with a single in-place call; the frame keeps
# its name for the cells that follow.
mushroom.drop(
    columns=['gill-attachment', 'veil-color', 'ring-number'],
    inplace=True,
)

In [13]:
# Cast every column to the pandas 'category' dtype and write the result back
# into the same columns of `mushroom`.
mushroom[mushroom.columns] = mushroom[mushroom.columns].astype('category')
# Keep a handle on the un-encoded 'class' column.
# NOTE(review): `target` is not referenced again in the visible cells.
target = mushroom['class']

In [14]:
# A single LabelEncoder is re-fitted on each column in turn, so every column
# receives its own integer codes. Once apply() has finished, the encoder only
# holds the classes of the column it was fitted on last.
labelen = LabelEncoder()
mushroom_encode = mushroom.apply(labelen.fit_transform)

In [15]:
# Drop 'veil-type' from the label-encoded copy and from the original frame so
# both keep the same set of feature columns.
mushroom_encode.drop(columns=['veil-type'], inplace=True)
mushroom.drop(columns=['veil-type'], inplace=True)

In [16]:
# pop() hands back the encoded 'class' column and removes it from the frame in
# the same step, leaving only feature columns in mushroom_encode.
y = mushroom_encode.pop('class')

#### Creating MLP classifier

In [17]:
# Baseline network: deliberately tiny and trained with mini-batch SGD.
# random_state is not passed, so weight initialisation and shuffling differ
# from one run to the next.
mlp = MLPClassifier(
    # --- architecture ---
    hidden_layer_sizes=(1, 2),   # two hidden layers: 1 unit, then 2 units
    activation='relu',           # max(0, x)
    # --- optimiser ---
    solver='sgd',                # stochastic gradient descent
    batch_size=20,               # samples per mini-batch
    learning_rate='constant',    # step size stays at learning_rate_init
    learning_rate_init=0.05,
    shuffle=True,                # shuffle the samples in each iteration
    # --- regularisation and stopping ---
    alpha=1e-4,                  # strength of the L2 penalty on the weights
    max_iter=500,                # upper bound on the number of iterations
    tol=1e-5,                    # tolerance on the loss improvement (epsilon in lecture 41)
    verbose=True,                # print the loss after every iteration
)

print(mlp)

MLPClassifier(activation='relu', alpha=0.0001, batch_size=20, beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(1, 2), learning_rate='constant',
              learning_rate_init=0.05, max_iter=500, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=None, shuffle=True, solver='sgd', tol=1e-05,
              validation_fraction=0.1, verbose=True, warm_start=False)


#### Creating a small data set consisting of odorless mushrooms with a white spore print color

In [19]:
# Step 1: keep only rows whose odor code is 'n'.
is_odorless = mushroom['odor'].isin(['n'])
no_odor = mushroom[is_odorless]

# Step 2: of those, keep only rows whose spore-print-color code is 'w'.
has_white_print = no_odor['spore-print-color'].isin(['w'])
no_odor_w = no_odor[has_white_print]

# Fraction of the full table that is NOT part of the subset (cell output).
total_rows = len(mushroom)
(total_rows - len(no_odor_w)) / total_rows

0.9231905465288035

In [22]:
# Un-encoded class labels of the subset.
# NOTE: y_no_odor is reassigned from the label-encoded frame further down,
# before it is passed to train_test_split, so this value is never used for training.
y_no_odor = no_odor_w['class']

#### Label encoding

In [23]:
# Re-fit the shared encoder column by column on the subset only, so the
# integer codes here are derived from the subset's values, not the full table's.
no_odor_encode = no_odor_w.apply(labelen.fit_transform)


In [24]:
# Take the encoded 'class' column out of the frame and use it as the target;
# no_odor_encode is left with feature columns only.
y_no_odor = no_odor_encode.pop('class')

#### One hot encoding

In [25]:
# Column labels of the subset, captured before any column is removed.
column_names_no_odor = no_odor_w.columns
# no_odor_w is a filtered slice of `mushroom`. Dropping a column from it in
# place triggers pandas' SettingWithCopyWarning and makes this cell fail with a
# KeyError when it is run a second time. drop() without inplace returns a new
# frame and leaves no_odor_w untouched, so the cell is safe to re-run.
no_odor_preprocessed = pd.get_dummies(no_odor_w.drop(columns=['class']))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


#### Splitting the data into training and testing data sets

In [26]:
# Hold out 30 % of the subset for testing. A fixed seed makes the shuffle, and
# therefore the split and every score computed from it, identical on each run;
# 11 is the same seed the grid-search estimator in this notebook uses.
X_train, X_test, y_train, y_test = train_test_split(
    no_odor_preprocessed,
    y_no_odor,
    test_size=0.3,
    random_state=11,
)

#### Training the model

In [27]:
# Train the baseline network on the subset's training split. fit() returns the
# estimator, which the notebook shows as the cell output.
mlp.fit(X=X_train, y=y_train)

Iteration 1, loss = 0.36751586
Iteration 2, loss = 0.27566456
Iteration 3, loss = 0.27490110
Iteration 4, loss = 0.27415047
Iteration 5, loss = 0.27404331
Iteration 6, loss = 0.27436714
Iteration 7, loss = 0.27474007
Iteration 8, loss = 0.27419541
Iteration 9, loss = 0.27447870
Iteration 10, loss = 0.27441271
Iteration 11, loss = 0.27447134
Iteration 12, loss = 0.27425449
Iteration 13, loss = 0.27457608
Iteration 14, loss = 0.27472566
Iteration 15, loss = 0.27440552
Iteration 16, loss = 0.27444684
Training loss did not improve more than tol=0.000010 for 10 consecutive epochs. Stopping.


MLPClassifier(activation='relu', alpha=0.0001, batch_size=20, beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(1, 2), learning_rate='constant',
              learning_rate_init=0.05, max_iter=500, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=None, shuffle=True, solver='sgd', tol=1e-05,
              validation_fraction=0.1, verbose=True, warm_start=False)

In [28]:
# Mean accuracy of the fitted baseline on both halves of the subset split.
trainingScore = mlp.score(X_train, y_train)
testingScore = mlp.score(X_test, y_test)

print("Accuracy (on training set) = ", round(trainingScore, 4))
print("Accuracy (on test set) = ", round(testingScore, 4))

Accuracy (on training set) =  0.922
Accuracy (on test set) =  0.9255


### With all data

In [29]:
# One-hot encode every feature column of the full table. The label column is
# excluded through a non-mutating drop(): removing it in place would strip
# 'class' from `mushroom` for good and make this cell raise a KeyError when it
# is run a second time.
mushroom_preprocessed = pd.get_dummies(mushroom.drop(columns=['class']))

In [30]:
# Hold out 30 % of the full data set for testing. A fixed seed makes the split,
# and the accuracies reported from it, reproducible; 11 is the same seed the
# grid-search estimator in this notebook uses.
X_train1, X_test1, y_train1, y_test1 = train_test_split(
    mushroom_preprocessed,
    y,
    test_size=0.3,
    random_state=11,
)

In [31]:
# Fit the same estimator object again, this time on the full-data training
# split. fit() returns the estimator, which becomes the cell output.
mlp.fit(X=X_train1, y=y_train1)

Iteration 1, loss = 0.14743516
Iteration 2, loss = 0.05050139
Iteration 3, loss = 0.05025434
Iteration 4, loss = 0.05047573
Iteration 5, loss = 0.05055218
Iteration 6, loss = 0.05055952
Iteration 7, loss = 0.05034329
Iteration 8, loss = 0.05036758
Iteration 9, loss = 0.05039472
Iteration 10, loss = 0.05025470
Iteration 11, loss = 0.05029274
Iteration 12, loss = 0.05028405
Iteration 13, loss = 0.05030947
Iteration 14, loss = 0.05038889
Training loss did not improve more than tol=0.000010 for 10 consecutive epochs. Stopping.


MLPClassifier(activation='relu', alpha=0.0001, batch_size=20, beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(1, 2), learning_rate='constant',
              learning_rate_init=0.05, max_iter=500, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=None, shuffle=True, solver='sgd', tol=1e-05,
              validation_fraction=0.1, verbose=True, warm_start=False)

In [32]:
# Mean accuracy of the re-fitted baseline on both halves of the full-data split.
trainingScore = mlp.score(X_train1, y_train1)
testingScore = mlp.score(X_test1, y_test1)

print("Accuracy (on training set) = ", round(trainingScore, 4))
print("Accuracy (on test set) = ", round(testingScore, 4))

Accuracy (on training set) =  0.99
Accuracy (on test set) =  0.9873


#### Using GridSearchCV to find the best parameters

In [None]:
# Hyper-parameter grid: 4 architectures x 4 initial learning rates x 4 L2 penalties.
param_grid = [
    {
        'hidden_layer_sizes': [(5, 5), (10, 10), (15, 15), (25, 25)],
        'learning_rate_init': [0.003, 0.01, 0.03, 0.1],
        'alpha': [0.00001, 0.0001, 0.001, 0.01],
    }
]

# Estimator handed to the search. Its own hidden_layer_sizes,
# learning_rate_init and alpha are replaced by the grid entries during the search.
base_mlp = MLPClassifier(
    activation='relu',
    alpha=1e-07,
    batch_size=4,
    beta_1=0.9,
    beta_2=0.999,
    early_stopping=False,
    epsilon=1e-08,
    hidden_layer_sizes=(10,),
    learning_rate='constant',
    learning_rate_init=0.005,
    max_iter=500,
    momentum=0.8,
    nesterovs_momentum=True,
    power_t=0.5,
    random_state=11,
    shuffle=True,
    solver='adam',
    tol=1e-05,
    validation_fraction=0.3,
    verbose=False,
    warm_start=False,
)

# NOTE(review): the search is fitted on X_train / y_train and reported on
# X_test / y_test (the odorless, white-spore subset), while the later mlp2
# cells score on X_train1 / X_test1 - confirm which split is intended.

# Run one 3-fold cross-validated search per metric (macro-averaged).
scores = ['precision', 'recall']
for score in scores:
    clf2 = GridSearchCV(base_mlp, param_grid, cv=3, scoring=score + '_macro')
    clf2.fit(X_train, y_train)

    print("Best parameters set found on development set:")
    print()
    print(clf2.best_params_)
    print()

    # Mean and two standard deviations of the CV score for every grid point.
    print("Grid scores on development set:")
    print()
    means = clf2.cv_results_['mean_test_score']
    stds = clf2.cv_results_['std_test_score']
    candidates = clf2.cv_results_['params']
    for mean, std, params in zip(means, stds, candidates):
        print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
    print()

    # Per-class report of the search's predictions on the held-out split.
    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    y_true = y_test
    y_pred = clf2.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()

#### Rebuilding the model considering the best parameters

In [None]:
# Second network, configured with hidden_layer_sizes, learning_rate_init and
# alpha values that appear in the search grid.
# NOTE(review): the grid search ran with solver='adam' and batch_size=4, while
# this model uses solver='sgd' and batch_size=20 - confirm that the tuned values
# are meant to carry over to this configuration.
mlp2 = MLPClassifier(
    hidden_layer_sizes=(5, 5),   # two hidden layers of 5 units each
    activation='relu',           # max(0, x)
    solver='sgd',                # stochastic gradient descent
    alpha=0.00001,               # strength of the L2 penalty on the weights
    batch_size=20,               # samples per mini-batch
    learning_rate='constant',    # step size stays at learning_rate_init
    learning_rate_init=0.003,
    max_iter=500,                # upper bound on the number of iterations
    shuffle=True,                # shuffle the samples in each iteration
    tol=0.00001,                 # tolerance on the loss improvement (epsilon in lecture 41)
    random_state=11,             # same seed as the grid-search estimator, for reproducible runs
    verbose=True,                # print the loss after every iteration
)

print(mlp2)

#### Training and evaluating the model

In [None]:
# Fit the rebuilt network on the full-data training split. Both arguments are
# passed positionally: a keyword argument followed by a positional one is a
# SyntaxError, and the scoring calls below use X_train1 / y_train1.
mlp2.fit(X_train1, y_train1)

# Evaluate accuracy on the training data
trainingScore = mlp2.score(X_train1, y_train1)
print("Accuracy (on training set) = ", round(trainingScore, 4))

# Evaluate accuracy on the test data
testingScore = mlp2.score(X_test1, y_test1)
print("Accuracy (on test set) = ", round(testingScore, 4))