# Chapter 11: Neural nets

> (c) 2019 Galit Shmueli, Peter C. Bruce, Peter Gedeck 
>
> Code included in
>
> _Data Mining for Business Analytics: Concepts, Techniques, and Applications in Python_ (First Edition) 
> Galit Shmueli, Peter C. Bruce, Peter Gedeck, and Nitin R. Patel. 2019.

## Import required packages

In [1]:
%matplotlib inline

from pathlib import Path

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

# !pip install dmba
from dmba import classificationSummary


## Tiny Data

* Load the TinyData dataset
* Train a neural net model to predict like/dislike using fat and salt as predictors

In [2]:
# Read the tasting data from a CSV file located relative to the working directory.
tiny = pd.read_csv(Path('TinyData.csv'))

In [3]:
# Preview up to the first 10 rows of the tiny dataset.
tiny.head(10)

Unnamed: 0,Obs.,Fat,Salt,Acceptance
0,1,0.2,0.9,like
1,2,0.1,0.1,dislike
2,3,0.2,0.4,dislike
3,4,0.2,0.5,dislike
4,5,0.4,0.5,like
5,6,0.3,0.8,like


In [4]:
# Two numeric predictors and a string-valued class label.
outcome = 'Acceptance'
predictors = ['Fat', 'Salt']
X = tiny[predictors]
y = tiny[outcome]

# A single hidden layer with three nodes and logistic activation;
# random_state is fixed so the fit is repeatable.
clf = MLPClassifier(
    hidden_layer_sizes=3,
    activation='logistic',
    solver='lbfgs',
    random_state=1,
)
clf.fit(X, y)

MLPClassifier(activation='logistic', hidden_layer_sizes=3, random_state=1,
              solver='lbfgs')

In [6]:
# Predicted labels for the same rows the network was trained on.
clf.predict(X)

array(['like', 'dislike', 'dislike', 'dislike', 'like', 'like'],
      dtype='<U7')

In [7]:
# Fitted weight matrices, one per layer transition
# (bias terms are stored separately in clf.intercepts_).
clf.coefs_

[array([[ -1.30656481,  -4.20427792, -13.29587332],
        [ -0.04399727,  -4.91606924,  -6.03356987]]),
 array([[ -0.27348313],
        [ -9.01211573],
        [-17.63504694]])]

In [8]:
# Confusion matrix on the training rows; without class_names the classes are labelled 0 and 1.
classificationSummary(y, clf.predict(X))

Confusion Matrix (Accuracy 1.0000)

       Prediction
Actual 0 1
     0 3 0
     1 0 3


In [9]:
# Put the predicted class probabilities next to the original rows.
# With no column names given, the probability columns are labelled 0 and 1.
tiny_proba = pd.DataFrame(clf.predict_proba(X))
pd.concat([tiny, tiny_proba], axis=1)

Unnamed: 0,Obs.,Fat,Salt,Acceptance,0,1
0,1,0.2,0.9,like,0.00049,0.99951
1,2,0.1,0.1,dislike,0.999994,6e-06
2,3,0.2,0.4,dislike,0.999741,0.000259
3,4,0.2,0.5,dislike,0.997368,0.002632
4,5,0.4,0.5,like,0.002133,0.997867
5,6,0.3,0.8,like,7.5e-05,0.999925


In [10]:
# Label the probability columns with the class order stored on the fitted model:
# predict_proba returns its columns in the order of clf.classes_, so taking the
# names from the model keeps labels and columns aligned by construction.
classes = clf.classes_.tolist()
pd.concat([tiny, pd.DataFrame(clf.predict_proba(X), columns=classes)], axis=1)

Unnamed: 0,Obs.,Fat,Salt,Acceptance,dislike,like
0,1,0.2,0.9,like,0.00049,0.99951
1,2,0.1,0.1,dislike,0.999994,6e-06
2,3,0.2,0.4,dislike,0.999741,0.000259
3,4,0.2,0.5,dislike,0.997368,0.002632
4,5,0.4,0.5,like,0.002133,0.997867
5,6,0.3,0.8,like,7.5e-05,0.999925


In [11]:
# Training-row confusion matrix, with rows and columns labelled by the class names in `classes`.
classificationSummary(y, clf.predict(X), class_names=classes)

Confusion Matrix (Accuracy 1.0000)

        Prediction
 Actual dislike    like
dislike       3       0
   like       0       3


## Accidents Data

* Load and import the accidents data
* Below are the descriptions of each variable:
  * ALCHL_I = alcohol presence denoted by 1 and absence denoted by 2
  * PROFIL_I_R = profile of roadway level 1 or 0
  * SUR_COND = surface condition of the road: 1 = dry; 2 = wet; 3 = snow; 4 = ice; 9 = unknown
  * VEH_INVL = # of vehicles involved
  * MAX_SEV_IR: 0 = no injuries; 1 = injury; 2 = fatality

* Create a nnet using ALCHL_I, PROFIL_I_R and VEH_INVL as predictors and predict the MAX_SEV_IR with a single hidden layer of 2 nodes

* Create a nnet using ALCHL_I, PROFIL_I_R and VEH_INVL as predictors and predict the MAX_SEV_IR with a single hidden layer of 2 nodes BUT find the class probabilities for each outcome

* Create a nnet using ALCHL_I, PROFIL_I_R and VEH_INVL as predictors and predict the MAX_SEV_IR BUT perform grid search to identify the optimal # of nodes in the hidden layer with cv=5


In [12]:
# Read the accidents data from a CSV file located relative to the working directory,
# then preview the first five rows.
accidents = pd.read_csv(Path('accidentsnn.csv'))
accidents.head(5)

Unnamed: 0,ALCHL_I,PROFIL_I_R,SUR_COND,VEH_INVL,MAX_SEV_IR
0,2,0,1,1,0
1,2,1,1,1,2
2,1,0,1,1,0
3,2,0,2,2,1
4,2,1,1,2,1


In [13]:
# Column dtypes and non-null counts for the raw accidents data.
accidents.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 999 entries, 0 to 998
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype
---  ------      --------------  -----
 0   ALCHL_I     999 non-null    int64
 1   PROFIL_I_R  999 non-null    int64
 2   SUR_COND    999 non-null    int64
 3   VEH_INVL    999 non-null    int64
 4   MAX_SEV_IR  999 non-null    int64
dtypes: int64(5)
memory usage: 39.1 KB


In [14]:
# Treat road-surface condition and injury severity as categorical rather than numeric.
accidents = accidents.astype({'SUR_COND': 'category', 'MAX_SEV_IR': 'category'})

# Dummy-code SUR_COND only, then discard the indicator for code 9 ("unknown").
processed = pd.get_dummies(accidents, columns=['SUR_COND'])
processed = processed.drop(columns=['SUR_COND_9'])

# Every remaining column except the outcome is used as a predictor.
outcome = 'MAX_SEV_IR'
predictors = [name for name in processed.columns if name != outcome]

X = processed[predictors]
y = processed[outcome]

# Hold out 40% of the rows for validation; the fixed seed makes the split repeatable.
train_X, valid_X, train_y, valid_y = train_test_split(
    X, y, test_size=0.4, random_state=1
)

In [15]:
# One hidden layer with two nodes, trained on the training partition only.
clf = MLPClassifier(
    activation='logistic',
    solver='lbfgs',
    hidden_layer_sizes=2,
    random_state=1,
)
clf.fit(train_X, train_y)

MLPClassifier(activation='logistic', hidden_layer_sizes=2, random_state=1,
              solver='lbfgs')

In [16]:
# Confusion matrix on the training partition.
classificationSummary(train_y, clf.predict(train_X))

Confusion Matrix (Accuracy 0.8664)

       Prediction
Actual   0   1   2
     0 331   0   1
     1   0 180   0
     2  30  49   8


In [17]:
# Confusion matrix on the held-out validation partition.
classificationSummary(valid_y, clf.predict(valid_X))

Confusion Matrix (Accuracy 0.8550)

       Prediction
Actual   0   1   2
     0 218   0   1
     1   0 119   0
     2  24  33   5


In [18]:
# Estimated class probabilities for the training rows, one column per class.
clf.predict_proba(train_X)

array([[9.60152413e-01, 3.22073115e-14, 3.98475868e-02],
       [9.60152413e-01, 3.22073115e-14, 3.98475868e-02],
       [2.85315952e-15, 8.81998313e-01, 1.18001687e-01],
       ...,
       [9.60152413e-01, 3.22073115e-14, 3.98475868e-02],
       [9.60152413e-01, 3.22073115e-14, 3.98475868e-02],
       [9.60152413e-01, 3.22073115e-14, 3.98475868e-02]])

## Fitting class probabilities separately

In [19]:
# Dummy-code both the surface condition and the outcome. The columns are named
# explicitly so this cell does not depend on the category dtypes assigned to
# `accidents` in an earlier cell; the resulting column names and order are unchanged.
processed = pd.get_dummies(accidents, columns=['SUR_COND', 'MAX_SEV_IR'])

# Discard the indicator for SUR_COND code 9 ("unknown").
processed = processed.drop(columns=['SUR_COND_9'])

processed.head()

Unnamed: 0,ALCHL_I,PROFIL_I_R,VEH_INVL,SUR_COND_1,SUR_COND_2,SUR_COND_3,SUR_COND_4,MAX_SEV_IR_0,MAX_SEV_IR_1,MAX_SEV_IR_2
0,2,0,1,1,0,0,0,1,0,0
1,2,1,1,1,0,0,0,0,0,1
2,1,0,1,1,0,0,0,1,0,0
3,2,0,2,0,1,0,0,0,1,0
4,2,1,2,1,0,0,0,0,1,0


In [20]:
# The outcome is now three indicator columns, one per severity level.
# NOTE: this cell rebinds outcome, predictors, X, y and the train/validation
# variables that were used for the single-column outcome model.
outcome = ['MAX_SEV_IR_0', 'MAX_SEV_IR_1', 'MAX_SEV_IR_2']
classes = sorted(outcome)
predictors = [name for name in processed.columns if name not in outcome]

X = processed[predictors]
y = processed[outcome]

# Same 60/40 split settings as before (test_size=0.4, random_state=1).
train_X, valid_X, train_y, valid_y = train_test_split(
    X, y, test_size=0.4, random_state=1
)

In [21]:
# Fit one network against the three indicator columns at once.
# max_iter=1000 caps the number of solver iterations.
clf = MLPClassifier(
    activation='logistic',
    solver='lbfgs',
    hidden_layer_sizes=2,
    max_iter=1000,
    random_state=1,
)
clf.fit(train_X, train_y)

MLPClassifier(activation='logistic', hidden_layer_sizes=2, max_iter=1000,
              random_state=1, solver='lbfgs')

In [22]:
# Multi-output predictions: one 0/1 indicator per outcome column for each training row.
clf.predict(train_X)

array([[1, 0, 0],
       [1, 0, 0],
       [0, 1, 0],
       ...,
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0]])

In [23]:
# Per-column probabilities for the training rows. Each outcome column gets its own
# estimate, so the values in a row need not sum to 1.
clf.predict_proba(train_X)

array([[9.64908794e-01, 2.09656030e-23, 5.08058122e-02],
       [9.64908794e-01, 2.09656030e-23, 5.08058122e-02],
       [3.31083439e-09, 7.76106255e-01, 1.07838013e-01],
       ...,
       [9.64908794e-01, 2.09656030e-23, 5.08058122e-02],
       [9.64908794e-01, 2.09656030e-23, 5.08058122e-02],
       [9.64908794e-01, 2.09656030e-23, 5.08058122e-02]])

In [24]:
# Wrap the predicted indicators in a DataFrame labelled with the outcome column names.
# The frame gets a fresh 0..n-1 index rather than the row labels of train_X.
pred = pd.DataFrame(data=clf.predict(train_X), columns=outcome)
pred.head(5)

Unnamed: 0,MAX_SEV_IR_0,MAX_SEV_IR_1,MAX_SEV_IR_2
0,1,0,0
1,1,0,0
2,0,1,0
3,1,0,0
4,1,0,0


In [25]:
# Wrap the predicted probabilities in a DataFrame labelled with the outcome column names.
# The frame gets a fresh 0..n-1 index rather than the row labels of train_X.
prob = pd.DataFrame(data=clf.predict_proba(train_X), columns=outcome)
prob.head(5)

Unnamed: 0,MAX_SEV_IR_0,MAX_SEV_IR_1,MAX_SEV_IR_2
0,0.9649088,2.0965600000000002e-23,0.050806
1,0.9649088,2.0965600000000002e-23,0.050806
2,3.310834e-09,0.7761063,0.107838
3,0.7736709,3.312417e-16,0.159343
4,0.7736709,3.312417e-16,0.159343


In [26]:
# Show predictions and probabilities side by side. `pred` and `prob` share the same
# column names, so a plain concat yields duplicate labels; `keys` adds a top column
# level that tells the two groups apart and keeps every column selectable by name.
pd.concat([pred, prob], axis=1, keys=['predicted', 'probability'])

Unnamed: 0,MAX_SEV_IR_0,MAX_SEV_IR_1,MAX_SEV_IR_2,MAX_SEV_IR_0.1,MAX_SEV_IR_1.1,MAX_SEV_IR_2.1
0,1,0,0,9.649088e-01,2.096560e-23,0.050806
1,1,0,0,9.649088e-01,2.096560e-23,0.050806
2,0,1,0,3.310834e-09,7.761063e-01,0.107838
3,1,0,0,7.736709e-01,3.312417e-16,0.159343
4,1,0,0,7.736709e-01,3.312417e-16,0.159343
...,...,...,...,...,...,...
594,0,1,0,4.864919e-06,7.055484e-01,0.337684
595,0,1,0,4.864919e-06,7.055484e-01,0.337684
596,1,0,0,9.649088e-01,2.096560e-23,0.050806
597,1,0,0,9.649088e-01,2.096560e-23,0.050806


## Grid search

In [27]:
from sklearn.model_selection import cross_val_score, GridSearchCV

In [28]:
# Candidate hidden-layer sizes 1 through 8. Each value is a plain int (parentheses
# without a trailing comma do not create a tuple), i.e. one hidden layer with that
# many nodes.
param_grid = {'hidden_layer_sizes': list(range(1, 9))}

In [29]:
# Base network shared by every candidate; only hidden_layer_sizes varies across the grid.
base_net = MLPClassifier(
    activation='logistic',
    solver='lbfgs',
    random_state=1,
    max_iter=500,
)

# 5-fold cross-validation over the grid, using all available cores.
# NOTE(review): train_X / train_y are whatever the most recent split cell produced;
# when the notebook is run top to bottom that is the three-column indicator outcome,
# not the single MAX_SEV_IR column — confirm this is the intended target.
gridsearch = GridSearchCV(base_net, param_grid=param_grid, cv=5, n_jobs=-1)
gridsearch.fit(train_X, train_y)

GridSearchCV(cv=5,
             estimator=MLPClassifier(activation='logistic', max_iter=500,
                                     random_state=1, solver='lbfgs'),
             n_jobs=-1,
             param_grid={'hidden_layer_sizes': [1, 2, 3, 4, 5, 6, 7, 8]})

In [30]:
# Mean cross-validated score of the best candidate in the grid.
gridsearch.best_score_

0.8630672268907563

In [31]:
# Parameter setting that achieved the best mean cross-validated score.
gridsearch.best_params_

{'hidden_layer_sizes': 4}