### Developing Multilayer Neural Networks with Keras

In [110]:
import numpy as np
import pandas as pd

In [111]:
data = pd.read_csv('lemmNgramsBenefits2Contraindications3.csv', encoding='unicode_escape')

In [112]:
data.shape

(456, 545)

In [113]:
data.head()

Unnamed: 0,modality,lemmatizedContraindications,lemmatizedBenefits,ache.improve,ache.relieve,acute.pain,adhesion.heal,adhesion.improve,adhesion.improves,adhesion.increase,...,unable.sit.still,uterine.disease.fibroid,wound.anemia.blood,wound.aneurism.history,wound.area.sore,wound.cut.rash,wound.directly.surgical,wound.pregnant.heart,wound.skin.rash,wound.sore.sensitive
0,Myofascial Massage,dehydration local site wound sore sensitive sk...,muscle trauma muscle spasm pain trigger point,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,Prenatal Massage,dehydration high risk pregnancy history painfu...,improved circulation better sleep pain relief ...,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
2,Shiatsu Massage,dehydration fever rash infection mental disord...,improved circulation relaxing detox break apar...,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,Hot Stone Therapy Massage,dehydration fever rash infection mental disord...,relax client reduce stress calming increase h...,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Cupping Therapy,dehydration fever rash infection mental disord...,improved circulation better sleep pain relief ...,0,0,0,0,0,0,0,...,0,0,1,0,0,1,0,0,0,0


In [114]:
# Shuffle the rows reproducibly: seed NumPy, draw a random ordering of the
# existing index labels, then reorder the frame to follow that ordering.
np.random.seed(123)
shuffled_index = np.random.permutation(data.index)
data0 = data.reindex(shuffled_index)

In [115]:
# Feature table: skip the first three columns (modality and the two lemmatized
# text columns shown in data.head()) and keep every remaining n-gram column.
data1 = data0.iloc[:,3:]

In [116]:
data1.shape

(456, 542)

In [117]:
# Target frame: the first column ('modality') kept as a one-column DataFrame.
# .copy() makes it independent of data0, so the column assignments made on
# `target` in later cells do not write into a slice of another frame
# (which is what triggers pandas' SettingWithCopyWarning).
target = data0.iloc[:, 0:1].copy()

In [118]:
target.shape

(456, 1)

In [119]:
# Distinct modality labels and how many of them there are.
modalities = target['modality'].unique()
print(modalities)
print(len(modalities))

['Hot Stone Therapy Massage' 'Cold Stone Therapy' 'Reflexology Massage'
 'Deep tissue Massage' 'Lymphatic Drainage Massage' 'Stretching'
 'Aromatherapy' 'Trigger Point Therapy' 'Biofreeze Muscle Pain Relief Gel'
 'Shiatsu Massage' 'Massage Gun Therapy' 'Cupping Therapy'
 'Cannabidiol (CBD) Massage Balm' 'Sports Massage' 'Myofascial Massage'
 'Craniosacral Massage' 'Prenatal Massage'
 'Instrument Assisted Soft Tissue Mobilization (IASTM) Friction Massage'
 'Swedish Massage']
19


**data1** and **target** are the data table of features and the target variable.

Mean-centering and scaling of the **data1** features (the target labels are not transformed).

In [120]:

# Standardize each feature column: subtract its mean and divide by its
# population standard deviation (ddof=0, which is np.std's default).
# Both statistics are taken explicitly per column (axis=0).
mean_vals = data1.mean(axis=0)
std_val = data1.std(axis=0, ddof=0)

# A constant column has zero spread; dividing by 0 would fill it with NaN.
# Use 1 as the divisor there so such a column simply becomes all zeros.
std_val = std_val.replace(0, 1)

data1_centered = (data1 - mean_vals) / std_val

print(data1_centered.shape, target.shape)


(456, 542) (456, 1)


In [121]:
print(data1.head())

     ache.improve  ache.relieve  acute.pain  adhesion.heal  adhesion.improve  \
49              0             0           0              0                 1   
85              0             0           0              0                 0   
34              0             0           0              0                 0   
381             0             0           0              0                 1   
232             0             0           0              0                 0   

     adhesion.improves  adhesion.increase  adhesion.stress  \
49                   0                  0                0   
85                   0                  0                0   
34                   0                  0                0   
381                  0                  0                0   
232                  0                  0                0   

     alleviate.headache  anxiety.stress  ...  unable.sit.still  \
49                    0               0  ...                 0   
85            

In [122]:
print(target.head())

                       modality
49    Hot Stone Therapy Massage
85           Cold Stone Therapy
34          Reflexology Massage
381         Deep tissue Massage
232  Lymphatic Drainage Massage


In [123]:
# Label -> integer id lookup. np.unique returns the distinct labels in sorted
# order, so ids follow the alphabetical order of the modality names.
sorted_labels = np.unique(target['modality'])
class_mapping = dict(zip(sorted_labels, range(len(sorted_labels))))
class_mapping

{'Aromatherapy': 0,
 'Biofreeze Muscle Pain Relief Gel': 1,
 'Cannabidiol (CBD) Massage Balm': 2,
 'Cold Stone Therapy': 3,
 'Craniosacral Massage': 4,
 'Cupping Therapy': 5,
 'Deep tissue Massage': 6,
 'Hot Stone Therapy Massage': 7,
 'Instrument Assisted Soft Tissue Mobilization (IASTM) Friction Massage': 8,
 'Lymphatic Drainage Massage': 9,
 'Massage Gun Therapy': 10,
 'Myofascial Massage': 11,
 'Prenatal Massage': 12,
 'Reflexology Massage': 13,
 'Shiatsu Massage': 14,
 'Sports Massage': 15,
 'Stretching': 16,
 'Swedish Massage': 17,
 'Trigger Point Therapy': 18}

In [124]:
# Keep the original string labels in a 'mode' column; the 'modality' column
# is replaced with integer class ids in a later cell.
target['mode']=target['modality']

In [125]:
# Encode the labels as integer class ids. Mapping from the untouched string
# column 'mode' (rather than from 'modality' itself) keeps this cell safe to
# re-run: pushing already-encoded integers through class_mapping would
# produce NaN for every row.
target['modality'] = target['mode'].map(class_mapping)

In [126]:
target.head()

Unnamed: 0,modality,mode
49,7,Hot Stone Therapy Massage
85,3,Cold Stone Therapy
34,13,Reflexology Massage
381,6,Deep tissue Massage
232,9,Lymphatic Drainage Massage


In [127]:
target1 = target['modality']

In [128]:
target1.head()

49      7
85      3
34     13
381     6
232     9
Name: modality, dtype: int64

In [129]:
# Split into training (~80%) and testing (~20%) by row position.
# The rows were shuffled earlier, so a positional cut gives a random split.
# NOTE: the raw features in data1 are used here, not data1_centered.
N_TRAIN = 365  # rows in the training split; all remaining rows form the test split

X_train = data1.iloc[:N_TRAIN]
X_test = data1.iloc[N_TRAIN:]
y_train = target1.iloc[:N_TRAIN]
y_test = target1.iloc[N_TRAIN:]

################################
# Class names aligned with each split, for labelling the predictions later.
# A Series has no columns to rename; its name ('mode') becomes the column
# name when it is converted to a one-column DataFrame.
y_trainNames = target['mode'].iloc[:N_TRAIN]
y_trainNames1 = y_trainNames.to_frame()

y_testNames = target['mode'].iloc[N_TRAIN:]
y_testNames1 = y_testNames.to_frame()
################################

print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)



(365, 542)
(365,)
(91, 542)
(91,)


In [130]:
y_train

49      7
85      3
34     13
381     6
232     9
       ..
103    16
149    14
139     4
67     10
3       7
Name: modality, Length: 365, dtype: int64

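The data set has been permuted and split into roughly 80% training and 20% testing rows. Note that the split above is taken from the raw **data1** features; the centered and scaled copy (**data1_centered**) was computed earlier but is not the table passed to the model.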

In [131]:
import tensorflow as tf

# Prefer the Keras bundled as tensorflow.keras; fall back to the old
# contrib location for TensorFlow 1.x builds that only ship it there.
# (tensorflow.contrib does not exist in TensorFlow 2.x.)
try:
    from tensorflow import keras
except ImportError:
    import tensorflow.contrib.keras as keras

# Seed both NumPy and TensorFlow so weight initialization is repeatable.
np.random.seed(123)
_tf_set_seed = getattr(getattr(tf, 'random', None), 'set_seed', None)
if _tf_set_seed is not None:
    _tf_set_seed(123)          # TensorFlow 2.x API
else:
    tf.set_random_seed(123)    # TensorFlow 1.x API

In [132]:
# y_train_onehot = keras.utils.to_categorical(y_train)

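There is a problem with the tensorflow.contrib.keras function keras.utils.to_categorical():
it raises an error and won't convert the string labels to integers. I tried several fixes found by searching, and
each produced a different error. The call worked on the numeric data this script was originally built for, but not on the
multiclass labels in this data, so the one-hot step is skipped and the integer class ids are passed to the model with the `sparse_categorical_crossentropy` loss instead.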

In [133]:
# Feed-forward classifier: two tanh hidden layers of 150 units each and a
# softmax output layer with one unit per class.
n_features = X_train.shape[1]   # one input per feature column (542 for this data set)
n_classes = len(class_mapping)  # one output unit per modality (19 for this data set)

model = keras.models.Sequential()

model.add(
    keras.layers.Dense(
        units=150,
        input_dim=n_features,  # only the first layer has to be told the input size
        kernel_initializer='glorot_uniform',  # Glorot (a.k.a. Xavier) initialization
        bias_initializer='zeros',
        activation='tanh'))

model.add(
    keras.layers.Dense(
        units=150,  # input size is inferred from the previous layer's 150 outputs
        kernel_initializer='glorot_uniform',
        bias_initializer='zeros',
        activation='tanh'))

model.add(
    keras.layers.Dense(
        units=n_classes,
        kernel_initializer='glorot_uniform',
        bias_initializer='zeros',
        activation='softmax'))  # class-membership probabilities that sum to 1

# These are hyperparameters that can be tuned if the model overfits during
# training, or to get better accuracy.
sgd_optimizer = keras.optimizers.SGD(
        lr=0.001, decay=1e-7, momentum=.9)

# The targets are integer class ids, so the sparse variant of the multiclass
# cross-entropy loss is used; plain 'categorical_crossentropy' would require
# one-hot encoded (0/1 matrix) targets instead.
model.compile(optimizer=sgd_optimizer,
              loss='sparse_categorical_crossentropy')

In [134]:
# Train for 50 epochs in mini-batches of 64; the object returned by fit() is kept in `history`.
history = model.fit(X_train, y_train,
                    batch_size=64, epochs=50,
                    verbose=1, # print progress for every epoch so training can be watched and stopped to tune parameters if needed
                    validation_split=0.1) # hold out 10% of the training rows for validation (37 of the 365 rows here); they are not used to fit the weights

Train on 328 samples, validate on 37 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [135]:
# Predicted class id per training row = index of the largest softmax probability.
# This is what Sequential.predict_classes computed for a softmax output; that
# helper is not available in newer Keras releases, so the argmax is taken explicitly.
y_train_pred = np.argmax(model.predict(X_train, verbose=0), axis=-1)
print('First 3 predictions: ', y_train_pred[:3])

First 3 predictions:  [ 7  3 13]


In [136]:
# Class id with the highest predicted probability for every training row
# (explicit argmax instead of predict_classes, which newer Keras releases lack).
y_train_pred = np.argmax(model.predict(X_train, verbose=0), axis=-1)

In [137]:
# Side-by-side table for the training rows: true class id, class name, predicted id.
# The predictions come back as a plain array, so they are given the index of
# y_train to line up row-for-row in the concat below.
y_train_pred1 = pd.DataFrame({'predicted': y_train_pred}, index=y_train.index)

# y_train is a Series; turn it into a one-column frame named 'modality'.
y_train1 = y_train.to_frame(name='modality')

Train = pd.concat(
    [y_train1['modality'], y_trainNames1['mode'], y_train_pred1['predicted']],
    axis=1)

print(Train)

     modality                        mode  predicted
49          7   Hot Stone Therapy Massage          7
85          3          Cold Stone Therapy          3
34         13         Reflexology Massage         13
381         6         Deep tissue Massage          6
232         9  Lymphatic Drainage Massage          9
..        ...                         ...        ...
103        16                  Stretching         16
149        14             Shiatsu Massage         14
139         4        Craniosacral Massage          4
67         10         Massage Gun Therapy         10
3           7   Hot Stone Therapy Massage          7

[365 rows x 3 columns]


In [138]:
# Class id with the highest predicted probability for every held-out test row
# (explicit argmax instead of predict_classes, which newer Keras releases lack).
y_test_pred = np.argmax(model.predict(X_test, verbose=0), axis=-1)



In [139]:
# Side-by-side table for the test rows: true class id, class name, predicted id.
# The predictions come back as a plain array, so they are given the index of
# y_test to line up row-for-row in the concat below.
y_test_pred1 = pd.DataFrame({'predicted': y_test_pred}, index=y_test.index)

# y_test is a Series; turn it into a one-column frame named 'modality'.
y_test1 = y_test.to_frame(name='modality')

Test = pd.concat(
    [y_test1['modality'], y_testNames1['mode'], y_test_pred1['predicted']],
    axis=1)

print(Test)

     modality                                               mode  predicted
342         8  Instrument Assisted Soft Tissue Mobilization (...          8
56         10                                Massage Gun Therapy         10
304         8  Instrument Assisted Soft Tissue Mobilization (...          8
233         9                         Lymphatic Drainage Massage          9
51          2                     Cannabidiol (CBD) Massage Balm          2
..        ...                                                ...        ...
230        16                                         Stretching         16
98          9                         Lymphatic Drainage Massage          9
322        12                                   Prenatal Massage         12
382         3                                 Cold Stone Therapy          3
365         2                     Cannabidiol (CBD) Massage Balm          2

[91 rows x 3 columns]


In [140]:
# Training accuracy: share of rows whose predicted class id equals the true one.
train_hits = Train['modality'] == Train['predicted']
s = train_hits.sum()
l = train_hits.size
accTrain = s/l
print('Training Correctly Predicted:',s,'Training Accuracy:',accTrain,'\n')

Training Correctly Predicted: 365 Training Accuracy: 1.0 



In [141]:
# Test accuracy: share of held-out rows whose predicted class id equals the true one.
test_hits = Test['modality'] == Test['predicted']
s = test_hits.sum()
l = test_hits.size
accTest = s/l
print('Testing Correctly Predicted:',s,'Testing Accuracy:',accTest)

Testing Correctly Predicted: 91 Testing Accuracy: 1.0
