# CLASSIFICATION with LSTM Network in Keras

In [109]:
# Import the necessary libraries, modules
import pandas as pd # Pandas library for reading '.csv' files as dataframes
import numpy as np # Numpy library for creating and modifying arrays.
from keras.layers import Dense, SimpleRNN, GRU, LSTM, Embedding ,Dropout,BatchNormalization# Import layers from Keras
from keras.models import Sequential

In [2]:
import os
os.chdir('C:\\Users\\atul\\Desktop')

### Reading the data

In [3]:
raw_data = pd.read_csv('AuthorsDataset.csv', encoding='latin-1')


print(raw_data.shape)
print(raw_data.columns)
print(raw_data)

(750, 2)
Index(['Text', 'Author'], dtype='object')
                                                  Text     Author
0    I give Pirrip as my father's family name, on t...    Charles
1    Ours was the marsh country, down by the river,...    Charles
2    You bring me, to-morrow morning early, that fi...    Charles
3    The marshes were just a long black horizontal ...    Charles
4    My sister, Mrs. Joe, with black hair and eyes,...    Charles
5    My sister had a trenchant way of cutting our b...    Charles
6    The effort of resolution necessary to the achi...    Charles
7    Some medical beast had revived Tar-water in th...    Charles
8    Conscience is a dreadful thing when it accuses...    Charles
9    As soon as the great black velvet pall outside...    Charles
10   The mist was heavier yet when I got out upon t...    Charles
11   All this time, I was getting on towards the ri...    Charles
12   Why, see now! said he. When a man's alone on t...    Charles
13   I indicated in what 

In [4]:
from sklearn.utils import shuffle

data= shuffle(raw_data)

print(data)

                                                  Text     Author
352  No doubt for first offenders, and for all offe...    HGWells
7    Some medical beast had revived Tar-water in th...    Charles
635  This was all wretched for a stranger like myse...  Stevenson
56   When I got up to my little room and said my pr...    Charles
60   I crossed the staircase landing, and entered t...    Charles
2    You bring me, to-morrow morning early, that fi...    Charles
191  By this time we had come to the house, where I...    Charles
594  The second day I crossed the island to all sid...  Stevenson
731  Why, so it would appear, says he, filling his ...  Stevenson
271  Individual liberty in a community is not, as m...    HGWells
113  My sister was never left alone now; but Joe mo...    Charles
510  It was drawing on to sundown when I met a stou...  Stevenson
254  There must always be a certain effect of hardn...    HGWells
631  Never a word they spoke as they pulled ashore,...  Stevenson
73   Meanw

In [5]:
from sklearn.model_selection import train_test_split
train,test = train_test_split(data, test_size = 0.2) 
print(train.shape)
print(test.shape)

(600, 2)
(150, 2)


In [6]:
train.columns

Index(['Text', 'Author'], dtype='object')

### Converting unstructured text to structured numeric form
This includes:
1. Tokenizing
2. Converting sequence of words to sequence of word indeces
3. Converting varing length sequences to fixed length sequences through padding

In [8]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(train.Text)

train_x = tokenizer.texts_to_sequences(train.Text)
train_x = pad_sequences(train_x, maxlen=100)
test_x = tokenizer.texts_to_sequences(test.Text)
test_x = pad_sequences(test_x, maxlen=100)

train_x.shape, test_x.shape

((600, 100), (150, 100))

In [12]:
print(train_x[2])

[ 101  622    9  408 2806 5610    6   14 5611 3701    4  479    5 2227
   15   23    2   25    7  163  155   15   14 5612    2 5613  883   22
    1 5614  255   16    8    9   10 1828    3    1  182 5615    3   14
  754   94  104    7   10 2228 5616    2  530   17 1358    1  391  378
  312 2229   21   23    2 3702   19  183    2  589  180 2230  272  180
 2230   11   49    9   35    5  113  667  531    8    7 2807 5617    5
  256  590 1208    7   71   80 3703   40    4   27    1 1556    4  462
   14 2808]


In [42]:
from keras.utils import to_categorical # This convers the labels to one-hot vectors(Dummies)
unique_labels = list(data.Author.unique())
train_y = np.array([unique_labels.index(i) for i in train.Author])
train_y = to_categorical(train_y)
test_y_ = np.array([unique_labels.index(i) for i in test.Author])
test_y = to_categorical(test_y_)

In [43]:
import keras.backend as K # This 'K' can be used to create user defined functions in keras

# Define a custom function in keras to compute recall.
# Arguments:
# y_true - Actual labels
# y_pred - Predicted labels
def recall(y_true, y_pred):
    TP = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    PP = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = TP / (PP + K.epsilon())
    return recall

### Building and training an LSTM model

In [53]:
# Building an LSTM model
look_back=4 ##here i am giving to 4 words at a time stamp
model = Sequential() # Call Sequential to initialize a network
model.add(Embedding(input_dim=len(tokenizer.word_index), 
                    input_length=100, 
                    output_dim=50)) # Add an embedding layer which represents each unique token as a vector
model.add(LSTM(7,input_shape=(1, look_back), return_sequences=False)) # Add an LSTM layer
model.add(Dense(3, activation='softmax')) # Add an ouput layer. Since classification, 3 nodes for 3 classes.

In [54]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_11 (Embedding)     (None, 100, 50)           581550    
_________________________________________________________________
lstm_9 (LSTM)                (None, 7)                 1624      
_________________________________________________________________
dense_9 (Dense)              (None, 3)                 24        
Total params: 583,198
Trainable params: 583,198
Non-trainable params: 0
_________________________________________________________________


In [55]:
print(train_y.shape)
print(test_y.shape)
print(train_x.shape)
print(test_x.shape)

(600, 3)
(150, 3)
(600, 100)
(150, 100)


In [56]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(train_x, train_y, epochs=20, validation_split=0.25)

Train on 450 samples, validate on 150 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x25476f1d278>

### Prediction and evaluation on test data
1. Check the network output on test data. What do these values represent?
2. Predict the class labels on test data
2. Evaluate the model on test data

Hint: Check model.predict, model.predict_classes, model.evaluate in keras

In [70]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy',recall])
model.fit(train_x, train_y, epochs=20, validation_split=0.25)

Train on 450 samples, validate on 150 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x25477d377f0>

In [68]:
test_prob = model.predict(test_x)
test_prob[5]

array([0.07045849, 0.8476717 , 0.08186986], dtype=float32)

In [63]:
test_prob = model.predict(test_x)
test_prob[5]
test_classes = model.predict_classes(test_x)
test_classes.shape
print(test_classes)

[2 1 1 0 2 1 2 2 1 1 1 1 2 0 0 0 1 1 1 1 0 0 0 2 2 1 0 0 1 1 2 1 1 1 2 0 0
 0 1 1 0 2 1 1 1 1 0 1 1 0 2 2 2 1 2 2 0 2 1 0 1 0 1 1 1 0 1 1 2 0 1 2 0 1
 2 2 2 1 0 2 1 1 0 0 1 1 0 0 2 1 2 2 2 2 0 1 0 1 2 1 1 2 1 1 1 1 1 1 1 0 1
 1 1 2 1 2 1 0 0 0 2 0 1 1 2 2 2 1 2 1 1 1 1 1 2 0 2 1 0 1 2 1 1 1 2 2 1 0
 2 1]


In [65]:
import keras.backend as K # This 'K' can be used to create user defined functions in keras

# Define a custom function in keras to compute recall.
# Arguments:
# y_true - Actual labels
# y_pred - Predicted labels
def recall(y_true, y_pred):
    TP = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    PP = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = TP / (PP + K.epsilon())
    return recall

<function __main__.recall>

# Now tuning the privious model

### Understanding an intermediate layer in keras


In [71]:
# Building an LSTM model
look_back=15##here i am giving to 15 words at a time stamp
model = Sequential() # Call Sequential to initialize a network
model.add(Embedding(input_dim=len(tokenizer.word_index), 
                    input_length=100, 
                    output_dim=100)) # Add an embedding layer which represents each unique token as a vector
model.add(LSTM(15,input_shape=(1, look_back), return_sequences=False)) # Add an LSTM layer
model.add(Dense(3, activation='softmax')) # Add an ouput layer. Since classification, 3 nodes for 3 classes.

In [72]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy',recall])
model.fit(train_x, train_y, epochs=20, validation_split=0.25)

Train on 450 samples, validate on 150 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x25477a49630>

In [None]:
##above result is the biasing case then need more tuning

In [75]:
# Building an LSTM model
look_back=715##here i am giving to 4 words at a time stamp
model = Sequential() # Call Sequential to initialize a network
model.add(Embedding(input_dim=len(tokenizer.word_index), 
                    input_length=100, 
                    output_dim=100)) # Add an embedding layer which represents each unique token as a vector
model.add(LSTM(8,input_shape=(1, look_back), return_sequences=False)) # Add an LSTM layer
model.add(Dense(3, activation='softmax')) # Add an ouput layer. Since classification, 3 nodes 

In [76]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy',recall])
model.fit(train_x, train_y, epochs=20, validation_split=0.25)

Train on 450 samples, validate on 150 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x25408223a90>

In [None]:
##Now I am using drop outs for over fitting

In [80]:
# Building an LSTM model
look_back=7##here i am giving to 4 words at a time stamp
model = Sequential() # Call Sequential to initialize a network
model.add(Embedding(input_dim=len(tokenizer.word_index), 
                    input_length=100, 
                    output_dim=100)) # Add an embedding layer which represents each unique token as a vector
model.add(Dropout(0.5))
model.add(LSTM(15,input_shape=(1, look_back), return_sequences=False)) # Add an LSTM layer

model.add(Dense(3, activation='softmax')) # Add an ouput layer. Since classification, 3 nodes 

In [81]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy',recall])
model.fit(train_x, train_y, epochs=20, validation_split=0.25)

Train on 450 samples, validate on 150 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2540c5bd8d0>

In [98]:
# Building an LSTM model
look_back=7##here i am giving to 4 words at a time stamp
model = Sequential() # Call Sequential to initialize a network
model.add(Embedding(input_dim=len(tokenizer.word_index), 
                    input_length=100, 
                    output_dim=150)) # Add an embedding layer which represents each unique token as a vector
model.add(Dropout(0.4))
model.add(LSTM(12,input_shape=(1, look_back), return_sequences=False)) # Add an LSTM layer


model.add(Dense(3, activation='softmax')) # Add an ouput layer. Since classification, 3 nodes 

In [97]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy',recall])
model.fit(train_x, train_y, epochs=20, validation_split=0.25)

Train on 450 samples, validate on 150 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x25426a77400>

In [99]:
test_prob = model.predict(test_x)
test_prob[5]
test_classes = model.predict_classes(test_x)
test_classes.shape
print(test_classes)

[1 1 2 0 2 2 1 0 2 0 2 0 2 1 0 0 2 0 2 1 1 0 0 0 1 0 2 0 0 1 2 2 2 1 1 1 2
 2 2 2 0 2 1 1 2 0 2 0 0 0 0 2 1 1 0 2 2 1 0 2 1 0 2 0 1 0 0 1 0 0 1 2 0 2
 0 0 0 0 0 2 2 1 0 0 1 0 0 2 2 0 2 1 1 0 1 0 1 2 2 1 0 0 2 0 2 0 2 0 2 2 2
 2 2 2 2 2 2 2 2 2 1 0 2 1 0 2 0 1 2 0 1 2 0 0 1 0 2 1 0 2 2 2 0 0 2 0 1 1
 0 2]


# now using Optimizer='rmsprop'

In [101]:
look_back=7##here i am giving to 4 words at a time stamp
model = Sequential() # Call Sequential to initialize a network
model.add(Embedding(input_dim=len(tokenizer.word_index), 
                    input_length=100, 
                    output_dim=150)) # Add an embedding layer which represents each unique token as a vector
model.add(Dropout(0.4))
model.add(LSTM(12,input_shape=(1, look_back), return_sequences=False)) # Add an LSTM layer


model.add(Dense(3, activation='softmax')) # Add an ouput layer. Since classification, 3 nodes


In [102]:
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy',recall])
model.fit(train_x, train_y, epochs=20, validation_split=0.25)

Train on 450 samples, validate on 150 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2542c55e828>

In [104]:
look_back=7##here i am giving to 4 words at a time stamp
model = Sequential() # Call Sequential to initialize a network
model.add(Embedding(input_dim=len(tokenizer.word_index), 
                    input_length=100, 
                    output_dim=150)) # Add an embedding layer which represents each unique token as a vector
model.add(Dropout(0.4))
model.add(LSTM(12,input_shape=(1, look_back), return_sequences=False)) # Add an LSTM layer


model.add(Dense(3, activation='softmax')) # Add an ouput layer. Since classification, 3 nodes

In [110]:
from keras import optimizers

###################ADAM hyper tune

In [115]:
##Hypertuning The Adam
adam = optimizers.Adam(lr=0.01, decay=1e-6, beta_1=.8 ,beta_2=.85)


In [116]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy',recall])
model.fit(train_x, train_y, epochs=20, validation_split=0.25)


Train on 450 samples, validate on 150 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x25434ea6320>

In [None]:
##now I am using normalization

In [92]:
# Building an LSTM model
look_back=7##here i am giving to 4 words at a time stamp

model = Sequential() # Call Sequential to initialize a network

model.add(Embedding(input_dim=len(tokenizer.word_index), 
                    input_length=100, 
                    output_dim=100)) # Add an embedding layer which represents each unique token as a vector
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(LSTM(15,input_shape=(1, look_back), return_sequences=False)) # Add an LSTM layer

model.add(Dense(3, activation='softmax')) # Add an ouput layer. Since classification, 3 nodes 

In [93]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy',recall])
model.fit(train_x, train_y, epochs=20, validation_split=0.25)


Train on 450 samples, validate on 150 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2541d013da0>

## You can see that when I use normalzation accuracy dicreases

In [117]:
look_back=7##here i am giving to 4 words at a time stamp
model = Sequential() # Call Sequential to initialize a network
model.add(Embedding(input_dim=len(tokenizer.word_index), 
                    input_length=100, 
                    output_dim=150)) # Add an embedding layer which represents each unique token as a vector
model.add(Dropout(0.4))
model.add(LSTM(12,input_shape=(1, look_back), return_sequences=False)) # Add an LSTM layer


model.add(Dense(3, activation='softmax')) # Add an ouput layer. Since classification, 3 nodes
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy',recall])
model.fit(train_x, train_y, epochs=20, validation_split=0.25)

Train on 450 samples, validate on 150 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x254396acf60>

# Transfer Learning

In [118]:
model.layers

[<keras.layers.embeddings.Embedding at 0x25432694048>,
 <keras.layers.core.Dropout at 0x25432694128>,
 <keras.layers.recurrent.LSTM at 0x254326942e8>,
 <keras.layers.core.Dense at 0x254326943c8>]

In [125]:
import keras.backend as K 
# Create a user defined function in keras, where we mention the input and output
# This function returns a list
eo = K.function([model.layers[0].input],
                  [model.layers[0].output])

out = eo([train_x[0:5]]) 
print(type(out))
print(len(out))
print(out[0].shape)

<class 'list'>
1
(5, 100, 150)


In [124]:
out[0]

array([[[-0.00406499, -0.03752612,  0.02748853, ...,  0.03032188,
         -0.01770221, -0.0141458 ],
        [-0.01464888, -0.00491512,  0.07457757, ...,  0.04264292,
          0.01147406, -0.02281868],
        [ 0.05290215, -0.03380545,  0.06093838, ...,  0.06677757,
          0.00228953,  0.0051606 ],
        ...,
        [ 0.02954119,  0.02109104,  0.0712281 , ...,  0.01830088,
          0.05430136,  0.02652512],
        [ 0.01524809,  0.01126303,  0.05917569, ...,  0.00049231,
          0.02038612, -0.08786622],
        [ 0.08145417,  0.03187617,  0.07208585, ..., -0.02405494,
          0.0391557 ,  0.00609524]],

       [[-0.01036389,  0.02318658,  0.04365764, ...,  0.0020917 ,
          0.00108375,  0.02792811],
        [-0.01303279, -0.00939311,  0.03490455, ...,  0.00327179,
          0.03049978, -0.08557211],
        [ 0.01992651,  0.00969533, -0.02897304, ..., -0.03797258,
          0.0065158 ,  0.02314282],
        ...,
        [-0.00243311,  0.0309361 ,  0.01980856, ...,  

In [None]:
last_layer = model.get_layer('block5_pool').output ## here i am using "fc2 layer 
x = Flatten(name='flatten')(last_layer)
x = Dense(32, activation='relu', name='fc1')(x)

out = Dense(num_classes, activation='softmax', name='output')(x)
custom_vgg_model3 = Model(image_input, out)

In [None]:
look_back=7##here i am giving to 4 words at a time stamp
model = Sequential() # Call Sequential to initialize a network
model.add(Embedding(input_dim=len(tokenizer.word_index), 
                    input_length=100, 
                    output_dim=150)) # Add an embedding layer which represents each unique token as a vector
model.add(Dropout(0.4))
model.add(LSTM(12,input_shape=(1, look_back), return_sequences=False)) # Add an LSTM layer


model.add(Dense(3, activation='softmax')) # Add an ouput layer. Since classification, 3 nodes
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy',recall])
model.fit(train_x, train_y, epochs=20, validation_split=0.25)