# Load the required data stored in last notebooks 

In [1]:
import pickle
data = {}

with open('model_training_data', 'rb') as f:
    data = pickle.load(f)

In [3]:
data.keys()

dict_keys(['X_train', 'X_test', 'y_train', 'y_test', 'max_words', 'max_sequence', 'legend', 'labels_legend_inverted', 'tokenizer'])

###  Extract data from dict

In [9]:
X_test = data['X_test']
X_train = data['X_train']
y_test = data['y_test']
y_train = data['y_train']
labels_legend_inverted = data['labels_legend_inverted']
legend = data['legend']
max_sequence = data['max_sequence']
max_words = data['max_words']
tokenizer = data['tokenizer']

###  Import required modules

In [8]:
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Embedding, LSTM, SpatialDropout1D
from tensorflow.keras.models import Model, Sequential

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

###  Calculate the step per epoch and validation size 

In [10]:
TRAINING_SIZE = len(X_train)
TRAINING_SIZE

2582

In [11]:
VALIDATION_SIZE = len(X_test)
VALIDATION_SIZE

1272

In [12]:
import math

In [13]:
BATCH_SIZE=128

# We take the ceiling because we do not drop the remainder of the batch
compute_steps_per_epoch = lambda x: int(math.ceil(1. * x / BATCH_SIZE))
steps_per_epoch = compute_steps_per_epoch(TRAINING_SIZE)
val_steps = compute_steps_per_epoch(VALIDATION_SIZE)

print(f'compute_steps_per_epoch : {compute_steps_per_epoch}')
print(f'steps_per_epoch : {steps_per_epoch}')
print(f'val_steps : {val_steps}')

compute_steps_per_epoch : <function <lambda> at 0x000002122A5F6D30>
steps_per_epoch : 21
val_steps : 10


###  Create model blue print

In [15]:
embed_dim = 128
lstm_out = 196
MAX_NUM_WORDS=280

model = Sequential()
model.add(Embedding(MAX_NUM_WORDS, embed_dim, input_length=X_train.shape[1]))
model.add(SpatialDropout1D(0.4))
model.add(LSTM(lstm_out, dropout=0.3, recurrent_dropout=0.3))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer="adam", metrics=['accuracy'])
print(model.summary())

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 280, 128)          35840     
_________________________________________________________________
spatial_dropout1d (SpatialDr (None, 280, 128)          0         
_________________________________________________________________
lstm (LSTM)                  (None, 196)               254800    
_________________________________________________________________
dense (Dense)                (None, 2)                 394       
Total params: 291,034
Trainable params: 291,034
Non-trainable params: 0
_________________________________________________________________
None


In [16]:
epochs = 5
model.fit(X_train, y_train,
          validation_data=(X_test, y_test),
          steps_per_epoch=steps_per_epoch,
          batch_size=BATCH_SIZE,
          verbose=1,
          epochs=epochs)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x2122c887790>

In [17]:
model.save('spam_model.h5')

In [20]:
model.history

<tensorflow.python.keras.callbacks.History at 0x2122c887790>

### Predict using model

In [25]:
model.predict(X_test[:3])

array([[0.13128425, 0.86871576],
       [0.9593877 , 0.04061222],
       [0.9846661 , 0.01533389]], dtype=float32)

In [27]:
y_test[:3]

array([[0., 1.],
       [1., 0.],
       [1., 0.]], dtype=float32)

###  create method to convert the text into token of equal size 

In [28]:
def get_token(texts):
    from tensorflow.keras.preprocessing.sequence import pad_sequences
    from tensorflow.keras.preprocessing.text import Tokenizer
    MAX_NUM_WORDS=280
    tokenizer = Tokenizer(num_words=MAX_NUM_WORDS)
    tokenizer.fit_on_texts(texts)
    sequences = tokenizer.texts_to_sequences(texts)
    return pad_sequences(sequences, maxlen=MAX_NUM_WORDS)

In [31]:
model.predict(get_token(['get smartphone at lowest price call on 12345678']))

array([[0.11496684, 0.88503313]], dtype=float32)

In [32]:
model.predict(get_token(['your registration is successfully done']))

array([[0.9639953 , 0.03600475]], dtype=float32)

#  load model and check if its working fine

In [33]:
from keras.models import load_model

loaded_model = load_model('spam_model.h5')

In [34]:
loaded_model.predict(get_token(['get smartphone at lowest price call on 12345678']))

array([[0.11496684, 0.88503313]], dtype=float32)

In [35]:
loaded_model.predict(get_token(['your registration is successfully done']))

array([[0.9639953 , 0.03600475]], dtype=float32)

#     #******************** Happy Ending  ********************#