In [12]:
import numpy as np
import pandas as pd
import re
import nltk
nltk.download('stopwords')
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding , SimpleRNN , Dense
from tensorflow.keras.callbacks import EarlyStopping , TensorBoard
from tensorflow.keras.models import load_model
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
import datetime
import pickle

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Mohit\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
model=load_model('model.h5')



In [3]:
model.summary()

In [5]:
model.get_weights()

[array([[ 0.13383146, -0.03290616, -0.05809052, ...,  0.01967067,
          0.19606653, -0.01614201],
        [-0.02528405, -0.02435291, -0.00292114, ...,  0.01280126,
          0.03746379,  0.000666  ],
        [-0.0328173 , -0.00855344, -0.01279134, ..., -0.02399154,
         -0.05791276,  0.10278855],
        ...,
        [ 0.01315242, -0.02130634,  0.00021758, ...,  0.04535646,
          0.04624068,  0.00832478],
        [ 0.02500044, -0.04855026,  0.0308511 , ...,  0.02750278,
          0.01529292,  0.02269349],
        [ 0.00941496,  0.04880268,  0.01916846, ...,  0.02943027,
         -0.00808353, -0.0210886 ]], dtype=float32),
 array([[-0.1367121 , -0.11864655,  0.1281474 , ...,  0.0873216 ,
          0.06315236,  0.13095364],
        [ 0.0793447 ,  0.09727192,  0.02803271, ..., -0.02981564,
          0.0590229 , -0.09707949],
        [ 0.10161565, -0.11294707,  0.10761014, ..., -0.15786466,
         -0.00253513,  0.0335158 ],
        ...,
        [ 0.1050588 ,  0.02835315, -0.1

In [14]:
with open('tokenizer.pkl' , 'rb') as file:
    tokenizer=pickle.load(file)

In [7]:
ps=PorterStemmer()

In [35]:
def preprocess_text(text):
    review=re.sub('[^a-zA-Z]' , ' ' , text)
    review=review.lower()
    review=review.split()
    review=[ps.stem(word) for word in review if word not in stopwords.words('english')]
    review=' '.join(review)
    return review
    
def encode_and_pad_text(text):
    sequences=tokenizer.texts_to_sequences([text])
    padded_text=pad_sequences(sequences , maxlen=500 , padding='pre')
    return padded_text

In [36]:
text = 'Hey there is a bumper offer on your credit card , check now'
preprocessed_text=preprocess_text(text)
print(preprocessed_text)
encoded_text=encode_and_pad_text(preprocessed_text)
print(encoded_text)
print(encoded_text.shape)

hey bumper offer credit card check
[[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   

In [39]:
temp_array=model.predict(encoded_text)
print(temp_array)
temp_array[0][0]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[[0.02820978]]


np.float32(0.028209776)

In [80]:
def prediction(text):
    preprocessed_text=preprocess_text(text)
    encoded_text=encode_and_pad_text(preprocessed_text)
    prediction=model.predict(encoded_text)
    
    if prediction[0][0] < 0.5:
        target='ham'
    else:
        target='spam'
    return target , prediction

In [81]:
target , proba=prediction(text)

print(target)
print(proba)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
ham
[[0.00949941]]


In [82]:
print(proba.flatten()[0])

0.009499415


In [83]:
text='Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...'
target , proba = prediction(text)
print(target , proba)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
ham [[0.00949941]]


In [87]:
text = "WINNER!! As a valued network customer you have been selected to receivea å£900 prize reward! To claim call 09061701461. Claim code KL341. Valid 12 hours only."
target , proba=prediction(text)
print(target , proba)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
spam [[0.99999994]]
