In [1]:
!pip install tensorflow_text

Collecting tensorflow_text
  Downloading tensorflow_text-2.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.2/5.2 MB[0m [31m19.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tensorflow_text
Successfully installed tensorflow_text-2.15.0


In [2]:
import tensorflow_text

In [19]:
import tensorflow as tf
from tensorflow import keras
from keras.layers import Input,Dense
from tensorflow_hub import KerasLayer # to use bert pretrained model
from keras import Model # functional api

import numpy as np
import pandas as pd
from tensorflow import string as tf_string

In [4]:
MODEL_INFO = {
    "model_name":"small_bert/bert_en_uncased_L-4_H-512_A-8",
    # Identifies as BERT model
    "model_encoder_link":"https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-512_A-8/1",
    #link to the pretrained bert model's encoder,TensorFlow Hub is a repository of pre-trained models that can be easily reused in TensorFlow applications
    "model_preprocessor_link":"https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
    #preprocessor link * preprocessor is responsible for handling the text input and preparing it in a format suitable for the BERT model.
}

#the above links are from dataset word file in advance nlp folder

In [5]:
model_preprocessor = KerasLayer(MODEL_INFO["model_preprocessor_link"],name = "model_preprocessor")

In [6]:
model_encoder = KerasLayer(MODEL_INFO['model_encoder_link'],name ="model_encoder")

In [7]:
# Building DNN architecture -Functional API

def neural_network_model(model_preprocessor,model_encoder):
  input_layer = Input(shape=(),dtype = tf_string)
  encoder_input_layer = model_preprocessor(input_layer)
  encoder_output_layer = model_encoder(encoder_input_layer)
  pooled_output_layer =encoder_output_layer['pooled_output']
  hidden_layer_1 = Dense(10,activation = 'relu')(pooled_output_layer)
  output_layer = Dense(1,activation = 'sigmoid')(hidden_layer_1)
  bert_model = Model(input_layer,output_layer)
  bert_model.compile(optimizer = 'adam',loss = 'binary_crossentropy',metrics =['accuracy'])
  return bert_model

In [8]:
bert_model = neural_network_model(model_preprocessor,model_encoder)

In [9]:
input_dataset = pd.read_csv("/content/sms_spam.csv")
input_dataset.head()

Unnamed: 0,label,text
0,0,"Go until jurong point, crazy.. Available only ..."
1,0,Ok lar... Joking wif u oni...
2,1,Free entry in 2 a wkly comp to win FA Cup fina...
3,0,U dun say so early hor... U c already then say...
4,0,"Nah I don't think he goes to usf, he lives aro..."


In [32]:
input_dataset['text'][2]

"Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's"

In [10]:
input_dataset.shape

(5572, 2)

In [11]:
bert_model.fit(x=input_dataset['text'], y =input_dataset['label'],batch_size=32,epochs =1)



<keras.src.callbacks.History at 0x7d7c3dc74760>

In [12]:
input_dataset['Predicted_probs'] = bert_model.predict(input_dataset['text'])



In [13]:
input_dataset['Predicted_probs'] = (input_dataset['Predicted_probs'].apply(lambda x :"spam" if x >0.5 else "not spam"))

In [20]:
bert_model.predict(np.array(["Hello how are you"]))



array([[0.02976334]], dtype=float32)

In [28]:
bert_model.predict(np.array(["WOW! Congratulation,yow won $500, can you share your account details for further process"]))



array([[0.39091885]], dtype=float32)

In [29]:
bert_model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None,)]                    0         []                            
                                                                                                  
 model_preprocessor (KerasL  {'input_type_ids': (None,    0         ['input_1[0][0]']             
 ayer)                       128),                                                                
                              'input_word_ids': (None,                                            
                             128),                                                                
                              'input_mask': (None, 128)                                           
                             }                                                                

In [33]:
bert_model.predict(np.array(["Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's"]))



array([[0.77272886]], dtype=float32)

In [35]:
# Another method of doing

In [34]:
from keras.models import load_model
from datetime import datetime
import os

In [15]:
## Importing external python modules:
import tensorflow_text
from keras.layers import Input
from tensorflow_hub import KerasLayer
import numpy as np
import pandas as pd
from datetime import datetime
import os
from keras.models import load_model

class NLPModel(object):

    def __init__(
        self,
        filepath="sms_spam.csv",
        x_columns=["text"],
        y_columns=["label"],
        batch_size=64,
        n_epochs=1,
        model_filename = "bert_model.h5"
    ):
        self.filepath = filepath
        self.n_epochs = n_epochs
        self.batch_size = batch_size
        self.model_filename = model_filename
        self.seed_value = 1024
        self.x_columns = x_columns
        self.y_columns = y_columns
        self.MODEL_INFO = {
        "model_name": "small_bert/bert_en_uncased_L-4_H-512_A-8",
        "model_encoder_link": "https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-512_A-8/1",
        "model_preprocessor_link": "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
        }
        self.model_preprocessor = KerasLayer(MODEL_INFO["model_preprocessor_link"])
        self.model_encoder = KerasLayer(MODEL_INFO["model_encoder_link"])
        np.random.seed(self.seed_value)

    def read_training_data(self):
        training_dataset = pd.read_csv(self.filepath)
        return training_dataset

    def neural_network_model(self):
        input_layer = Input(shape=(), dtype=tf_string)
        encoder_input_layer = model_preprocessor(input_layer)
        encoder_output_layer = model_encoder(encoder_input_layer)
        pooled_output_layer = encoder_output_layer['pooled_output']
        hidden_layer_1 = Dense(10, activation="relu")(pooled_output_layer)
        output_layer = Dense(1, activation="sigmoid")(hidden_layer_1)
        bert_model = Model(input_layer, output_layer)
        bert_model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
        return bert_model

    def train_model(self):
        training_dataset = self.read_training_data()
        bert_model = self.neural_network_model()
        # Model training begins:
        start = datetime.now()
        bert_model.fit(
            x=training_dataset[self.x_columns],
            y=training_dataset[self.y_columns],
            epochs=self.n_epochs,
            batch_size=self.batch_size
        )
        bert_model.save(self.model_filename)
        end = datetime.now()
        return bert_model

    def make_prediction(self, sms_content):
        if os.path.isfile(self.model_filename):
            bert_model = load_model(
                self.model_filename,
                custom_objects = {
                    "KerasLayer": self.model_preprocessor,
                    "KerasLayer": self.model_encoder
                }
            )
        else:
            bert_model = self.train_model()

        predicted_prob = (
            bert_model.predict(
                np.array([sms_content])
        ))
        predicted_class = "Not Spam"
        if predicted_prob > 0.5:
            predicted_class = "Spam"
        return predicted_prob, predicted_class

In [16]:
my_sentence = NLPModel().make_prediction("Hi Achyuth, Hope iam doing well, can we connect today at 5GMT")

OSError: Unable to open file (truncated file: eof = 28311552, sblock->base_addr = 0, stored_eof = 115218560)

In [None]:
Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's