In [None]:
!pip install tensorflow_text

In [None]:
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text

In [None]:
import pandas as pd
import numpy as np

In [None]:
!unzip "/content/archive (1).zip"

In [None]:
# Read the spam CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="/content/spam.csv")
df.head(n=5)

In [None]:
# Summary statistics of the remaining columns, computed separately per Category.
df.groupby(by="Category").describe()

In [None]:
# Number of rows per Category value — shows how (im)balanced the raw data is.
df.loc[:, "Category"].value_counts()

In [None]:
# Rows whose Category is exactly "spam"; the shape gives the spam row count.
df_spam = df.loc[df["Category"].eq("spam")]
df_spam.shape

In [None]:
# Rows whose Category is exactly "ham"; the shape gives the ham row count.
df_ham = df.loc[df["Category"].eq("ham")]
df_ham.shape

In [None]:
# Randomly keep as many ham rows as there are spam rows so both classes end up
# the same size. The fixed random_state makes the draw — and every metric
# computed downstream — repeatable across kernel restarts.
df_ham_downsample = df_ham.sample(n=df_spam.shape[0], random_state=42)
df_ham_downsample.shape

In [None]:
# Stack all spam rows on top of the downsampled ham rows to form the balanced
# dataset used from here on.
df_balanced = pd.concat(
    objs=[df_spam, df_ham_downsample],
)
df_balanced.shape

In [None]:
# Sanity check: both Category values should now have the same row count.
df_balanced.loc[:, "Category"].value_counts()

In [None]:
# Numeric target column: 1 where Category is "spam", 0 for everything else.
# A vectorised comparison replaces the per-row Python lambda; the resulting
# values and int64 dtype are the same.
df_balanced["spam"] = df_balanced["Category"].eq("spam").astype("int64")

In [None]:
# Eyeball ten random rows to confirm the new "spam" column lines up with Category.
df_balanced.sample(n=10)

In [None]:
from sklearn.model_selection import train_test_split

# Split messages (features) and the 0/1 spam flag (target) into train and test
# parts using train_test_split's default proportions. Stratifying on the label
# keeps the spam/ham ratio the same in both parts; the fixed random_state makes
# the split reproducible, so the reported metrics do not change from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    df_balanced["Message"],
    df_balanced["spam"],
    stratify=df_balanced["spam"],
    random_state=42,
)

In [None]:
# First five training messages.
X_train.head(n=5)

In [None]:
# Wrap the two TF Hub models as Keras layers: the preprocessing model that is
# applied to the raw text input, and the BERT encoder that consumes its output.
bert_preprocess = hub.KerasLayer(
    handle="https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
)
bert_encoder = hub.KerasLayer(
    handle="https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4"
)

In [None]:
# --- BERT backbone ----------------------------------------------------------
# Each example is a single scalar string; it is run through the preprocessing
# layer and then through the encoder.
text_input = tf.keras.layers.Input(name='text', dtype=tf.string, shape=())
preprocessed_text = bert_preprocess(text_input)
outputs = bert_encoder(preprocessed_text)

# --- Classification head ----------------------------------------------------
# Light dropout on the encoder's pooled output, followed by a single sigmoid
# unit that emits one score in [0, 1] per message.
pooled_output = outputs['pooled_output']
l = tf.keras.layers.Dropout(rate=0.1, name="dropout")(pooled_output)
l = tf.keras.layers.Dense(units=1, activation='sigmoid', name="output")(l)

# --- Assemble ---------------------------------------------------------------
# End-to-end model: raw text in, sigmoid score out.
model = tf.keras.Model(outputs=[l], inputs=[text_input])

In [None]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

In [None]:
# Metrics tracked alongside the loss; the `name` values are the labels under
# which Keras reports them.
METRICS = [
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
]

# Binary cross-entropy pairs with the single sigmoid output unit of the model.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=METRICS,
)

In [None]:
# Train on the training split for 20 passes over the data.
model.fit(x=X_train, y=y_train, epochs=20)

In [None]:
# Loss and the compiled metrics on the held-out test split.
model.evaluate(x=X_test, y=y_test)

In [None]:
# Sigmoid scores for the test messages, collapsed into a flat 1-D array.
y_predicted = model.predict(x=X_test).flatten()

In [None]:
# Turn scores into hard labels: strictly above 0.5 -> 1 (spam), otherwise 0.
y_predicted = (y_predicted > 0.5).astype(int)
y_predicted

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Confusion matrix of true test labels against thresholded predictions.
cm = confusion_matrix(y_true=y_test, y_pred=y_predicted)
cm

In [None]:
from matplotlib import pyplot as plt
import seaborn as sn

# Draw the confusion matrix as an annotated heatmap (integer cell labels) and
# label the axes through the Axes object the heatmap was drawn on.
heatmap_axes = sn.heatmap(data=cm, annot=True, fmt='d')
heatmap_axes.set_xlabel('Predicted')
heatmap_axes.set_ylabel('Truth')

In [None]:
# Text report comparing the true test labels with the thresholded predictions;
# printed so the table keeps its line breaks.
report_text = classification_report(y_true=y_test, y_pred=y_predicted)
print(report_text)

In [None]:
# Hand-picked messages for a quick smoke test of the trained model: a few
# promotional-looking texts followed by two conversational ones.
reviews = [
    'Enter a chance to win $5000, hurry up, offer valid until march 31, 2021',
    'You are awarded a SiPix Digital Camera! call 09061221061 from landline. Delivery within 28days. T Cs Box177. M221BP. 2yr warranty. 150ppm. 16 . p pÂ£3.99',
    'it to 80488. Your 500 free text messages are valid until 31 December 2005.',
    'Hey Sam, Are you coming for a cricket game tomorrow',
    "Why don't you wait 'til at least wednesday to see if you get your ."
]

# One sigmoid score per message; the label column encodes spam as 1, so scores
# nearer 1 lean towards spam.
model.predict(x=reviews)