In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import GlobalMaxPooling1D, Dense, Input, LSTM, Embedding
from tensorflow.keras.models import Model

In [None]:
# Load the raw CSV; it is decoded as ISO-8859-1 rather than pandas' default UTF-8.
df = pd.read_csv(filepath_or_buffer='spam.csv', encoding='ISO-8859-1')

In [None]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

In [None]:
# Discard the three 'Unnamed' columns.
# errors='ignore' keeps this cell re-runnable: `df` is rebound in place, so a
# second execution would otherwise raise KeyError because the columns are gone.
df = df.drop(columns=['Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'], errors='ignore')

In [None]:
# Check the frame again now that the 'Unnamed' columns have been dropped.
df.head(n=5)

In [None]:
# Rename the columns positionally: first -> 'labels', second -> 'data'.
# (Assignment raises if the frame does not have exactly two columns.)
renamed_columns = ['labels', 'data']
df.columns = renamed_columns

In [None]:
# Confirm the new column names.
df.head(n=5)

In [None]:
# Encode the string labels as integers: ham -> 0, spam -> 1.
label_map = {'ham': 0, 'spam': 1}
df['b_labels'] = df['labels'].map(label_map)

# Series.map produces NaN for any value that is not a key of the mapping.
# Fail loudly here instead of silently handing NaN targets to the model.
unmapped_labels = df.loc[df['b_labels'].isna(), 'labels'].unique()
if len(unmapped_labels) > 0:
    raise ValueError('Unexpected label values: %s' % list(unmapped_labels))

# Target vector as a NumPy array, aligned row-for-row with df['data'].
Y = df['b_labels'].values

In [None]:
# Hold out 33% of the messages for evaluation.
# A fixed random_state makes the split identical on every Restart & Run All;
# without it the train/test membership (and every downstream number) changes
# from run to run. NOTE: model weight initialisation is seeded separately.
df_train, df_test, Y_train, Y_test = train_test_split(
    df['data'],
    Y,
    test_size=0.33,
    random_state=42,
)

In [None]:
MAX_VOCAB_SIZE = 20000

# The vocabulary is fitted on the training texts only; the test texts are
# merely converted with that same vocabulary afterwards.
tokenizer = Tokenizer(num_words=MAX_VOCAB_SIZE)
tokenizer.fit_on_texts(df_train)

# Turn every text into a list of integer word indices.
sequences_train, sequences_test = (
    tokenizer.texts_to_sequences(texts) for texts in (df_train, df_test)
)

In [None]:
# word -> integer index mapping built by the tokenizer during fit_on_texts
word2idx = tokenizer.word_index

# V: number of distinct tokens in that mapping (used to size the embedding)
V = len(word2idx)
print('Found %s unique tokens' % (V,))

In [None]:
# Pad the training sequences into a single 2-D array (no maxlen given, so the
# width is determined by pad_sequences from the data itself).
data_train = pad_sequences(sequences=sequences_train)

# T: sequence length (second axis) -- reused for the test set and the model input
T = data_train.shape[1]
print(data_train.shape)

In [None]:
# Force the test sequences to the training width T so both arrays match the
# model's fixed input shape.
data_test = pad_sequences(sequences=sequences_test, maxlen=T)
data_test.shape

In [None]:
# Hyperparameters
D = 20  # embedding dimensionality
M = 15  # LSTM hidden-state dimensionality

# Input: one padded sequence of T integer word indices per sample.
i = Input(shape=(T,))

# Embedding table has V + 1 rows: per the Keras Tokenizer, word indices start
# at 1, and pad_sequences fills with 0 by default, so index 0 needs a row too.
x = Embedding(input_dim=V + 1, output_dim=D)(i)

# return_sequences=True keeps the hidden state at every time step so the
# pooling layer below can take the maximum over the time axis.
x = LSTM(units=M, return_sequences=True)(x)
x = GlobalMaxPooling1D()(x)

# Single sigmoid unit for the binary ham/spam output.
x = Dense(units=1, activation='sigmoid')(x)

model = Model(inputs=i, outputs=x)

In [None]:
# Binary classification set-up: cross-entropy loss, Adam optimizer, accuracy metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train for 10 epochs; the held-out split is passed as validation data, so
# `r.history` carries both training and validation curves.
r = model.fit(
    x=data_train,
    y=Y_train,
    validation_data=(data_test, Y_test),
    epochs=10,
)

In [None]:
# Training vs. validation loss, one point per epoch (from the fit history `r`).
# Uses an explicit Axes so the figure carries its own title and axis labels and
# can be read on its own when the notebook is skimmed.
fig, ax = plt.subplots()
ax.plot(r.history['loss'], label='loss')
ax.plot(r.history['val_loss'], label='val_loss')
ax.set(title='Loss per epoch', xlabel='Epoch', ylabel='Binary cross-entropy loss')
ax.legend();  # trailing ';' suppresses the Legend repr in the cell output

In [None]:
# Training vs. validation accuracy, one point per epoch (from the fit history `r`).
# Uses an explicit Axes so the figure carries its own title and axis labels and
# can be read on its own when the notebook is skimmed.
fig, ax = plt.subplots()
ax.plot(r.history['accuracy'], label='accuracy')
ax.plot(r.history['val_accuracy'], label='val_accuracy')
ax.set(title='Accuracy per epoch', xlabel='Epoch', ylabel='Accuracy')
ax.legend();  # trailing ';' suppresses the Legend repr in the cell output