In [1]:
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split

In [2]:
# Loading the dataset
data = pd.read_csv('phishing_email.csv')

In [3]:
data.head()

Unnamed: 0,text_combined,label
0,hpl nom may 25 2001 see attached file hplno 52...,0
1,nom actual vols 24 th forwarded sabrae zajac h...,0
2,enron actuals march 30 april 1 201 estimated a...,0
3,hpl nom may 30 2001 see attached file hplno 53...,0
4,hpl nom june 1 2001 see attached file hplno 60...,0


In [4]:
print(data.columns)

Index(['text_combined', 'label'], dtype='object')


In [5]:
# Preprocessing the data 
max_words = 5000
max_len = 200

In [6]:
# Tokenizing the text data
tokenizer = Tokenizer(num_words=max_words, oov_token="<OOV>")
tokenizer.fit_on_texts(data['text_combined'])
X = tokenizer.texts_to_sequences(data['text_combined'])
X = pad_sequences(X, maxlen=max_len)


In [7]:
# Converting labels to integer
y = data['label'].astype(int)

In [8]:
# Spliting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
# Building the LSTM model
model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=64))
model.add(LSTM(64, return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(32))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [10]:
# Compiling the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [11]:
# Training the model
history = model.fit(X_train, y_train, epochs=1, validation_data=(X_test, y_test), batch_size=32)

[1m2063/2063[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m538s[0m 214ms/step - accuracy: 0.9247 - loss: 0.1812 - val_accuracy: 0.9828 - val_loss: 0.0543


In [12]:
# Evaluating the model
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {test_acc}')

[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 67ms/step - accuracy: 0.9813 - loss: 0.0524
Test Accuracy: 0.9828463792800903






[notice] A new release of pip is available: 24.0 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip
