In [6]:
#Import All The Libraries
import pandas as pd
import numpy as np
import tensorflow as tf
tf.random.set_seed(3)
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

#Import the dataset
df=pd.read_csv("mail_data.csv")

#Data Cleaning
new_df=df.where((pd.notnull(df)),'')
new_df['Category']=new_df['Category'].map({'ham':1,'spam':0})
x=new_df['Message']
y=new_df['Category']

#Train-Test Split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)

#Data Preprocessing
feature_extraction=TfidfVectorizer(min_df=1,stop_words='english',lowercase=True)
x_train_1=feature_extraction.fit_transform(x_train)
x_test_1=feature_extraction.transform(x_test)
y_train=y_train.astype('int')
y_test=y_test.astype('int')
x_train_1 = x_train_1.toarray()
x_test_1 = x_test_1.toarray()

#Model Training
input_dim = x_train_1.shape[1]  # number of TF-IDF features
model = keras.Sequential([
    keras.layers.Input(shape=(input_dim,)),
    keras.layers.Dense(64,activation='relu'),
    keras.layers.Dense(32,activation='relu'),
    keras.layers.Dense(16,activation='relu'),
    keras.layers.Dense(1,activation='sigmoid')
])
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])
es=EarlyStopping(patience=20,restore_best_weights=True)
history=model.fit(x_train_1,y_train,validation_split=0.1,epochs=20,batch_size=32,callbacks=es)

#Model Testing and accuracy
loss,Accuracy=model.evaluate(x_test_1,y_test)
print(f"Test Loss :{loss:.4f}")
print(f"Test Accuracy :{Accuracy:.4f}")

Epoch 1/20
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step - accuracy: 0.8162 - loss: 0.5393 - val_accuracy: 0.9395 - val_loss: 0.1750
Epoch 2/20
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.9763 - loss: 0.1008 - val_accuracy: 0.9709 - val_loss: 0.0898
Epoch 3/20
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.9948 - loss: 0.0138 - val_accuracy: 0.9731 - val_loss: 0.0959
Epoch 4/20
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.9993 - loss: 0.0037 - val_accuracy: 0.9709 - val_loss: 0.1039
Epoch 5/20
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.9997 - loss: 0.0020 - val_accuracy: 0.9709 - val_loss: 0.1164
Epoch 6/20
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.9997 - loss: 0.0015 - val_accuracy: 0.9709 - val_loss: 0.1256
Epoch 7/20
[1m126/126

In [6]:
x_train.shape

(4457,)