In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

In [12]:
df = pd.read_csv("combined_data.csv")

In [14]:
x_train, x_test, y_train, y_test = train_test_split(df['text'], df['label'], test_size=0.2, random_state=42)

In [16]:
tokenizer = Tokenizer(num_words=10000, oov_token='<OOV')
tokenizer.fit_on_texts(x_train)

In [18]:
x_train_seq = tokenizer.texts_to_sequences(x_train)
x_test_seq = tokenizer.texts_to_sequences(x_test)

In [19]:
max_len = 20
x_train_pad = pad_sequences(x_train_seq, maxlen=max_len, padding='post')
x_test_pad = pad_sequences(x_test_seq, maxlen=max_len, padding='post')

In [22]:
model = Sequential([
    Embedding(input_dim=10000, output_dim=64, input_length=max_len),
    LSTM(64),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

In [23]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [26]:
model.fit(x_train_pad, y_train, epochs=10, batch_size=4, validation_data=(x_test_pad, y_test))

Epoch 1/10
[1m16690/16690[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 8ms/step - accuracy: 0.9843 - loss: 0.0454 - val_accuracy: 0.9688 - val_loss: 0.0930
Epoch 2/10
[1m16690/16690[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 9ms/step - accuracy: 0.9892 - loss: 0.0328 - val_accuracy: 0.9682 - val_loss: 0.1119
Epoch 3/10
[1m16690/16690[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 8ms/step - accuracy: 0.9940 - loss: 0.0192 - val_accuracy: 0.9682 - val_loss: 0.1203
Epoch 4/10
[1m16690/16690[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m181s[0m 11ms/step - accuracy: 0.9952 - loss: 0.0153 - val_accuracy: 0.9668 - val_loss: 0.1469
Epoch 5/10
[1m16690/16690[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m178s[0m 11ms/step - accuracy: 0.9967 - loss: 0.0106 - val_accuracy: 0.9670 - val_loss: 0.1393
Epoch 6/10
[1m16690/16690[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m180s[0m 11ms/step - accuracy: 0.9969 - loss: 0.0095 - val_accuracy: 0.9646 - val_lo

<keras.src.callbacks.history.History at 0x1ced9246000>

In [27]:
loss, accuracy = model.evaluate(x_test_pad, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

[1m522/522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9666 - loss: 0.1947
Test Accuracy: 0.9665


In [28]:
def predict(text):
    seq = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(seq, maxlen=max_len, padding='post')
    pred = model.predict(padded)[0][0]
    return "Spam" if pred > 0.5 else "Not Spam"

In [35]:
# Example 1: Obvious spam
print("Test 1:", predict("Congratulations! You've won a free iPhone. Click here to claim now."))

# Example 2: Normal message
print("Test 2:", predict("Hey, can we reschedule our meeting for tomorrow?"))

# Example 3: Spam with urgency
print("Test 3:", predict("Urgent! Your bank account has been compromised. Login now."))

# Example 4: Friendly message
print("Test 4:", predict("Let's grab lunch this afternoon."))

# Example 5: Contest/Prize offer
print("Test 5:", predict("Win ₹1,00,000 cash prize! Enter the contest today."))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 146ms/step
Test 1: Spam
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 158ms/step
Test 2: Not Spam
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step
Test 3: Spam
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 148ms/step
Test 4: Spam
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
Test 5: Not Spam


In [36]:
import gradio as gr

interface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=4, placeholder="Enter a message here..."),
    outputs="text",
    title="Spam Message Classifier",
    description="Enter a message and the model will predict whether it's Spam or Not Spam."
)

interface.launch()

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 165ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 167ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 147ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 168ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 152ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 145ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 146ms/step
