<a href="https://colab.research.google.com/github/Joothis/Neural_Network_SMS_Text_Classifier/blob/main/Neural_Network_SMS_Text_Classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [73]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint

In [60]:
# Change 'path_to_your_file.csv' to the actual file path
df = pd.read_csv("/content/spam.csv", encoding='latin-1')
df = df[['v1', 'v2']]  # Keeping only necessary columns
df.columns = ['label', 'message']  # Rename columns

print(df.head())

  label                                            message
0   ham  Go until jurong point, crazy.. Available only ...
1   ham                      Ok lar... Joking wif u oni...
2  spam  Free entry in 2 a wkly comp to win FA Cup fina...
3   ham  U dun say so early hor... U c already then say...
4   ham  Nah I don't think he goes to usf, he lives aro...


In [61]:
df['label'] = df['label'].map({'ham': 0, 'spam': 1})

In [63]:
X_train, X_test, y_train, y_test = train_test_split(df['message'], df['label'], test_size=0.2, random_state=42)

In [64]:
vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train).toarray()
X_test_tfidf = vectorizer.transform(X_test).toarray()

In [67]:
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train_tfidf.shape[1],)),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [68]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [75]:
checkpoint = ModelCheckpoint('sms_spam_model.h5', save_best_only=True, monitor='val_loss', mode='min')

In [76]:
history = model.fit(X_train_tfidf, y_train, validation_data=(X_test_tfidf, y_test),epochs=10, batch_size=32, callbacks=[checkpoint])

Epoch 1/10
[1m137/140[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 12ms/step - accuracy: 0.8363 - loss: 0.4916



[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.8371 - loss: 0.4873 - val_accuracy: 0.9309 - val_loss: 0.1616
Epoch 2/10
[1m137/140[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 13ms/step - accuracy: 0.9630 - loss: 0.1188



[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.9634 - loss: 0.1178 - val_accuracy: 0.9785 - val_loss: 0.0748
Epoch 3/10
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 16ms/step - accuracy: 0.9923 - loss: 0.0294 - val_accuracy: 0.9803 - val_loss: 0.0756
Epoch 4/10
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 25ms/step - accuracy: 0.9971 - loss: 0.0129 - val_accuracy: 0.9812 - val_loss: 0.0782
Epoch 5/10
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 31ms/step - accuracy: 0.9984 - loss: 0.0079 - val_accuracy: 0.9821 - val_loss: 0.0873
Epoch 6/10
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 29ms/step - accuracy: 0.9995 - loss: 0.0037 - val_accuracy: 0.9794 - val_loss: 0.0958
Epoch 7/10
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 37ms/step - accuracy: 0.9993 - loss: 0.0021

In [77]:
# Evaluate model
loss, accuracy = model.evaluate(X_test_tfidf, y_test)
print(f'Final Test Accuracy: {accuracy * 100:.2f}%')

[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.9844 - loss: 0.0713
Final Test Accuracy: 98.21%


In [78]:
def predict_message(message):
    vectorized_message = vectorizer.transform([message]).toarray()
    prediction = model.predict(vectorized_message)
    return "Spam" if prediction > 0.5 else "Ham"

In [79]:
# Example usage
sample_text = "Congratulations! You have won a lottery. Click the link to claim."
print(f'Message: "{sample_text}" → Prediction: {predict_message(sample_text)}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step
Message: "Congratulations! You have won a lottery. Click the link to claim." → Prediction: Spam


In [80]:
# Example usage
sample_text = "Hi, how are you?"
print(f'Message: "{sample_text}" → Prediction: {predict_message(sample_text)}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 126ms/step
Message: "Hi, how are you?" → Prediction: Ham
