In [1]:
!pip install pandas scikit-learn




In [4]:
import pandas as pd

# Replace the path with your actual file path
file_path = r"D:\flutterwidget\neurocivicnet\spammessage\sms+spam+collection\SMSSpamCollection"
df = pd.read_csv(file_path, sep="\t", names=["label", "message"])

print(df.head())
print(df['label'].value_counts())


  label                                            message
0   ham  Go until jurong point, crazy.. Available only ...
1   ham                      Ok lar... Joking wif u oni...
2  spam  Free entry in 2 a wkly comp to win FA Cup fina...
3   ham  U dun say so early hor... U c already then say...
4   ham  Nah I don't think he goes to usf, he lives aro...
label
ham     4825
spam     747
Name: count, dtype: int64


In [5]:
from sklearn.model_selection import train_test_split

# Encode labels: ham = 0, spam = 1
df['label_num'] = df['label'].map({'ham': 0, 'spam': 1})

# Split dataset: 80% training, 20% testing
X_train, X_test, y_train, y_test = train_test_split(
    df['message'],         # SMS text
    df['label_num'],       # numeric labels
    test_size=0.2,         # 20% test
    random_state=42        # for reproducibility
)

# Check the shapes
print("Training samples:", X_train.shape[0])
print("Testing samples:", X_test.shape[0])


Training samples: 4457
Testing samples: 1115


In [7]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Initialize TF-IDF vectorizer
vectorizer = TfidfVectorizer(
    stop_words="english",  # remove common English words
    max_features=3000      # limit to 3000 features for speed
)

# Fit on training data and transform
X_train_vec = vectorizer.fit_transform(X_train)

# Transform test data
X_test_vec = vectorizer.transform(X_test)

# Check the shape of vectors
print("Vectorized training data shape:", X_train_vec.shape)
print("Vectorized testing data shape:", X_test_vec.shape)



Vectorized training data shape: (4457, 3000)
Vectorized testing data shape: (1115, 3000)


In [8]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Initialize Naive Bayes model
model = MultinomialNB()

# Train the model
model.fit(X_train_vec, y_train)

# Predict on test data
y_pred = model.predict(X_test_vec)

# Evaluate
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


Accuracy: 0.9856502242152466

Classification Report:
               precision    recall  f1-score   support

           0       0.98      1.00      0.99       966
           1       0.99      0.90      0.94       149

    accuracy                           0.99      1115
   macro avg       0.99      0.95      0.97      1115
weighted avg       0.99      0.99      0.99      1115


Confusion Matrix:
 [[965   1]
 [ 15 134]]


In [18]:
# Sample messages to test
sample_messages = [
    # Ham (not spam)
    "Hey, are you free for dinner tonight?",
    "Don't forget your meeting at 3 PM.",
    "Can you send me the notes from class?",
    "Happy Birthday! Hope you have a great day!",
    "See you at the gym later!",

    # Spam
    "Congratulations! You've won a free iPhone. Claim now!",
    "URGENT! Your account will be suspended unless you call immediately.",
    "Win $5000 by completing this survey today!",
    "You have been selected for a cash prize. Reply ASAP!",
    "Get cheap prescription meds online without a prescription."
]


# Vectorize these messages using the same TF-IDF vectorizer
sample_vec = vectorizer.transform(sample_messages)

# Predict using the trained model
predictions = model.predict(sample_vec)
probabilities = model.predict_proba(sample_vec)

# Display results
for msg, pred, prob in zip(sample_messages, predictions, probabilities):
    print(f"Message: {msg}")
    print(f"Prediction: {'Spam' if pred==1 else 'Ham'}")
    print(f"Probability: Spam {prob[1]*100:.2f}%, Ham {prob[0]*100:.2f}%")
    print("-"*60)


Message: Hey, are you free for dinner tonight?
Prediction: Ham
Probability: Spam 1.32%, Ham 98.68%
------------------------------------------------------------
Message: Don't forget your meeting at 3 PM.
Prediction: Ham
Probability: Spam 2.03%, Ham 97.97%
------------------------------------------------------------
Message: Can you send me the notes from class?
Prediction: Ham
Probability: Spam 3.23%, Ham 96.77%
------------------------------------------------------------
Message: Happy Birthday! Hope you have a great day!
Prediction: Ham
Probability: Spam 0.38%, Ham 99.62%
------------------------------------------------------------
Message: See you at the gym later!
Prediction: Ham
Probability: Spam 1.09%, Ham 98.91%
------------------------------------------------------------
Message: Congratulations! You've won a free iPhone. Claim now!
Prediction: Spam
Probability: Spam 83.64%, Ham 16.36%
------------------------------------------------------------
Message: URGENT! Your account wi

In [19]:
# New long/marketing-style spam messages
extra_spam = [
    "Get cheap prescription meds online without a prescription.",
    "Win a free vacation to Bali! Complete this short survey now.",
    "URGENT! Your loan is approved. Call 123-456-7890 immediately.",
    "Earn $500/day from home! Start your online business today.",
    "You’ve been pre-approved for a credit card with no credit check.",
     "Congratulations! You have won a $1000 gift card. Click here to claim now!",
    "Dear user, your account has been selected for a free iPhone. Reply YES to claim.",
    "URGENT! Your loan is approved. Call 123-456-7890 immediately.",
    "You’ve been specially chosen for a limited-time investment offer. Don’t miss out!",
    "Win a free vacation to Bali! Just complete this short survey.",
    "Get cheap prescription meds online without a prescription. Order today!",
    "Your mobile number has won 5000 points. Redeem now before it expires.",
    "Act fast! Exclusive deal for our VIP members only.",
    "You’ve been pre-approved for a credit card with no credit check.",
    "Earn $500/day from home! Start your online business today.",
    "Congratulations! Claim your free Amazon voucher now.",
    "Your account is at risk! Verify immediately to avoid suspension.",
    "You’ve won a lottery! Contact us to claim your prize.",
    "Limited offer: Buy 1 get 1 free on all products today only.",
    "Earn money fast by working from home. Sign up now.",
    "You are selected for a free Netflix subscription. Reply YES.",
    "Get rich quick! Exclusive investment tips inside.",
    "Your package is waiting for delivery. Click here to track.",
    "Hot singles are waiting to chat with you now!",
    "Low-interest loans available. Apply before the offer ends.",
    "You have been selected for a free smartphone. Claim it now.",
    "Special promotion! Get 50% off your next purchase.",
    "Your credit score qualifies you for instant cash rewards.",
    "You have a pending refund. Click here to claim.",
    "Win a brand new car! Participate in our contest today.",
    "Act now! Your chance to earn $1000 in 24 hours.",
    "Exclusive deal: Free trial for 1 month. Sign up now.",
    "Get VIP access to our secret shopping club. Join today.",
    "You are the lucky winner of a $500 cash prize.",
    "Immediate action required: Update your billing information.",
    "Special offer just for you! Buy one, get one free.",
    "Claim your free tickets to the concert now!",
    "You have been approved for instant cash. Apply now.",
    "Work from home and earn money daily. Limited spots available.",
    "Your insurance claim is pending. Contact us immediately.",
    "Exclusive: Free premium subscription for 1 year. Register now.",
    "You are selected to receive free cryptocurrency. Claim today.",
    "Urgent! Your account will be suspended unless verified.",
    "Limited-time offer: Save 70% on selected products.",
    "Win a trip to Europe! Enter our contest now.",
    "Your payment is pending. Click here to complete it.",
    "Hot deals on luxury watches! Shop today.",
    "Earn money online easily! Join our platform now.",
    "You have won a shopping spree! Contact us to claim.",
    "Congratulations! Free gift card waiting for you.",
    "Act now! Limited seats for our online course.",
    "Your subscription is about to expire. Renew now.",
    "Get free access to premium content. Sign up today.",
    "Exclusive offer: Get 2 months free. Hurry!",
    "You are selected for a special cashback reward.",
    "Claim your free gift before the offer ends.",
    "Earn $1000/week by completing simple tasks.",
    "Your mobile has been selected for a free upgrade.",
    "Special invitation: Join our VIP club today.",
    "Win big prizes! Participate in our online draw.",
    "Limited offer! Free samples available for a short time.",
    "You have been chosen for a free trial of our product.",
    "Urgent notice: Verify your account to avoid suspension.",
    "Claim your reward points before they expire.",
    "Earn money while you sleep! Sign up now.",
    "Congratulations! You are our lucky winner today.",
    "Special deal: Get discounts up to 80% today.",
    "Your package delivery failed. Click here to reschedule.",
    "Free consultation available. Book your slot now.",
    "You’ve won free tickets to the event. Claim now.",
    "Act fast! Exclusive membership for a limited time.",
    "Your application is approved. Start earning today.",
    "Get free access to our premium tools. Register now.",
    "Limited-time bonus! Don’t miss this opportunity.",
    "Claim your free trial of our premium service.",
    "You’ve been selected for a special reward. Reply YES.",
    "Earn cash online easily. Start today!",
    "Congratulations! Free voucher waiting for you.",
    "Hot deals just for you! Shop now.",
    "You are pre-qualified for instant cash rewards.",
    "Your account needs immediate attention. Verify now.",
    "Get free premium access for a limited period.",
    "Limited spots available for our exclusive course.",
    "Win a luxury vacation! Enter now.",
    "Your invoice is pending. Click to pay immediately.",
    "Act now! Your special offer expires soon.",
    "You’ve won an exclusive membership. Claim today.",
    "Earn money from home easily. Sign up now.",
    "Special gift waiting for you. Claim before it’s gone.",
    "Your prize is ready! Contact us immediately.",
    "Limited offer! Save big on your next purchase.",
    "Congratulations! You are eligible for a free reward.",
    "Hot investment tips to make money fast. Join now.",
    "You have been chosen for free online training.",
    "Urgent! Your account verification is required.",
    "Claim your free subscription to our newsletter.",
    "Win cash prizes every day! Participate today.",
    "Exclusive deal! Get free access to premium content.",
    "Your special reward is waiting. Click to claim now."
]

# Label them as spam
extra_labels = [1]*len(extra_spam)


In [20]:
# Append to training sets
import pandas as pd

X_train = pd.concat([X_train.reset_index(drop=True), pd.Series(extra_spam)], ignore_index=True)
y_train = pd.concat([y_train.reset_index(drop=True), pd.Series(extra_labels)], ignore_index=True)


In [21]:
# Refit TF-IDF vectorizer on the new training set
vectorizer = TfidfVectorizer(stop_words="english", max_features=3000)
X_train_vec = vectorizer.fit_transform(X_train)

# Transform the test set (unchanged)
X_test_vec = vectorizer.transform(X_test)


In [22]:
from sklearn.naive_bayes import MultinomialNB

model = MultinomialNB()
model.fit(X_train_vec, y_train)


In [23]:
sample_vec = vectorizer.transform(extra_spam)
predictions = model.predict(sample_vec)
for msg, pred in zip(extra_spam, predictions):
    print(f"Message: {msg} --> {'Spam' if pred==1 else 'Ham'}")


Message: Get cheap prescription meds online without a prescription. --> Spam
Message: Win a free vacation to Bali! Complete this short survey now. --> Spam
Message: URGENT! Your loan is approved. Call 123-456-7890 immediately. --> Spam
Message: Earn $500/day from home! Start your online business today. --> Ham
Message: You’ve been pre-approved for a credit card with no credit check. --> Spam
Message: Congratulations! You have won a $1000 gift card. Click here to claim now! --> Spam
Message: Dear user, your account has been selected for a free iPhone. Reply YES to claim. --> Spam
Message: URGENT! Your loan is approved. Call 123-456-7890 immediately. --> Spam
Message: You’ve been specially chosen for a limited-time investment offer. Don’t miss out! --> Spam
Message: Win a free vacation to Bali! Just complete this short survey. --> Spam
Message: Get cheap prescription meds online without a prescription. Order today! --> Spam
Message: Your mobile number has won 5000 points. Redeem now befo

In [24]:
# Sample messages to test (mix of Ham and Spam)
test_messages = [
    # Ham messages
    "Hey, are you free for dinner tonight?",
    "Don't forget your meeting at 3 PM.",
    "Can you send me the notes from class?",
    "Happy Birthday! Hope you have a great day!",
    "See you at the gym later!",

    # Short spam messages
    "Congratulations! You've won a free iPhone. Claim now!",
    "URGENT! Your account will be suspended unless you call immediately.",

    # Long/marketing-style spam messages
    "Get cheap prescription meds online without a prescription.",
    "Win a free vacation to Bali! Complete this short survey now.",
    "URGENT! Your loan is approved. Call 123-456-7890 immediately.",
    "Earn $500/day from home! Start your online business today.",
    "You’ve been pre-approved for a credit card with no credit check."
]

# Vectorize test messages
test_vec = vectorizer.transform(test_messages)

# Predict
predictions = model.predict(test_vec)
probabilities = model.predict_proba(test_vec)

# Display results
for msg, pred, prob in zip(test_messages, predictions, probabilities):
    print(f"Message: {msg}")
    print(f"Prediction: {'Spam' if pred==1 else 'Ham'}")
    print(f"Probability: Spam {prob[1]*100:.2f}%, Ham {prob[0]*100:.2f}%")
    print("-"*60)


Message: Hey, are you free for dinner tonight?
Prediction: Ham
Probability: Spam 1.50%, Ham 98.50%
------------------------------------------------------------
Message: Don't forget your meeting at 3 PM.
Prediction: Ham
Probability: Spam 2.32%, Ham 97.68%
------------------------------------------------------------
Message: Can you send me the notes from class?
Prediction: Ham
Probability: Spam 3.54%, Ham 96.46%
------------------------------------------------------------
Message: Happy Birthday! Hope you have a great day!
Prediction: Ham
Probability: Spam 0.45%, Ham 99.55%
------------------------------------------------------------
Message: See you at the gym later!
Prediction: Ham
Probability: Spam 1.19%, Ham 98.81%
------------------------------------------------------------
Message: Congratulations! You've won a free iPhone. Claim now!
Prediction: Spam
Probability: Spam 92.66%, Ham 7.34%
------------------------------------------------------------
Message: URGENT! Your account wil

In [25]:
import pickle


In [26]:
# Save the Naive Bayes model
with open("spam_model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)


In [27]:
# Save the TF-IDF vectorizer
with open("tfidf_vectorizer.pkl", "wb") as vec_file:
    pickle.dump(vectorizer, vec_file)


In [28]:
# Load model
with open("spam_model.pkl", "rb") as model_file:
    loaded_model = pickle.load(model_file)

# Load vectorizer
with open("tfidf_vectorizer.pkl", "rb") as vec_file:
    loaded_vectorizer = pickle.load(vec_file)

# Test loading with a new message
new_msg = ["Congratulations! You won a prize!"]
new_vec = loaded_vectorizer.transform(new_msg)
pred = loaded_model.predict(new_vec)
print("Prediction:", "Spam" if pred[0]==1 else "Ham")


Prediction: Spam


In [29]:
!pip install tensorflow




In [30]:
import tensorflow as tf
import numpy as np

# Vectorize the full training set
X_train_vec_dense = X_train_vec.toarray()  # convert sparse matrix to dense

# Convert labels to numpy array
y_train_np = np.array(y_train)

# Create a simple sequential model
tf_model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train_vec_dense.shape[1],)),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

tf_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model to mimic Naive Bayes outputs
nb_preds = model.predict(X_train_vec)  # predictions from Naive Bayes
tf_model.fit(X_train_vec_dense, y_train_np, epochs=5, batch_size=32)




Epoch 1/5
[1m143/143[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8286 - loss: 0.6504
Epoch 2/5
[1m143/143[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8593 - loss: 0.5721
Epoch 3/5
[1m143/143[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8639 - loss: 0.5089
Epoch 4/5
[1m143/143[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8707 - loss: 0.4576
Epoch 5/5
[1m143/143[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8782 - loss: 0.4158


<keras.src.callbacks.history.History at 0x29288da8590>

In [31]:
# Convert the trained TF model to TFLite
converter = tf.lite.TFLiteConverter.from_keras_model(tf_model)
tflite_model = converter.convert()

# Save TFLite model
with open("spam_model.tflite", "wb") as f:
    f.write(tflite_model)

print("TFLite model saved as spam_model.tflite")


INFO:tensorflow:Assets written to: C:\Users\Asus\AppData\Local\Temp\tmpi90prm_c\assets


INFO:tensorflow:Assets written to: C:\Users\Asus\AppData\Local\Temp\tmpi90prm_c\assets


Saved artifact at 'C:\Users\Asus\AppData\Local\Temp\tmpi90prm_c'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 3000), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  2828383123024: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2828383124752: TensorSpec(shape=(), dtype=tf.resource, name=None)
TFLite model saved as spam_model.tflite
