**Step 1:- Import Libraries**

In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

**Step 2:- Load Dataset**

In [10]:
# Load the raw SMS spam CSV, keep only the label column (v1) and the message
# column (v2), and give them descriptive names.
# NOTE(review): latin-1 is presumably needed because the file is not valid UTF-8 - confirm.
df = (
    pd.read_csv("/kaggle/input/sms-spam-collection-dataset/spam.csv", encoding='latin-1')
    .loc[:, ['v1', 'v2']]
    .rename(columns={'v1': 'label', 'v2': 'text'})
)
print(df.head())

  label                                               text
0   ham  Go until jurong point, crazy.. Available only ...
1   ham                      Ok lar... Joking wif u oni...
2  spam  Free entry in 2 a wkly comp to win FA Cup fina...
3   ham  U dun say so early hor... U c already then say...
4   ham  Nah I don't think he goes to usf, he lives aro...


**Step 3:- Data Preprocessing**

In [11]:
# Encode the string labels as integers for the classifier: ham -> 0, spam -> 1.
label_to_num = {'ham': 0, 'spam': 1}
df['label_num'] = df['label'].map(label_to_num)

# Series.map turns any label that is missing from the mapping into NaN.
# Fail here with a clear message instead of carrying NaN targets into the
# train/test split and the model.
unmapped = df.loc[df['label_num'].isna(), 'label'].unique()
assert len(unmapped) == 0, f"Unexpected label values: {list(unmapped)}"

In [13]:
# Features are the raw message texts; the target is the numeric label.
x, y = df['text'], df['label_num']

# Hold out 25% of the messages for evaluation. The fixed random_state makes
# the split reproducible from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=42,
)

**TF-IDF Vectorizer**

In [14]:
# TF-IDF features over the message text, dropping scikit-learn's built-in
# English stop-word list.
vectorizer = TfidfVectorizer(stop_words='english')

# Fit on the training messages only ...
x_train_tfidf = vectorizer.fit_transform(x_train)
# ... then reuse the fitted vectorizer for the test messages, so nothing
# about the test set influences the features.
x_test_tfidf = vectorizer.transform(x_test)

**Step 4:- Train the model (Multinomial Naive Bayes)**

In [15]:
from sklearn.naive_bayes import MultinomialNB

# Multinomial Naive Bayes with default hyper-parameters, fitted on the
# TF-IDF training matrix and the 0/1 training labels.
model = MultinomialNB()
model.fit(X=x_train_tfidf, y=y_train)

**Step 5:- Predict on test data**

In [16]:
# Predict a 0/1 label (0 = ham, 1 = spam) for every held-out test message;
# y_pred is compared against y_test in the evaluation step.
y_pred = model.predict(x_test_tfidf)

**Step 6:- Evaluate the model**

In [17]:
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

# Overall fraction of test messages classified correctly.
print("Accuracy:", accuracy_score(y_test, y_pred))

# Confusion matrix: rows are the true labels, columns the predicted labels
# (label order 0 = ham, 1 = spam).
print("\nConfusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

# Per-class precision, recall and F1 plus their averages.
print("\nclassification Report:", classification_report(y_test, y_pred), sep="\n")

Accuracy: 0.9662598707824839

Confusion Matrix:
[[1202    0]
 [  47  144]]

classification Report:
              precision    recall  f1-score   support

           0       0.96      1.00      0.98      1202
           1       1.00      0.75      0.86       191

    accuracy                           0.97      1393
   macro avg       0.98      0.88      0.92      1393
weighted avg       0.97      0.97      0.96      1393



**Let's predict on user input, as in a real-world scenario**

In [18]:
def predict_sms_message(model, vectorizer, message=None):
    """Classify a single SMS message as spam or ham and print the verdict.

    Args:
        model: Fitted classifier exposing ``predict``. A prediction equal to
            ``1`` is reported as spam, anything else as ham.
        vectorizer: Fitted text vectorizer exposing ``transform``. It is
            called with a one-element list containing the message.
        message: Text to classify. When ``None`` (the default) the message is
            read interactively with ``input()``, exactly as before. Passing a
            string skips the prompt, so the cell can run unattended
            (e.g. under "Restart & Run All").

    Returns:
        None. The result is printed to stdout.
    """
    if message is None:
        print("🔹 Enter an SMS message below:")
        message = input("✉️ SMS: ")

    # A blank message contains nothing for the vectorizer to score, so any
    # label would be meaningless; say so instead of reporting "Not Spam".
    if not message.strip():
        print("\n⚠️ No message entered - nothing to classify.")
        return

    # Vectorize with the already-fitted vectorizer (transform only, no
    # refit), then predict with the trained model.
    user_msg_tfidf = vectorizer.transform([message])
    pred = model.predict(user_msg_tfidf)

    # Show result
    print("\n🔍 Result:")
    if pred[0] == 1:
        print("🚫 Spam Message Detected!")
    else:
        print("✅ Not Spam (Ham) Message.")


In [19]:
# Interactive demo: prompts for one SMS message on stdin and prints whether
# the trained model classifies it as spam or ham.
predict_sms_message(model, vectorizer)


🔹 Enter an SMS message below:


✉️ SMS:  Free entry in 2 a wkly comp to win FA Cup fina



🔍 Result:
🚫 Spam Message Detected!
