<a href="https://colab.research.google.com/github/Meenarekha/GEN-AI/blob/main/SUPERVISED_LEARNING_PROJECT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Spam Detection in SMS using Logistic Regression

In [4]:
# Import modules
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Load SMS Spam dataset
url = "https://raw.githubusercontent.com/justmarkham/pycon-2016-tutorial/master/data/sms.tsv"
df = pd.read_csv(url, sep='\t', header=None, names=['label', 'message'])

# Encode labels: ham → 0, spam → 1
df['label'] = df['label'].map({'ham': 0, 'spam': 1})

# Split data
X_train, X_test, y_train, y_test = train_test_split(df['message'], df['label'], test_size=0.2, random_state=42)

# Text vectorization
vectorizer = CountVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Train model
clf = LogisticRegression()
clf.fit(X_train_vec, y_train)

# Evaluate model
preds = clf.predict(X_test_vec)
print("✅ Model Trained Successfully")
print("🔍 Accuracy on Test Set:", accuracy_score(y_test, preds))
print("📊 Classification Report:\n", classification_report(y_test, preds))

# User input for prediction
print("\n🔎 Check if your message is SPAM or NOT")
user_msg = input("Enter your SMS message: ")

# Transform and predict
user_vec = vectorizer.transform([user_msg])
user_pred = clf.predict(user_vec)

# Output result
print("\n🧾 Prediction Result:")
print("👉 The message is:", "🚫 SPAM" if user_pred[0] == 1 else "✅ NOT SPAM")


✅ Model Trained Successfully
🔍 Accuracy on Test Set: 0.9883408071748879
📊 Classification Report:
               precision    recall  f1-score   support

           0       0.99      1.00      0.99       966
           1       1.00      0.91      0.95       149

    accuracy                           0.99      1115
   macro avg       0.99      0.96      0.97      1115
weighted avg       0.99      0.99      0.99      1115


🔎 Check if your message is SPAM or NOT
Enter your SMS message:  Congratulations! You've won a free ticket to Maldives. Call now to claim.

🧾 Prediction Result:
👉 The message is: 🚫 SPAM


In [5]:
# Import modules
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Load SMS Spam dataset
url = "https://raw.githubusercontent.com/justmarkham/pycon-2016-tutorial/master/data/sms.tsv"
df = pd.read_csv(url, sep='\t', header=None, names=['label', 'message'])

# Encode labels: ham → 0, spam → 1
df['label'] = df['label'].map({'ham': 0, 'spam': 1})

# Split data
X_train, X_test, y_train, y_test = train_test_split(df['message'], df['label'], test_size=0.2, random_state=42)

# Text vectorization
vectorizer = CountVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Train model
clf = LogisticRegression()
clf.fit(X_train_vec, y_train)

# Evaluate model
preds = clf.predict(X_test_vec)
print("✅ Model Trained Successfully")
print("🔍 Accuracy on Test Set:", accuracy_score(y_test, preds))
print("📊 Classification Report:\n", classification_report(y_test, preds))

# User input for prediction
print("\n🔎 Check if your message is SPAM or NOT")
user_msg = input("Enter your SMS message: ")

# Transform and predict
user_vec = vectorizer.transform([user_msg])
user_pred = clf.predict(user_vec)

# Output result
print("\n🧾 Prediction Result:")
print("👉 The message is:", "🚫 SPAM" if user_pred[0] == 1 else "✅ NOT SPAM")


✅ Model Trained Successfully
🔍 Accuracy on Test Set: 0.9883408071748879
📊 Classification Report:
               precision    recall  f1-score   support

           0       0.99      1.00      0.99       966
           1       1.00      0.91      0.95       149

    accuracy                           0.99      1115
   macro avg       0.99      0.96      0.97      1115
weighted avg       0.99      0.99      0.99      1115


🔎 Check if your message is SPAM or NOT
Enter your SMS message:  Hey, are we still meeting at 6 PM today?

🧾 Prediction Result:
👉 The message is: ✅ NOT SPAM
