<a href="https://colab.research.google.com/github/Nikhitaa2329/genAI1/blob/main/spam.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Step 1: Install Required Libraries (if not already installed)
!pip install pandas scikit-learn

# Step 2: Import Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Step 3: Fetch the SMS dataset (tab-separated, no header row in the file)
url = "https://raw.githubusercontent.com/justmarkham/pycon-2016-tutorial/master/data/sms.tsv"
df = pd.read_csv(
    url,
    sep="\t",
    header=None,
    names=["label", "message"],
)

print("📄 Sample Data:")
print(df.head())

# Step 4: Encode the text labels as integers (ham -> 0, spam -> 1)
df["label_num"] = df["label"].map({"ham": 0, "spam": 1})

# Step 5: Hold out 25% of the rows for evaluation; the fixed seed keeps
# the split repeatable between runs
X = df["message"]
y = df["label_num"]
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# Step 6: Bag-of-words counts. The vectorizer is fitted on the training
# messages only and then reused, unchanged, on the test messages.
vectorizer = CountVectorizer()
X_train_counts = vectorizer.fit_transform(X_train)
X_test_counts = vectorizer.transform(X_test)

# Step 7: Fit a multinomial naive Bayes classifier on the training counts
# (fit() returns the estimator itself, so it can be bound in one step)
model = MultinomialNB().fit(X_train_counts, y_train)

# Step 8: Predict on the held-out messages and report the metrics
y_pred = model.predict(X_test_counts)

accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
matrix = confusion_matrix(y_test, y_pred)

print("\n✅ Accuracy:", accuracy)
print("\n🧾 Classification Report:\n", report)
print("🧩 Confusion Matrix:\n", matrix)

# Step 9: Test with your own message
def predict_spam(text):
    """Classify one message using the module-level vectorizer and model.

    The message is wrapped in a one-element list, converted to features
    with ``vectorizer.transform`` and passed to ``model.predict``; only
    the first (and only) prediction is inspected.

    Args:
        text: The raw message to classify.

    Returns:
        "🚫 Spam" if the predicted label equals 1, otherwise "✅ Not Spam".
    """
    features = vectorizer.transform([text])
    predicted_labels = model.predict(features)
    if predicted_labels[0] == 1:
        return "🚫 Spam"
    return "✅ Not Spam"

# 🔍 Ask for a message on the console and show how it is classified
msg = input("\n📩 Enter a message to check if it's spam: ")
verdict = predict_spam(msg)
print("🔍 Prediction:", verdict)




📄 Sample Data:
  label                                            message
0   ham  Go until jurong point, crazy.. Available only ...
1   ham                      Ok lar... Joking wif u oni...
2  spam  Free entry in 2 a wkly comp to win FA Cup fina...
3   ham  U dun say so early hor... U c already then say...
4   ham  Nah I don't think he goes to usf, he lives aro...

✅ Accuracy: 0.9885139985642498

🧾 Classification Report:
               precision    recall  f1-score   support

           0       0.99      1.00      0.99      1207
           1       0.98      0.94      0.96       186

    accuracy                           0.99      1393
   macro avg       0.98      0.97      0.97      1393
weighted avg       0.99      0.99      0.99      1393

🧩 Confusion Matrix:
 [[1203    4]
 [  12  174]]

📩 Enter a message to check if it's spam: hello
🔍 Prediction: ✅ Not Spam
