<a href="https://colab.research.google.com/github/FieryCatalyst/Data-Science-Analysis/blob/main/Naive_Bayes_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# 1. Read the labelled SMS data straight from a raw GitHub URL.
#    NOTE: the file is fetched from a GitHub-hosted copy, not from the UCI
#    repository itself. It is tab-separated and has no header row, so the
#    column names are supplied here.
url = "https://raw.githubusercontent.com/justmarkham/pycon-2016-tutorial/master/data/sms.tsv"
df = pd.read_csv(
    url,
    sep="\t",
    header=None,
    names=["label", "message"],
)

# 2. Turn the string labels into integers: ham -> 0, spam -> 1.
#    Any label not listed in the mapping becomes NaN (Series.map semantics).
label_codes = {"ham": 0, "spam": 1}
df["label"] = df["label"].map(label_codes)

# 3. Hold out 20% of the rows for testing; the fixed seed keeps the split
#    reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    df["message"],
    df["label"],
    test_size=0.2,
    random_state=42,
)

# 4. Build TF-IDF features. English stop words are dropped, as is any term
#    that occurs in more than 70% of the training messages. The vocabulary
#    and IDF weights are learned from the training split only; the test
#    split is merely transformed with them.
vectorizer = TfidfVectorizer(stop_words="english", max_df=0.7)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# 5. Fit a Multinomial Naive Bayes classifier (fit() returns the estimator).
nb_model = MultinomialNB().fit(X_train_vec, y_train)

# 6. Predict labels for the held-out messages.
y_pred = nb_model.predict(X_test_vec)

# 7. Report accuracy (as a percentage, two decimals) and per-class metrics.
#    target_names follows the sorted label order: 0 -> Ham, 1 -> Spam.
accuracy_pct = round(accuracy_score(y_test, y_pred) * 100, 2)
report = classification_report(y_test, y_pred, target_names=["Ham", "Spam"])

print("✅ Model Evaluation Results:\n")
print("Accuracy:", accuracy_pct, "%")
print("\nClassification Report:\n", report)

# 8. Classify a few hand-written messages with the fitted vectorizer + model.
samples = [
    "Congratulations! You've won a $1000 Walmart gift card. Click here to claim your prize.",
    "Hey, are we still meeting for coffee tomorrow?",
    "Free entry in 2 a weekly competition to win FA Cup final tickets!",
]
sample_vec = vectorizer.transform(samples)
predictions = nb_model.predict(sample_vec)

print("\n📩 Sample Predictions:")
for msg, pred in zip(samples, predictions):
    # Map the numeric prediction back to a readable class name.
    if pred == 1:
        label = "Spam"
    else:
        label = "Ham"
    print(f" - {label}: {msg}")


✅ Model Evaluation Results:

Accuracy: 97.85 %

Classification Report:
               precision    recall  f1-score   support

         Ham       0.98      1.00      0.99       966
        Spam       1.00      0.84      0.91       149

    accuracy                           0.98      1115
   macro avg       0.99      0.92      0.95      1115
weighted avg       0.98      0.98      0.98      1115


📩 Sample Predictions:
 - Spam: Congratulations! You've won a $1000 Walmart gift card. Click here to claim your prize.
 - Ham: Hey, are we still meeting for coffee tomorrow?
 - Spam: Free entry in 2 a weekly competition to win FA Cup final tickets!
