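#### Step 1: Install Dependencies (if needed)

If an import in the next step fails with `ModuleNotFoundError`, install the missing packages into the kernel's environment first, e.g. `%pip install pandas numpy matplotlib seaborn scikit-learn nltk`, then restart the kernel and run all cells from the top.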

#### Step 2: Import Libraries

In [1]:
import pandas as pd
import numpy as np
import string
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

# Make sure the NLTK stop-word corpus is available locally.
# Only hit the network when the corpus is actually missing, so that a
# "Restart & Run All" on a machine that already has it works offline
# and does not re-contact the download server on every run.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')


ModuleNotFoundError: No module named 'matplotlib'

#### Step 3: Load and Explore the Dataset

In [None]:
# Source of the SMS Spam collection: a headerless, tab-separated file
# with one (label, message) pair per line.
url = "https://raw.githubusercontent.com/justmarkham/pycon-2016-tutorial/master/data/sms.tsv"

# The file carries no header row, so the column names are supplied here.
df = pd.read_csv(
    url,
    sep="\t",
    names=["label", "message"],
)

# Show the first few rows as a quick sanity check.
df.head()


#### Step 4: Preprocess the Text Data



In [None]:
ps = PorterStemmer()

# Build the stop-word lookup ONCE. The previous version called
# stopwords.words('english') for every single token and then searched the
# resulting list linearly, which dominated the runtime of this step.
STOP_WORDS = frozenset(stopwords.words('english'))

# Translation table that deletes every character in string.punctuation.
_PUNCT_TABLE = str.maketrans('', '', string.punctuation)


def preprocess(text):
    """Normalise one message for vectorisation.

    Steps, in order:
      1. lower-case the text,
      2. delete every character found in ``string.punctuation``,
      3. split on whitespace,
      4. drop tokens that appear in the English stop-word set,
      5. Porter-stem the remaining tokens.

    Parameters
    ----------
    text : str
        Raw message text.

    Returns
    -------
    str
        The surviving stemmed tokens joined by single spaces (empty string
        if nothing survives).
    """
    # NOTE(review): punctuation is stripped before the stop-word check, so any
    # stop word that contains an apostrophe can no longer match its stripped
    # token. Behaviour is kept as-is here; confirm whether that is intended.
    stripped = text.lower().translate(_PUNCT_TABLE)
    return ' '.join(
        ps.stem(token) for token in stripped.split() if token not in STOP_WORDS
    )

# Clean every message and keep the result next to the raw text,
# so the original 'message' column stays available for inspection.
df['cleaned_message'] = df['message'].map(preprocess)


#### Step 5: Label Encoding

In [None]:
# Numeric targets for the classifier: ham -> 0, spam -> 1.
# Any label not listed in the mapping becomes NaN.
LABEL_TO_ID = {'ham': 0, 'spam': 1}
df['label_num'] = df['label'].map(LABEL_TO_ID)


#### 📌 Step 6: Feature Extraction using TF-IDF


In [None]:
# Target vector: the numeric label column.
y = df['label_num']

# Feature matrix: TF-IDF weights learned from, and applied to, the cleaned text.
# NOTE(review): the vectorizer is fitted on the whole corpus before the
# train/test split, so vocabulary and IDF statistics also see the test
# messages. Consider fitting on the training portion only.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['cleaned_message'])


#### 📌 Step 7: Train-Test Split

In [2]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


NameError: name 'train_test_split' is not defined

#### 📌 Step 8: Model Training

In [None]:
# Fit a multinomial Naive Bayes classifier on the training features.
# fit() returns the estimator itself, so construction and training chain.
model = MultinomialNB().fit(X_train, y_train)

# Display the fitted estimator as the cell output.
model


#### 📌 Step 9: Model Evaluation

In [None]:
# Predict labels for the held-out messages.
y_pred = model.predict(X_test)

# Compute the metrics first, then report them.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("✅ Accuracy:", accuracy)
print("\n📄 Classification Report:\n", report)


#### 📌 Step 10: Confusion Matrix Visualization

In [None]:
# Draw the confusion matrix (rows = actual class, columns = predicted class)
# as an annotated heatmap on an explicitly created axes.
fig, ax = plt.subplots(figsize=(5, 4))
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
plt.show()
