# SPAM SMS DETECTION

## Build an AI model that classifies SMS messages as spam or legitimate. Use techniques like TF-IDF or word embeddings with classifiers like Naive Bayes, Logistic Regression, or Support Vector Machines to identify spam messages.

In [2]:
import os

import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

# Load the SMS spam dataset.
# The location can be overridden with the SPAM_CSV_PATH environment variable so
# the notebook can run on other machines; when the variable is unset the
# original local path is used, so existing behaviour is unchanged.
DEFAULT_SPAM_CSV_PATH = "C:\\Users\\Jenisha Rebello\\Downloads\\spam.csv"
spam_csv_path = os.environ.get("SPAM_CSV_PATH", DEFAULT_SPAM_CSV_PATH)
spam_data = pd.read_csv(spam_csv_path, encoding='latin-1')

# Fail early, with a readable message, if the columns used further down
# ('SMS' for the message text, 'Label' for the class) are not present.
missing_columns = {'SMS', 'Label'} - set(spam_data.columns)
if missing_columns:
    raise KeyError(
        f"{spam_csv_path} is missing required column(s): {sorted(missing_columns)}"
    )

# Preprocess: Remove stop words and perform stemming
# NOTE: stopwords.words() needs the NLTK "stopwords" corpus to be installed
# locally; if it raises LookupError, run nltk.download("stopwords") once.
stop_words = set(stopwords.words("english"))
stemmer = PorterStemmer()

def preprocess_text(text):
    """Drop English stop words from ``text`` and stem the remaining tokens.

    The text is split on whitespace only, so punctuation stays attached to its
    token. A token is dropped when its lower-cased form is in the module-level
    ``stop_words`` set; every surviving token is passed through the
    module-level ``stemmer``.

    Args:
        text: The raw message. Anything that is not a ``str`` (for example the
            ``NaN`` pandas uses for an empty CSV cell, or ``None``) is treated
            as an empty message instead of raising ``AttributeError``.

    Returns:
        The stemmed tokens joined by single spaces; ``""`` when nothing is left.
    """
    # Missing values reach this function as float NaN / None, which have no
    # .split(); map them to an empty document rather than crashing .apply().
    if not isinstance(text, str):
        return ""
    kept_stems = [
        stemmer.stem(word)
        for word in text.split()
        if word.lower() not in stop_words
    ]
    return " ".join(kept_stems)

# Clean every message once, up front, and keep the result next to the raw text
spam_data['Processed_SMS'] = spam_data['SMS'].apply(preprocess_text)

# Split the dataset into training and testing sets
# (20% held out; the fixed random_state makes the split repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    spam_data['Processed_SMS'],
    spam_data['Label'],
    test_size=0.2,
    random_state=42,
)

# Create a pipeline with TF-IDF vectorizer and Naive Bayes classifier
# Keeping both steps in one Pipeline means the vectorizer is fitted on the
# training messages only and reused unchanged at prediction time.
spam_classifier = Pipeline([
    ('tfidf_vectorizer', TfidfVectorizer()),
    ('naive_bayes_classifier', MultinomialNB())
])

# Train the model
spam_classifier.fit(X_train, y_train)

# Make predictions on the test set
test_predictions = spam_classifier.predict(X_test)

# Print accuracy and metrics
# All three metric functions take (y_true, y_pred) in that order.
accuracy = accuracy_score(y_test, test_predictions)
confusion_matrix_result = confusion_matrix(y_test, test_predictions)
classification_report_result = classification_report(y_test, test_predictions)

print("Accuracy:", accuracy)
print("Confusion Matrix:")
print(confusion_matrix_result)
print("Classification Report:")
print(classification_report_result)

# Take user input
# NOTE: input() waits for keyboard input, so an unattended run stops here.
user_input_sms = input("Enter an SMS message: ")

# Make predictions on the user input
# The message goes through the same preprocessing as the training data, and is
# wrapped in a list because predict() expects a collection of documents.
user_input_processed = preprocess_text(user_input_sms)
user_prediction = spam_classifier.predict([user_input_processed])

# Display the prediction
print(f"Prediction: {user_prediction[0]}")

Accuracy: 0.9668161434977578
Confusion Matrix:
[[965   0]
 [ 37 113]]
Classification Report:
              precision    recall  f1-score   support

    Not spam       0.96      1.00      0.98       965
        spam       1.00      0.75      0.86       150

    accuracy                           0.97      1115
   macro avg       0.98      0.88      0.92      1115
weighted avg       0.97      0.97      0.96      1115

Enter an SMS message: 07732584351 - Rodger Burns - MSG = We tried to call you re your reply to our sms for a free nokia mobile + free camcorder. Please call now 08000930705 for delivery tomorrow
Prediction: spam
