# SPAM SMS DETECTION

Build an AI model that can classify SMS messages as spam or
legitimate. Use techniques like TF-IDF or word embeddings with
classifiers like Naive Bayes, Logistic Regression, or Support Vector
Machines to identify spam messages.

In [57]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [58]:
# Load the raw SMS dataset. The file is decoded as latin-1; change the
# encoding here if the CSV was saved differently.
df = pd.read_csv(
    'spam.csv',
    encoding='latin-1',
)



In [59]:
# Keep only the 'v1' and 'v2' columns (assumed to exist in the CSV) and give
# them descriptive names: 'v1' -> 'label', 'v2' -> 'message'.
df = df[['v1', 'v2']].rename(columns={'v1': 'label', 'v2': 'message'})



In [60]:
# Data preprocessing
# The labels are used directly as the strings 'ham' / 'spam', so no
# re-encoding is needed. Fail fast on any other value (including missing
# ones) rather than letting it silently become NaN in the label column.
invalid_labels = df.loc[~df['label'].isin(['ham', 'spam']), 'label']
if not invalid_labels.empty:
    raise ValueError(
        f'Unexpected label values: {invalid_labels.unique().tolist()}'
    )



In [61]:
# Hold out 20% of the messages for testing; random_state is fixed so the
# same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    df['message'],
    df['label'],
    test_size=0.2,
    random_state=42,
)



In [62]:
# Convert the message text into TF-IDF features, ignoring English stop words.
# The vectorizer is fitted on the training messages only; the test messages
# are transformed with that already-fitted vectorizer.
vectorizer = TfidfVectorizer(
    stop_words='english',
)
X_train_tfidf = vectorizer.fit_transform(raw_documents=X_train)
X_test_tfidf = vectorizer.transform(raw_documents=X_test)


In [63]:
# Fit a multinomial Naive Bayes model on the TF-IDF training features.
classifier = MultinomialNB()
classifier.fit(X=X_train_tfidf, y=y_train)



In [64]:
# Predict a label for every message in the held-out test set.
y_pred = classifier.predict(X=X_test_tfidf)



In [65]:
# Score the predictions against the true test labels: overall accuracy,
# the confusion matrix, and the per-class classification report.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
classification_rep = classification_report(y_true=y_test, y_pred=y_pred)



In [43]:
# Report the metrics; accuracy is shown with two digits after the decimal point.
print(
    f'Accuracy: {accuracy:.2f}',
    '\nConfusion Matrix:',
    conf_matrix,
    '\nClassification Report:',
    classification_rep,
    sep='\n',
)



Accuracy: 0.97

Confusion Matrix:
[[965   0]
 [ 37 113]]

Classification Report:
              precision    recall  f1-score   support

         ham       0.96      1.00      0.98       965
        spam       1.00      0.75      0.86       150

    accuracy                           0.97      1115
   macro avg       0.98      0.88      0.92      1115
weighted avg       0.97      0.97      0.96      1115



In [66]:
# Collect the true label, the predicted label and the message text for each
# test-set row, then print the table.
# y_test and X_test come from the same split, and y_pred was produced from
# X_test in the same row order.
# The labels are already the strings 'ham' / 'spam', so they are used as-is;
# no re-mapping is needed.
results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred, 'Message': X_test})
print("\nResults:")
print(results)



Results:
     Actual Predicted                                            Message
3245    ham       ham  Funny fact Nobody teaches volcanoes 2 erupt, t...
944     ham       ham  I sent my scores to sophas and i had to do sec...
1044   spam       ham  We know someone who you know that fancies you....
2484    ham       ham  Only if you promise your getting out as SOON a...
812    spam      spam  Congratulations ur awarded either å£500 of CD ...
...     ...       ...                                                ...
4264    ham       ham   &lt;DECIMAL&gt; m but its not a common car he...
2439    ham       ham  Rightio. 11.48 it is then. Well arent we all u...
5556    ham       ham  Yes i have. So that's why u texted. Pshew...mi...
4205    ham       ham                             Get the door, I'm here
4293   spam      spam  Kit Strip - you have been billed 150p. Netcoll...

[1115 rows x 3 columns]


In [67]:
# Keep only the test rows the classifier labelled as spam. The selection is
# on the predicted label, not the true one.
spam_messages = results.loc[results['Predicted'].eq('spam')]
print("\nIdentified Spam Messages:")
print(spam_messages)



Identified Spam Messages:
     Actual Predicted                                            Message
812    spam      spam  Congratulations ur awarded either å£500 of CD ...
1992   spam      spam  Free tones Hope you enjoyed your new content. ...
2952   spam      spam  URGENT! Your mobile was awarded a å£1,500 Bonu...
5567   spam      spam  This is the 2nd time we have tried 2 contact u...
3997   spam      spam  We tried to call you re your reply to our sms ...
...     ...       ...                                                ...
2641   spam      spam  You are guaranteed the latest Nokia Phone, a 4...
3904   spam      spam  Do you want a new video handset? 750 anytime a...
3858   spam      spam  Win the newest åÒHarry Potter and the Order of...
1193   spam      spam  Sex up ur mobile with a FREE sexy pic of Jorda...
4293   spam      spam  Kit Strip - you have been billed 150p. Netcoll...

[113 rows x 3 columns]
