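## Spam Detection Project

Train a Multinomial Naive Bayes classifier on bag-of-words features to label SMS messages as spam or ham (not spam).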


## Step 1: Import Necessary Libraries


In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


## Step 2: Load the Dataset


In [2]:
# Fetch the SMS corpus: a tab-separated file read without a header row,
# so the two column names are supplied explicitly.
url = "https://raw.githubusercontent.com/justmarkham/pycon-2016-tutorial/master/data/sms.tsv"
df = pd.read_table(
    url,
    header=None,
    names=['label', 'message'],
)

# Show the first ten rows as a quick sanity check
df.head(10)


Unnamed: 0,label,message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."
5,spam,FreeMsg Hey there darling it's been 3 week's n...
6,ham,Even my brother is not like to speak with me. ...
7,ham,As per your request 'Melle Melle (Oru Minnamin...
8,spam,WINNER!! As a valued network customer you have...
9,spam,Had your mobile 11 months or more? U R entitle...


In [3]:
# Full text of the message stored under index label 9
df.loc[9, 'message']

'Had your mobile 11 months or more? U R entitled to Update to the latest colour mobiles with camera for Free! Call The Mobile Update Co FREE on 08002986030'

## Step 3: Preprocess the Data


In [4]:
# Convert labels to binary format: ham -> 0, spam -> 1.
# The dtype guard keeps this cell safe to re-run: applying .map() again to the
# already-numeric column would match no dictionary key and turn every label
# into NaN.
if not pd.api.types.is_numeric_dtype(df['label']):
    df['label'] = df['label'].map({'ham': 0, 'spam': 1})

# Check for missing values (labels outside the mapping above would show up here as NaN)
df.isnull().sum()


label      0
message    0
dtype: int64

## Step 4: Split the Data into Training and Testing Sets


In [5]:
# Features are the message column; the target is the label column.
X, y = df['message'], df['label']

# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


## Step 5: Transform the Text Data into Feature Vectors


In [6]:
# Bag-of-words encoder created with its default settings
vectorizer = CountVectorizer()

# Fit on the training messages only and encode them in the same pass
X_train_transformed = vectorizer.fit_transform(X_train)

# Encode the test messages with the already-fitted vectorizer (transform only, no refit)
X_test_transformed = vectorizer.transform(X_test)


## Step 6: Train the Naive Bayes Model


In [7]:
# Multinomial Naive Bayes classifier created with its default settings
model = MultinomialNB()

# Fit the classifier on the vectorized training messages and their labels
model.fit(X=X_train_transformed, y=y_train)


## Step 7: Make Predictions and Evaluate the Model


In [8]:
# Predicted labels for the held-out test messages
y_pred = model.predict(X_test_transformed)

# Compare the predictions with the true test labels
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Report each metric under its own heading (heading and value on separate lines)
print(f"Accuracy: {accuracy:.2f}")
print("Confusion Matrix:", conf_matrix, sep="\n")
print("Classification Report:", class_report, sep="\n")


Accuracy: 0.99
Confusion Matrix:
[[1444    4]
 [  12  212]]
Classification Report:
              precision    recall  f1-score   support

           0       0.99      1.00      0.99      1448
           1       0.98      0.95      0.96       224

    accuracy                           0.99      1672
   macro avg       0.99      0.97      0.98      1672
weighted avg       0.99      0.99      0.99      1672



## Step 8: Test with New Data


In [9]:
# Classify one hand-written message with the fitted vectorizer and model.
# The message is wrapped in a list before being passed to the vectorizer.
new_message = ["Congratulations! You've won a free ticket to the Bahamas. Call now!"]
new_message_transformed = vectorizer.transform(new_message)
prediction = model.predict(new_message_transformed)

# Label 1 is reported as spam; anything else as not spam
if prediction[0] == 1:
    print("Prediction:", "Spam")
else:
    print("Prediction:", "Not Spam")


Prediction: Spam


In [10]:
def new(new_message):
    """Classify one message and print the verdict.

    Parameters
    ----------
    new_message
        The message to classify; the calls in this notebook pass a single
        string. It is wrapped in a one-element list before vectorizing.

    Notes
    -----
    Uses the notebook-level ``vectorizer`` and ``model`` objects, so both
    must already be defined when this function is called. Prints
    ``Prediction: Spam`` when the predicted label equals 1 and
    ``Prediction: Not Spam`` otherwise. Nothing is returned.
    """
    features = vectorizer.transform([new_message])
    predicted_label = model.predict(features)[0]

    if predicted_label == 1:
        verdict = "Spam"
    else:
        verdict = "Not Spam"
    print("Prediction:", verdict)
    

In [11]:
# Prize/"call now" style message, the same text used in the Step 8 cell
new("Congratulations! You've won a free ticket to the Bahamas. Call now!")

Prediction: Spam


In [12]:
# Ordinary workplace sentence used as a non-spam check
new(" The team did a fantastic job on the presentation today.")

Prediction: Not Spam


In [14]:
# Another ordinary business message used as a non-spam check
new("Let’s schedule a call to discuss the project timeline.")
# Alternative message to try: "I’ll be on vacation next week, but I’ll check my email periodically."

Prediction: Not Spam


In [None]:
# Same text as df.message[9], which the dataset labels as spam.
# NOTE(review): this row may have landed in the training split, so this is not
# necessarily an independent test of the model.
new("Had your mobile 11 months or more? U R entitled to Update to the latest colour mobiles with camera for Free! Call The Mobile Update Co FREE on 08002986030")

In [None]:
import pickle

In [None]:
# Persist the trained classifier to disk. The context manager closes the file
# handle as soon as the dump finishes, so the pickle is completely written out.
with open('model_spam.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

In [None]:
# Persist the fitted vectorizer to disk. The context manager closes the file
# handle as soon as the dump finishes, so the pickle is completely written out.
with open('vectorizer.pkl', 'wb') as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)