In [83]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from tqdm import tqdm


In [84]:
#Loading the csv file
data = pd.read_csv('spam_detection.csv', encoding='latin-1')


**PreProcessing the input data**

In [85]:

data.drop_duplicates(inplace=True)
data['label'] = data['v1'].map({'ham': 'ham', 'spam': 'spam'})
X = data['v2']
y = data['label']

In [86]:
# Split the data into two sets: Training Set and Testing Set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [87]:
# Create a TF-IDF vectorizer
tfidf_vectorizer = TfidfVectorizer()

In [89]:
# Fit the vectorizer to the training data
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)


In [90]:
# Initialize a Naive Bayes classifier
naive_bayes_classifier = MultinomialNB()

In [91]:
svm_classifier=MultinomialNB()

In [92]:
# Train the Naive Bayes classifier
naive_bayes_classifier.fit(X_train_tfidf, y_train)

In [93]:
# Train the SVM classifier
svm_classifier.fit(X_train_tfidf, y_train)

In [94]:

# Transform the test data using the same vectorizer
X_test_tfidf = tfidf_vectorizer.transform(X_test)


In [95]:
# Make predictions
y_pred_nb = classifier.predict(X_test_tfidf)

In [96]:
# Make predictions using SVM
y_pred_svm = svm_classifier.predict(X_test_tfidf)

In [97]:
# Calculate accuracy
accuracy_nb = accuracy_score(y_test, y_pred_nb)


In [98]:
accuracy_svm = accuracy_score(y_test, y_pred_svm)

In [99]:
# Display classification report with labels 'ham' and 'spam' for Naive Bayes
report_nb = classification_report(y_test, y_pred_nb, target_names=['Legitimate SMS', 'Spam SMS'])

In [100]:
# Display classification report with labels 'ham' and 'spam'
report_svm = classification_report(y_test, y_pred_svm, target_names=['Legitimate SMS', 'Spam SMS'])


In [101]:
# Create a progress bar
# Create progress bars
progress_bar_nb = tqdm(total=50, position=0, leave=True)
progress_bar_svm = tqdm(total=50, position=0, leave=True)


  0%|          | 0/50 [00:00<?, ?it/s]

In [103]:
# Simulate progress updates for Naive Bayes
for i in range(10, 51, 10):
    progress_bar_nb.update(10)
    progress_bar_nb.set_description(f'NB Progress: {i}%')

NB Progress: 50%: : 100it [00:07,  9.36it/s]

In [104]:
# Simulate progress updates for SVM
for i in range(10, 51, 10):
    progress_bar_svm.update(10)
    progress_bar_svm.set_description(f'SVM Progress: {i}%')


SVM Progress: 50%: 100%|██████████| 50/50 [00:10<00:00,  1.03s/it]

In [105]:
# Close the progress bars
progress_bar_nb.close()
progress_bar_svm.close()

NB Progress: 50%: : 100it [00:11,  8.57it/s]
SVM Progress: 50%: 100%|██████████| 50/50 [00:11<00:00,  4.29it/s]


In [107]:
print('Naive Bayes Classifier:')
print(f'Accuracy: {accuracy_nb:.2f}')
print('Classification Report:')
print(report_nb)


Naive Bayes Classifier:
Accuracy: 0.96
Classification Report:
                precision    recall  f1-score   support

Legitimate SMS       0.95      1.00      0.97       889
      Spam SMS       1.00      0.68      0.81       145

      accuracy                           0.96      1034
     macro avg       0.98      0.84      0.89      1034
  weighted avg       0.96      0.96      0.95      1034



In [108]:
print('\nSupport Vector Machine (SVM) Classifier:')
print(f'Accuracy: {accuracy_svm:.2f}')
print('Classification Report:')
print(report_svm)


Support Vector Machine (SVM) Classifier:
Accuracy: 0.96
Classification Report:
                precision    recall  f1-score   support

Legitimate SMS       0.95      1.00      0.97       889
      Spam SMS       1.00      0.68      0.81       145

      accuracy                           0.96      1034
     macro avg       0.98      0.84      0.89      1034
  weighted avg       0.96      0.96      0.95      1034

