<a href="https://colab.research.google.com/github/Khadija-Zafar/DEP/blob/main/Task2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Load the SMS corpus and rename the 'Label'/'Message' columns to the
# 'Target'/'Text' names used by the rest of the script.
data = pd.read_csv('SMSSpamCollection.csv', encoding='latin-1')
data = data.rename(columns={"Label": "Target", "Message": "Text"})

# Encode the labels as integers (0 for 'ham', 1 for 'spam').
# Series.map turns every value that is missing from the mapping into NaN, so
# check for that here and fail with a message naming the offending labels
# instead of letting NaN targets reach the classifiers.
label_codes = {'ham': 0, 'spam': 1}
encoded_target = data['Target'].map(label_codes)
unmapped_rows = encoded_target.isna()
if unmapped_rows.any():
    unexpected = sorted(data.loc[unmapped_rows, 'Target'].astype(str).unique())
    raise ValueError(
        f"Unexpected label(s) in 'Target' column: {unexpected}; "
        f"expected one of {sorted(label_codes)}"
    )
data['Target'] = encoded_target

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    data['Text'],
    data['Target'],
    test_size=0.2,
    random_state=42,
)

# Turn the raw messages into TF-IDF feature matrices. The vectorizer is fitted
# on the training messages only; the test messages are then transformed with
# that already-fitted vectorizer.
tfidf_vectorizer = TfidfVectorizer()
X_train_vectorized, X_test_vectorized = (
    tfidf_vectorizer.fit_transform(X_train),
    tfidf_vectorizer.transform(X_test),
)

# Models to compare, keyed by the display name used when reporting.
classifiers = {
    "Naive Bayes": MultinomialNB(),
    "Support Vector Machine": SVC(kernel='linear'),
}

# Fit each model on the training features, predict the held-out features, and
# store its accuracy and text classification report under the model's name.
results = {}
for model_name in classifiers:
    model = classifiers[model_name]
    predictions = model.fit(X_train_vectorized, y_train).predict(X_test_vectorized)
    results[model_name] = {
        "Accuracy": accuracy_score(y_test, predictions),
        "Classification Report": classification_report(y_test, predictions),
    }

# Print the stored metrics for every model, in the order they were evaluated.
for model_name in results:
    metrics = results[model_name]
    print(f"\n{model_name} Results:")
    print("Accuracy:", metrics["Accuracy"])
    # sep="\n" puts the report on the line after its heading.
    print("Classification Report:", metrics["Classification Report"], sep="\n")



Naive Bayes Results:
Accuracy: 0.9650224215246637
Classification Report:
              precision    recall  f1-score   support

           0       0.96      1.00      0.98       966
           1       1.00      0.74      0.85       149

    accuracy                           0.97      1115
   macro avg       0.98      0.87      0.91      1115
weighted avg       0.97      0.97      0.96      1115


Support Vector Machine Results:
Accuracy: 0.9937219730941704
Classification Report:
              precision    recall  f1-score   support

           0       0.99      1.00      1.00       966
           1       1.00      0.95      0.98       149

    accuracy                           0.99      1115
   macro avg       1.00      0.98      0.99      1115
weighted avg       0.99      0.99      0.99      1115

