In [25]:
# Import necessary libraries
import os

import pandas as pd
from sklearn import metrics
from sklearn.base import clone
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC

# Load the dataset (adjust path as needed)
data_path = r'C:\Users\ADMIN\Desktop\CODSOFT\CODSOFT\SPAM SMS DETECTION\spam.csv'
# Read only the two columns this script uses, then give them descriptive names.
# Loading them directly (instead of slicing a wider frame afterwards) means the
# label assignment below operates on a frame that is not a slice of another one.
df = pd.read_csv(data_path, encoding='latin-1', usecols=['v1', 'v2'])
df = df.rename(columns={'v1': 'label', 'v2': 'message'})

# Preprocess labels: encode 'ham' as 0 and 'spam' as 1
df['label'] = df['label'].map({'ham': 0, 'spam': 1})
# Series.map() turns any value missing from the mapping into NaN; fail here with
# a clear message rather than later inside a classifier.
unmapped = df['label'].isna()
if unmapped.any():
    raise ValueError(
        f"{int(unmapped.sum())} row(s) have a label other than 'ham' or 'spam'"
    )

# Split the dataset into training and testing sets
# 20% of the rows are held out; random_state is fixed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df['message'], df['label'], test_size=0.2, random_state=42
)

# TF-IDF Vectorization
# The vectorizer is fitted on the training messages only; the test messages are
# then transformed with that already-fitted vectorizer.
vectorizer = TfidfVectorizer()
X_train_tfidf, X_test_tfidf = (
    vectorizer.fit_transform(X_train),  # evaluated first: fit + transform training text
    vectorizer.transform(X_test),       # evaluated second: reuse the fitted vectorizer
)

# Define classifiers
# Display name -> unfitted estimator, each created with scikit-learn's default settings.
classifiers = {
    'Naive Bayes': MultinomialNB(),
    'Logistic Regression': LogisticRegression(),
    'Support Vector Machine': SVC(),
}

# Training and evaluation
# Each classifier is wrapped in its own (TF-IDF -> classifier) pipeline, fitted on
# the raw training messages and scored on the held-out messages.
results = {}
for clf_name, clf in classifiers.items():
    # clone() yields an unfitted vectorizer with the same parameters as `vectorizer`.
    # Giving every pipeline its own copy keeps the pipelines independent of each
    # other and leaves the module-level `vectorizer` untouched by pipeline.fit().
    pipeline = make_pipeline(clone(vectorizer), clf)
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)

    # Store results (all metrics compare y_test against this classifier's predictions)
    results[clf_name] = {
        'Accuracy': metrics.accuracy_score(y_test, y_pred),
        'Precision': metrics.precision_score(y_test, y_pred),
        'Recall': metrics.recall_score(y_test, y_pred),
        'F1 Score': metrics.f1_score(y_test, y_pred),
        'Confusion Matrix': metrics.confusion_matrix(y_test, y_pred),
    }

# Display results
# One report per classifier: the scalar scores in a fixed order, then the
# confusion matrix, then a blank-line separator.
for clf_name, metrics_dict in results.items():
    print(f"Classifier: {clf_name}")
    for metric_name in ('Accuracy', 'Precision', 'Recall', 'F1 Score'):
        print(f"{metric_name}:", metrics_dict[metric_name])
    print("Confusion Matrix:\n", metrics_dict['Confusion Matrix'])
    print("\n")


Classifier: Naive Bayes
Accuracy: 0.9623318385650225
Precision: 1.0
Recall: 0.72
F1 Score: 0.8372093023255814
Confusion Matrix:
 [[965   0]
 [ 42 108]]


Classifier: Logistic Regression
Accuracy: 0.967713004484305
Precision: 0.9913793103448276
Recall: 0.7666666666666667
F1 Score: 0.8646616541353384
Confusion Matrix:
 [[964   1]
 [ 35 115]]


Classifier: Support Vector Machine
Accuracy: 0.9820627802690582
Precision: 1.0
Recall: 0.8666666666666667
F1 Score: 0.9285714285714286
Confusion Matrix:
 [[965   0]
 [ 20 130]]


