# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score

# Read the data
df = pd.read_csv("spamham.csv")

# Replace null values with an empty string
df.fillna('', inplace=True)

# Categorize Spam as 0 and Not spam as 1.
# Labels are normalised (surrounding whitespace, letter case) before the
# lookup. Series.map turns every label that is not in the mapping into NaN
# (this includes labels that were missing and became '' above), so those
# rows are dropped here instead of leaking NaN targets into model fitting.
label_to_id = {'ham': 1, 'spam': 0}
encoded = df['Category'].astype(str).str.strip().str.lower().map(label_to_id)
unlabelled = encoded.isna()
if unlabelled.any():
    print("Dropping {} row(s) with an unrecognised Category label".format(int(unlabelled.sum())))
    df = df.loc[~unlabelled].copy()
    encoded = encoded.loc[~unlabelled]
# With no NaN left, the targets can be stored as plain integers.
df['Category'] = encoded.astype('int64')

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
messages, targets = df['Message'], df['Category']
x_train, x_test, y_train, y_test = train_test_split(
    messages,
    targets,
    train_size=0.8,
    test_size=0.2,
    random_state=4,
)

# Turn the raw text into TF-IDF features: the vectorizer is fitted on the
# training messages only and then reused, unchanged, for the test messages
tfidf = TfidfVectorizer(lowercase=True, stop_words='english', min_df=1)
x_train_feat = tfidf.fit_transform(x_train)
x_test_feat = tfidf.transform(x_test)

# SVM model
# LinearSVC accepts a random_state that controls the data shuffling done by
# its solver; seed it with the same value as the train/test split so the
# reported SVM metrics are repeatable from run to run.
svm_model = LinearSVC(random_state=4)
svm_model.fit(x_train_feat, y_train)
svm_pred = svm_model.predict(x_test_feat)

# Multinomial Naive Bayes model (MultinomialNB takes no random_state)
nb_model = MultinomialNB()
nb_model.fit(x_train_feat, y_train)
nb_pred = nb_model.predict(x_test_feat)

def _report_metrics(name, y_true, y_pred):
    """Print accuracy, macro F-score and confusion matrix for one model.

    Args:
        name: Short model label used in the printed headings (e.g. "SVM").
        y_true: Ground-truth labels of the test set.
        y_pred: Labels predicted by the model for the same rows.

    Returns:
        The confusion matrix returned by ``confusion_matrix(y_true, y_pred)``.
    """
    print("~~~~~~~~~~{} RESULTS~~~~~~~~~~".format(name))
    # The scores are multiplied by 100 so they are printed as percentages.
    print("Accuracy Score using {}: {:.4f}".format(name, accuracy_score(y_true, y_pred) * 100))
    print("F Score using {}: {:.4f}".format(name, f1_score(y_true, y_pred, average='macro') * 100))
    cm = confusion_matrix(y_true, y_pred)
    print("Confusion matrix using {}:".format(name))
    print(cm)
    return cm


# Evaluate SVM model
cm_svm = _report_metrics("SVM", y_test, svm_pred)

# Evaluate Multinomial Naive Bayes model
cm_nb = _report_metrics("MNB", y_test, nb_pred)
