In [4]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import svm
from sklearn.metrics import confusion_matrix

# Load the dataset; the file is decoded as latin1.
data = pd.read_csv("spam.csv", encoding='latin1')

# Show which columns the CSV provides.
print(data.columns)

# Column 'v2' is used as the model input and column 'v1' as the target.
x_data, y_data = data['v2'], data['v1']

# Ordered 80/20 split: the leading rows are used for training and the
# trailing rows for testing (rows are not shuffled).
split = int(0.8 * len(data))
x_train, x_test = x_data[:split], x_data[split:]
y_train, y_test = y_data[:split], y_data[split:]

# Turn the raw text into token-count features. The right-hand side is
# evaluated left to right, so the vocabulary is fitted on the training
# text before the test text is transformed with that same vocabulary.
vectorizer = CountVectorizer()
x_train, x_test = vectorizer.fit_transform(x_train), vectorizer.transform(x_test)

# Fit both classifiers on the vectorized training set. fit() returns the
# estimator itself, so construction and training are chained.
nb_model = MultinomialNB().fit(x_train, y_train)
svm_model = svm.LinearSVC().fit(x_train, y_train)

# Label the held-out test set with each trained model.
nb_preds, svm_preds = nb_model.predict(x_test), svm_model.predict(x_test)

# Confusion matrices with a fixed class order: ham first, then spam.
class_order = ['ham', 'spam']
nb_cm = confusion_matrix(y_test, nb_preds, labels=class_order)
svm_cm = confusion_matrix(y_test, svm_preds, labels=class_order)

# Wrap each matrix in a labelled DataFrame so the printout is readable.
# Per scikit-learn's convention, rows follow y_test and columns follow
# the predictions.
axis_names = ['Ham', 'Spam']
nb_df = pd.DataFrame(nb_cm, index=axis_names, columns=axis_names)
svm_df = pd.DataFrame(svm_cm, index=axis_names, columns=axis_names)

# Report the Naive Bayes result first, then the SVM result.
print("Multinomial NB Confusion Matrix:\n", nb_df)
print("\nSVM Confusion Matrix:\n", svm_df)

Index(['v1', 'v2', 'Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'], dtype='object')
Multinomial NB Confusion Matrix:
       Ham  Spam
Ham   962     8
Spam    8   137

SVM Confusion Matrix:
       Ham  Spam
Ham   967     3
Spam   13   132
