In [None]:
import pandas as pd

# Load the labelled messages from a CSV file in the notebook's working
# directory; later cells read the 'Category' and 'Message' columns.
df = pd.read_csv(filepath_or_buffer='spam.csv')

# Show the first five rows as a quick sanity check of the load.
df.head(n=5)

Unnamed: 0,Category,Message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Encode the string class labels as integers. The label -> integer mapping is
# available afterwards as label_encoder.classes_ (position == encoded value).
# NOTE: this overwrites df['Category'] in place, so re-running this cell
# without reloading df refits the encoder on the already-encoded integers and
# loses the original label names in label_encoder.classes_.
label_encoder = LabelEncoder()
df['Category'] = label_encoder.fit_transform(df['Category'])

# Split the raw text BEFORE vectorising. Fitting the vectorizer on the whole
# corpus would let the vocabulary be learned from test messages (data
# leakage). With the same random_state and the same number of rows, the row
# partition is the same as when splitting the pre-vectorised matrix.
messages_train, messages_test, y_train, y_test = train_test_split(
    df['Message'], df['Category'], test_size=0.2, random_state=42
)

# Learn the bag-of-words vocabulary from the training messages only, then
# reuse it to encode the held-out messages; unseen test words are ignored.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(messages_train)
X_test = vectorizer.transform(messages_test)

# Kept so that any code relying on this name continues to work: count matrix
# for every message, encoded with the training-set vocabulary.
message_embeddings = vectorizer.transform(df['Message'])

In [None]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score

# Seed for the estimators that use randomness internally, so that
# Restart & Run All reproduces the same scores and confusion matrices.
MODEL_SEED = 42


def fit_and_score(model, X_train, y_train, X_test, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``.
    X_train, y_train : training features and labels.
    X_test, y_test : held-out features and labels.

    Returns
    -------
    tuple
        ``(predictions, accuracy, f1)`` where ``predictions`` is the output of
        ``model.predict(X_test)`` and the two scores come from
        ``accuracy_score`` and ``f1_score`` called with their defaults.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    f1 = f1_score(y_test, predictions)
    return predictions, accuracy, f1


def report_scores(header, accuracy, f1):
    """Print a header line followed by the accuracy and F1 score."""
    print(header)
    print("Accuracy:", accuracy)
    print("F1 Score:", f1)


# Model 1: Naive Bayes (takes no random_state; nothing to seed)
nb_model = MultinomialNB()
nb_predictions, nb_accuracy, nb_f1 = fit_and_score(
    nb_model, X_train, y_train, X_test, y_test
)
report_scores("Naive Bayes:", nb_accuracy, nb_f1)


# Model 2: Decision Tree (seeded for reproducible results)
dt_model = DecisionTreeClassifier(random_state=MODEL_SEED)
dt_predictions, dt_accuracy, dt_f1 = fit_and_score(
    dt_model, X_train, y_train, X_test, y_test
)
report_scores("\nDecision Tree:", dt_accuracy, dt_f1)


# Model 3: Random Forest (seeded for reproducible results)
rf_model = RandomForestClassifier(random_state=MODEL_SEED)
rf_predictions, rf_accuracy, rf_f1 = fit_and_score(
    rf_model, X_train, y_train, X_test, y_test
)
report_scores("\nRandom Forest:", rf_accuracy, rf_f1)


Naive Bayes:
Accuracy: 0.9856502242152466
F1 Score: 0.9466666666666667

Decision Tree:
Accuracy: 0.9713004484304932
F1 Score: 0.8904109589041096

Random Forest:
Accuracy: 0.9766816143497757
F1 Score: 0.9044117647058824


In [None]:
from sklearn.metrics import confusion_matrix

# Build one confusion matrix per model from the held-out labels and that
# model's test-set predictions.
nb_cm = confusion_matrix(y_test, nb_predictions)
dt_cm = confusion_matrix(y_test, dt_predictions)
rf_cm = confusion_matrix(y_test, rf_predictions)

# Print each matrix under its title, in the same order the models were trained.
titled_matrices = (
    ("\nNaive Bayes Confusion Matrix:", nb_cm),
    ("\nDecision Tree Confusion Matrix:", dt_cm),
    ("\nRandom Forest Confusion Matrix:", rf_cm),
)
for title, matrix in titled_matrices:
    print(title)
    print(matrix)



Naive Bayes Confusion Matrix:
[[957   9]
 [  7 142]]

Decision Tree Confusion Matrix:
[[953  13]
 [ 19 130]]

Random Forest Confusion Matrix:
[[966   0]
 [ 26 123]]
