In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Read the email dataset. The path is relative, so emails.csv has to be
# reachable from the current working directory.
df = pd.read_csv("emails.csv")

# Features (X): every column except the first and the last -> word frequencies.
# Target   (Y): the last column -> label (Spam = 1, Not Spam = 0).
X, Y = df.iloc[:, 1:-1], df.iloc[:, -1]

# Hold out 20% of the rows for testing (80% train). The fixed random_state
# makes the shuffle, and therefore the split, repeatable between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

# ---------------------------------------------------------------
# K-Nearest Neighbours classifier (5 neighbours)
# ---------------------------------------------------------------
# fit() returns the fitted estimator itself, so construction and training
# can be chained while `knn` still refers to the trained model.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, Y_train)

# Predicted labels for the held-out test rows.
knn_pred = knn.predict(X_test)

# ---------------------------------------------------------------
# Support Vector Machine classifier (linear kernel)
# ---------------------------------------------------------------
# fit() returns the fitted estimator itself, so construction and training
# can be chained while `svm` still refers to the trained model.
svm = SVC(kernel='linear').fit(X_train, Y_train)

# Predicted labels for the held-out test rows.
svm_pred = svm.predict(X_test)

# ---------------------------------------------------------------
# Performance evaluation
# ---------------------------------------------------------------
# Both models are scored against the same held-out labels (Y_test), so the
# header / accuracy / confusion-matrix printout is produced by one loop
# instead of two copies of the same three print calls.
for _header, _predictions in (
    ("\n=== KNN Performance ===", knn_pred),
    ("\n=== SVM Performance ===", svm_pred),
):
    print(_header)
    print("Accuracy:", accuracy_score(Y_test, _predictions))
    print("Confusion Matrix:\n", confusion_matrix(Y_test, _predictions))

# The per-class precision / recall / F1 table is printed for the SVM
# predictions only.
_svm_report = classification_report(Y_test, svm_pred)
print("\n=== Classification Report (SVM Recommended) ===")
print(_svm_report)



=== KNN Performance ===
Accuracy: 0.8628019323671497
Confusion Matrix:
 [[646  93]
 [ 49 247]]

=== SVM Performance ===
Accuracy: 0.9594202898550724
Confusion Matrix:
 [[715  24]
 [ 18 278]]

=== Classification Report (SVM Recommended) ===
              precision    recall  f1-score   support

           0       0.98      0.97      0.97       739
           1       0.92      0.94      0.93       296

    accuracy                           0.96      1035
   macro avg       0.95      0.95      0.95      1035
weighted avg       0.96      0.96      0.96      1035

