# In [None]:  (notebook cell marker left over from export)
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Load the email dataset from its local CSV file.
df = pd.read_csv(
    r"C:\Users\SarthakPatil\Downloads\dataset\emails.csv"
)

# Quick inspection: first rows, column/dtype summary, per-column null counts.
df.head()
df.info()
df.isnull().sum()

# Discard any rows that contain missing values before modelling.
df = df.dropna()

# Bar chart of how many rows fall into each value of the 'spam' column.
sns.countplot(data=df, x='spam').set_title("Spam vs Not Spam Distribution")
plt.show()

# Build the feature matrix and target vector.
# Both the row identifier ('Email No.') and the target ('spam') are removed
# from X: leaving 'spam' among the features would leak the label to the
# classifiers and make their evaluation scores meaningless.
X = df.drop(columns=['Email No.', 'spam'])
y = df['spam']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# K-Nearest Neighbors: fit a 5-neighbour classifier on the training split
# and predict labels for the held-out test split.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)

# Report overall accuracy and the per-class precision/recall/F1 table.
knn_accuracy = accuracy_score(y_test, y_pred_knn)
print("KNN Accuracy:", knn_accuracy)
knn_report = classification_report(y_test, y_pred_knn, zero_division=0)
print("\nKNN Classification Report:\n", knn_report)

# Confusion matrix for KNN, drawn as an annotated heatmap of integer counts.
knn_cm = confusion_matrix(y_test, y_pred_knn)
plt.figure(figsize=(4, 3))
sns.heatmap(knn_cm, annot=True, fmt='d', cmap='Blues')
plt.title("KNN Confusion Matrix")
plt.show()


# Support Vector Machine: fit a linear-kernel classifier on the training
# split and predict labels for the held-out test split.
svm = SVC(kernel='linear').fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)

# Report overall accuracy and the per-class precision/recall/F1 table.
svm_accuracy = accuracy_score(y_test, y_pred_svm)
print("SVM Accuracy:", svm_accuracy)
svm_report = classification_report(y_test, y_pred_svm, zero_division=0)
print("\nSVM Classification Report:\n", svm_report)

# Confusion matrix for SVM, drawn as an annotated heatmap of integer counts.
svm_cm = confusion_matrix(y_test, y_pred_svm)
plt.figure(figsize=(4, 3))
sns.heatmap(svm_cm, annot=True, fmt='d', cmap='Greens')
plt.title("SVM Confusion Matrix")
plt.show()

# Scatter the test rows on their first two feature columns, coloured by the
# true label in y_test (not by either model's predictions).
x1 = X_test.iloc[:, 0]
x2 = X_test.iloc[:, 1]

# y is taken from the 'spam' column, so 1 is treated as spam and 0 as not
# spam (assumes the usual 0/1 encoding -- confirm against the dataset).
# The original masks were swapped, so the legend named the classes backwards.
is_spam = y_test == 1
plt.scatter(x1[is_spam], x2[is_spam], color='red', alpha=0.5, label='Spam')
plt.scatter(x1[~is_spam], x2[~is_spam], color='blue', alpha=0.5, label='Not Spam')

# Dashed y = x reference diagonal across the range of the first feature.
# NOTE: this is not derived from the fitted SVM; it is not a decision boundary.
plt.plot([x1.min(), x1.max()], [x1.min(), x1.max()], 'k--')

plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('SVM Class Separation (Spam vs Not Spam)')
plt.legend()
plt.show()

