# In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder

# Load the dataset
df = pd.read_csv('emails.csv')

# 1. Preprocess the data
# Drop the 'Email No.' column; it is not used as a model feature
df = df.drop(columns=['Email No.'])

# Drop every row that contains a missing value
df = df.dropna()

# No label encoding is applied: the 'Prediction' column is used directly as the target.
# NOTE(review): assumes 'Prediction' already holds numeric class labels (e.g. 0/1) - confirm against emails.csv

# Split the dataset into features (X) and target (y)
X = df.drop(columns=['Prediction'])
y = df['Prediction']

# 2. Split the data into training and testing sets
# 20% of the rows are held out for testing; the fixed random_state keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 3. K-Nearest Neighbors (KNN) Classifier
# Ask the user for k and keep prompting until the value is usable.
# k must be a whole number between 1 and the number of training samples:
# the classifier cannot look up more neighbors than the samples it was fitted on.
max_k = len(X_train)
while True:
    raw_k = input("Enter the value of k for K-Nearest Neighbors: ")
    try:
        k = int(raw_k)
    except ValueError:
        # Non-numeric input: explain and ask again instead of crashing
        print(f"'{raw_k}' is not a whole number. Please try again.")
        continue
    if 1 <= k <= max_k:
        break
    print(f"k must be between 1 and {max_k}. Please try again.")

knn_model = KNeighborsClassifier(n_neighbors=k)
knn_model.fit(X_train, y_train)
y_pred_knn = knn_model.predict(X_test)

# 4. Support Vector Machine (SVM) Classifier
# Build a linear-kernel SVC and train it in one step (fit() returns the fitted estimator),
# then predict labels for the held-out test features.
svm_model = SVC(kernel='linear').fit(X_train, y_train)
y_pred_svm = svm_model.predict(X_test)

# 5. Evaluate the models
def evaluate_model(y_true, y_pred, model_name):
    """Print an evaluation summary for one model's predictions.

    Writes to stdout, in order: a header containing ``model_name``, the
    accuracy formatted to two decimal places, the confusion matrix, and
    the classification report.

    Args:
        y_true: Ground-truth labels.
        y_pred: Labels predicted by the model, compared against ``y_true``.
        model_name: Display name used in the header line.

    Returns:
        None. The results are only printed.
    """
    print(f'{model_name} Performance:')

    acc = accuracy_score(y_true, y_pred)
    print(f'Accuracy: {acc:.2f}')

    # Each remaining section is a heading followed by the metric's printed output;
    # the metric is computed right before it is printed.
    sections = (
        ('Confusion Matrix:', confusion_matrix),
        ('Classification Report:', classification_report),
    )
    for heading, metric in sections:
        print(heading)
        print(metric(y_true, y_pred))

# Report test-set performance of the KNN classifier
evaluate_model(
    y_true=y_test,
    y_pred=y_pred_knn,
    model_name='K-Nearest Neighbors',
)

# Report test-set performance of the SVM classifier
evaluate_model(
    y_true=y_test,
    y_pred=y_pred_svm,
    model_name='Support Vector Machine',
)