In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Read the labelled e-mail records from disk. The path is relative, so the
# CSV must sit in the current working directory.
df = pd.read_csv("email_spam_dataset.csv")

# Sanity check: print the leading rows so the column layout can be eyeballed.
print(df.head())

# Input columns for the model, one per line so additions diff cleanly.
feature_columns = [
    "Email Length",
    "Num. Links",
    "Num. Spam Words",
    "Has Offer (1/0)",
    "Has Urgent (1/0)",
]

# Feature matrix (columns in the order listed above) and the label column.
X = df[feature_columns]
y = df["Spam (1=Yes, 0=No)"]

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build a 5-nearest-neighbours classifier and fit it on the training rows.
# `fit` returns the estimator itself, so construction and training chain.
# NOTE(review): the features are passed in unscaled; "Email Length" is in the
# hundreds while the flag columns are 0/1, so it probably dominates the
# neighbour distances -- consider standardising before fitting (confirm).
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Score the model on the held-out rows and report accuracy as a percentage.
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

# Classify one unseen e-mail. Values follow the order of `feature_columns`:
# length 150, 1 link, 2 spam words, offer present, not urgent.
sample_row = [150, 1, 2, 1, 0]

# Wrap the row in a DataFrame with the training column names so the input
# has the same layout the classifier was fitted on.
new_email_features = pd.DataFrame(data=[sample_row], columns=feature_columns)

# `predict` returns one label per input row; only a single row was passed.
prediction = knn.predict(new_email_features)

# Label 1 means spam, anything else is reported as not spam.
if prediction[0] == 1:
    print("The email is SPAM.")
else:
    print("The email is NOT spam.")


   Email Length  Num. Links  Num. Spam Words  Has Offer (1/0)  \
0           254           3                2                0   
1           356           4                0                1   
2           269           2                6                0   
3           321           2                5                1   
4           103           2                3                0   

   Has Urgent (1/0)  Spam (1=Yes, 0=No)  
0                 0                   1  
1                 0                   1  
2                 0                   1  
3                 1                   1  
4                 0                   0  
Model Accuracy: 83.33%
The email is SPAM.
