In [7]:
import numpy as np
import pandas as pd

data = pd.read_csv('emails.csv')
data = data.drop(columns=['Email No.'])
X = data.drop(columns=['Prediction']).values
y = data['Prediction'].values

def euclidean_distance(x1, x2):
    return np.sqrt(np.sum((x1 - x2) ** 2))

#1NN algorithm
def one_nn(train_features, train_labels, test_features):
    predictions = []
    for test_instance in test_features:
        distances = [euclidean_distance(test_instance, train_instance) for train_instance in train_features]
        nearest_neighbor_index = np.argmin(distances)
        predictions.append(train_labels[nearest_neighbor_index])
    return predictions

#Evaluation functions
def accuracy_score(y_true, y_pred):
    correct = sum(y_t == y_p for y_t, y_p in zip(y_true, y_pred))
    return correct / len(y_true)

def precision_score(y_true, y_pred, pos_label=1):
    true_positives = sum(y_t == y_p == pos_label for y_t, y_p in zip(y_true, y_pred))
    predicted_positives = sum(y_p == pos_label for y_p in y_pred)
    return true_positives / (predicted_positives or 1)  # avoid division by zero

def recall_score(y_true, y_pred, pos_label=1):
    true_positives = sum(y_t == y_p == pos_label for y_t, y_p in zip(y_true, y_pred))
    actual_positives = sum(y_t == pos_label for y_t in y_true)
    return true_positives / (actual_positives or 1)  # avoid division by zero

#5-fold cross-validation
folds = [(1, 1000), (1000, 2000), (2000, 3000), (3000, 4000), (4000, 5000)]

for fold, (start, end) in enumerate(folds):
    test_set = X[start:end]
    test_labels = y[start:end]
    train_set = np.concatenate((X[:start], X[end:]), axis=0)
    train_labels = np.concatenate((y[:start], y[end:]), axis=0)
    
    predictions = one_nn(train_set, train_labels, test_set)
    
    accuracy = accuracy_score(test_labels, predictions)
    precision = precision_score(test_labels, predictions)
    recall = recall_score(test_labels, predictions)
    
    print(f"Fold {fold + 1}:")
    print(f"Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}\n")


Fold 1:
Accuracy: 0.8238, Precision: 0.6535, Recall: 0.8140

Fold 2:
Accuracy: 0.8530, Precision: 0.6857, Recall: 0.8664

Fold 3:
Accuracy: 0.8620, Precision: 0.7212, Recall: 0.8380

Fold 4:
Accuracy: 0.8510, Precision: 0.7164, Recall: 0.8163

Fold 5:
Accuracy: 0.7750, Precision: 0.6057, Recall: 0.7582

