In [1]:
from sklearn.datasets import load_iris

# Fetch the Iris dataset, then convert its features and targets to plain Python lists
data = load_iris()
X, y = data.data.tolist(), data.target.tolist()

# --- Pseudo-Random Number Generator (LCG) & Shuffle Implementation ---

# We'll implement a simple Linear Congruential Generator (LCG) with a fixed seed.
_seed = 42  # generator state; starts from a fixed value so runs are repeatable
def my_rand():
    """Advance the module-level LCG state by one step and return the new state.

    The update is ``state = (state * 1103515245 + 12345) mod 2**31``, so every
    returned value is an integer in the range ``[0, 2**31)``.
    """
    global _seed
    multiplier, increment, modulus = 1103515245, 12345, 2 ** 31
    next_state = (_seed * multiplier + increment) % modulus
    _seed = next_state
    return next_state

def my_randint(a, b):
    """Return a pseudo-random integer drawn from the closed range [a, b].

    One value is taken from ``my_rand()`` and reduced modulo the number of
    integers in the range, then shifted so the smallest possible result is ``a``.
    """
    draw = my_rand()
    width = b - a + 1  # how many integers lie in [a, b]
    return a + draw % width

def shuffle_list(lst):
    """Shuffle ``lst`` in place with a Fisher-Yates pass driven by ``my_randint``.

    Walks from the last position down to position 1, swapping each position
    with a randomly chosen position at or before it. Returns ``None``.
    """
    last = len(lst) - 1
    while last > 0:
        pick = my_randint(0, last)
        lst[pick], lst[last] = lst[last], lst[pick]
        last -= 1

# Start from the identity ordering 0 .. len(X) - 1, then permute it in place
indices = [position for position in range(len(X))]
shuffle_list(indices)

# --- Train-Test Split (80% train, 20% test) ---

def train_test_split(X, y, indices, test_size=0.2):
    """Partition ``X`` and ``y`` into train and test subsets following ``indices``.

    The leading ``floor(len(X) * (1 - test_size))`` entries of ``indices`` pick
    the training rows and the remaining entries pick the test rows, so the
    caller controls the randomisation by shuffling ``indices`` beforehand.

    Args:
        X: Indexable collection of samples.
        y: Indexable collection of labels aligned with ``X``.
        indices: Sequence of positions into ``X`` and ``y``, in split order.
        test_size: Fraction of the samples, from 0 to 1 inclusive, held out
            for testing.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)`` of newly built lists.

    Raises:
        ValueError: If ``test_size`` lies outside the range [0, 1].
    """
    if not 0 <= test_size <= 1:
        raise ValueError(
            "test_size must be between 0 and 1, got {!r}".format(test_size)
        )
    n = len(X)
    # 1 - test_size is rarely exact in binary floating point (1 - 0.9 evaluates
    # to 0.09999999999999998), so a bare int() can land one row short of the
    # intended boundary. Rounding away that noise first keeps the floor intact.
    split_index = int(round(n * (1 - test_size), 9))
    train_indices = indices[:split_index]
    test_indices = indices[split_index:]
    X_train = [X[i] for i in train_indices]
    X_test = [X[i] for i in test_indices]
    y_train = [y[i] for i in train_indices]
    y_test = [y[i] for i in test_indices]
    return X_train, X_test, y_train, y_test

# Build the 80/20 train/test partition, following the shuffled order held in `indices`
X_train, X_test, y_train, y_test = train_test_split(X, y, indices, test_size=0.2)

# --- Euclidean Distance Calculation ---

def euclidean_distance(p1, p2):
    """Return the Euclidean (L2) distance between points ``p1`` and ``p2``.

    Coordinates are paired by position over the length of ``p1``; ``p2`` must
    be indexable at every one of those positions.
    """
    squared_sum = 0
    for axis, coord in enumerate(p1):
        delta = coord - p2[axis]
        squared_sum += delta * delta
    # Raising to the power 0.5 takes the square root without importing math
    return squared_sum ** 0.5

# --- Majority Vote Function (without collections.Counter) ---

def majority_vote(labels):
    """Return the label that occurs most often in ``labels``.

    When several labels share the highest count, the one that appeared first
    in ``labels`` wins. An empty input yields ``None``.
    """
    tally = {}
    for item in labels:
        tally[item] = tally.get(item, 0) + 1
    if not tally:
        return None
    # max() keeps the first maximal key it meets, and dict iteration follows
    # insertion order, so ties resolve to the earliest-seen label.
    return max(tally, key=tally.get)

# --- k-NN Classifier Implementation ---

def k_nearest_neighbors(X_train, y_train, test_point, k=3):
    """Classify ``test_point`` by a vote among its ``k`` closest training samples.

    Distances come from ``euclidean_distance`` and the winning label from
    ``majority_vote``, which receives the neighbour labels nearest-first.

    Args:
        X_train: Sequence of training feature vectors.
        y_train: Sequence of labels aligned with ``X_train``.
        test_point: Feature vector to classify.
        k: Number of nearest neighbours that take part in the vote; must be
            at least 1. If it exceeds the number of training samples, all of
            them vote.

    Returns:
        The label chosen by ``majority_vote`` for the ``k`` nearest samples.

    Raises:
        ValueError: If ``k`` is less than 1.
    """
    if k < 1:
        # k == 0 would vote over nothing, and a negative k would make the
        # slice below count from the far end, silently voting over almost the
        # entire training set instead of the nearest points.
        raise ValueError("k must be a positive integer, got {!r}".format(k))
    distances = []
    for i in range(len(X_train)):
        dist = euclidean_distance(X_train[i], test_point)
        distances.append((dist, y_train[i]))
    # Sort on distance only; list.sort is stable, so equally distant samples
    # keep their training-set order.
    distances.sort(key=lambda x: x[0])
    # Extract the labels of the k closest points
    k_labels = [label for (_, label) in distances[:k]]
    return majority_vote(k_labels)

# --- Make Predictions on Test Data ---

# Classify every held-out sample with the hand-written 3-nearest-neighbour routine
y_pred = [k_nearest_neighbors(X_train, y_train, sample, k=3) for sample in X_test]

# --- Evaluate Predictions: Collect Correct and Wrong Predictions ---

correct = []  # (features, predicted label) for every hit
wrong = []    # (features, predicted label, true label) for every miss
for features, pred, actual in zip(X_test, y_pred, y_test):
    if pred != actual:
        wrong.append((features, pred, actual))
        continue
    correct.append((features, pred))

# --- Print Results ---

accuracy = len(correct) / len(y_test) * 100
print("Accuracy: {:.2f}%".format(accuracy))

print("\nCorrect Predictions:")
for hit in correct:
    features, pred = hit
    print("Features:", features, "Predicted Class:", data.target_names[pred])

print("\nWrong Predictions:")
for miss in wrong:
    features, pred, actual = miss
    print("Features:", features, "Predicted:", data.target_names[pred], "Actual:", data.target_names[actual])


Accuracy: 93.33%

Correct Predictions:
Features: [5.1, 3.5, 1.4, 0.2] Predicted Class: setosa
Features: [6.1, 2.8, 4.7, 1.2] Predicted Class: versicolor
Features: [4.4, 3.2, 1.3, 0.2] Predicted Class: setosa
Features: [4.9, 3.6, 1.4, 0.1] Predicted Class: setosa
Features: [5.4, 3.4, 1.7, 0.2] Predicted Class: setosa
Features: [5.0, 3.2, 1.2, 0.2] Predicted Class: setosa
Features: [5.5, 2.4, 3.8, 1.1] Predicted Class: versicolor
Features: [7.7, 3.0, 6.1, 2.3] Predicted Class: virginica
Features: [6.5, 2.8, 4.6, 1.5] Predicted Class: versicolor
Features: [4.5, 2.3, 1.3, 0.3] Predicted Class: setosa
Features: [7.2, 3.0, 5.8, 1.6] Predicted Class: virginica
Features: [6.0, 2.2, 4.0, 1.0] Predicted Class: versicolor
Features: [6.3, 3.4, 5.6, 2.4] Predicted Class: virginica
Features: [5.8, 4.0, 1.2, 0.2] Predicted Class: setosa
Features: [6.5, 3.0, 5.2, 2.0] Predicted Class: virginica
Features: [4.8, 3.4, 1.6, 0.2] Predicted Class: setosa
Features: [5.0, 3.4, 1.5, 0.2] Predicted Class: setosa

In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Fetch the Iris dataset and pull out its features and labels
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; random_state=42 pins the split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build a 3-nearest-neighbour classifier and fit it on the training portion
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# Predict labels for the held-out samples
y_pred = knn.predict(X_test)

# Sort every test sample into a hit or a miss
correct = []  # (features, predicted label)
wrong = []    # (features, predicted label, true label)
for features, pred, actual in zip(X_test, y_pred, y_test):
    if pred != actual:
        wrong.append((features, pred, actual))
        continue
    correct.append((features, pred))

# Display results
print(f"Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%")

print("\nCorrect Predictions:")
for hit in correct:
    features, pred = hit
    print(f"Features: {features}, Predicted Class: {iris.target_names[pred]}")

print("\nWrong Predictions:")
for miss in wrong:
    features, pred, actual = miss
    print(f"Features: {features}, Predicted: {iris.target_names[pred]}, Actual: {iris.target_names[actual]}")


Accuracy: 100.00%

Correct Predictions:
Features: [6.1 2.8 4.7 1.2], Predicted Class: versicolor
Features: [5.7 3.8 1.7 0.3], Predicted Class: setosa
Features: [7.7 2.6 6.9 2.3], Predicted Class: virginica
Features: [6.  2.9 4.5 1.5], Predicted Class: versicolor
Features: [6.8 2.8 4.8 1.4], Predicted Class: versicolor
Features: [5.4 3.4 1.5 0.4], Predicted Class: setosa
Features: [5.6 2.9 3.6 1.3], Predicted Class: versicolor
Features: [6.9 3.1 5.1 2.3], Predicted Class: virginica
Features: [6.2 2.2 4.5 1.5], Predicted Class: versicolor
Features: [5.8 2.7 3.9 1.2], Predicted Class: versicolor
Features: [6.5 3.2 5.1 2. ], Predicted Class: virginica
Features: [4.8 3.  1.4 0.1], Predicted Class: setosa
Features: [5.5 3.5 1.3 0.2], Predicted Class: setosa
Features: [4.9 3.1 1.5 0.1], Predicted Class: setosa
Features: [5.1 3.8 1.5 0.3], Predicted Class: setosa
Features: [6.3 3.3 4.7 1.6], Predicted Class: versicolor
Features: [6.5 3.  5.8 2.2], Predicted Class: virginica
Features: [5.6 2.5 