## Task 1

In [5]:
from tensorflow.keras.datasets import mnist
import numpy as np
from scipy.stats import mode

In [6]:
# Load MNIST through the Keras dataset helper. The result unpacks into a
# (features, labels) pair for the training split and another for the test split.
(train_X, train_y), (test_X, test_y) = mnist.load_data()

## Task 2

In [7]:
# Reshape and normalize the data
def _flatten_and_scale(images):
    """Flatten each sample to one feature row and scale integer pixels by 1/255.

    Scaling is applied only while the array still has an integer dtype. The
    division produces floats, so running this cell a second time leaves
    already-normalized data untouched instead of dividing by 255 again.
    Assumes raw pixels are 8-bit intensities (0-255), so scaled values fall
    in [0, 1] -- TODO confirm if a different dataset is substituted.
    """
    flat = images.reshape(images.shape[0], -1)
    if np.issubdtype(flat.dtype, np.integer):
        return flat / 255.0
    return flat


train_X = _flatten_and_scale(train_X)
test_X = _flatten_and_scale(test_X)

In [13]:
def knn_classify(train_X, train_y, test_X, k):
    """
    Classifies test set according to the k-Nearest Neighbors (kNN) rule.

    Each test sample receives the most frequent label among its k closest
    training samples under Euclidean distance. When several labels are equally
    frequent among the neighbors, the smallest label (in sorted order) wins.

    Parameters:
        train_X (ndarray): Training set feature matrix, one sample per row
        train_y (ndarray): Training set labels, one per row of train_X
        test_X (ndarray): Test set feature matrix, one sample per row
        k (int): Number of nearest neighbors to use for classification

    Returns:
        test_predictions (ndarray): Predicted labels for the test set, with the
            same dtype as train_y

    Raises:
        ValueError: If k is not in the range [1, number of training samples],
            if the number of labels differs from the number of training
            samples, or if train_X and test_X have different column counts.
    """
    # Accept array-likes (e.g. lists) as well as ndarrays.
    train_X = np.asarray(train_X)
    train_y = np.asarray(train_y)
    test_X = np.asarray(test_X)

    # Check that k > 0 and k <= cardinality of the training set
    n = train_X.shape[0]
    if k <= 0 or k > n:
        raise ValueError(f"k must be greater than 0 and less than or equal to {n}")

    # A label array of the wrong length would otherwise fail late with an
    # IndexError or silently pair samples with the wrong labels.
    if train_y.shape[0] != n:
        raise ValueError("Number of training labels must match the number of training samples")

    # Check that the number of columns in test_X equals the number of columns in train_X
    if train_X.shape[1] != test_X.shape[1]:
        raise ValueError("Number of columns in test set must match the training set")

    # Preallocate with the label dtype so the output type does not depend on
    # how many test samples there are (including zero).
    test_predictions = np.empty(test_X.shape[0], dtype=train_y.dtype)
    for i, test_sample in enumerate(test_X):
        # Euclidean distance from this test sample to every training sample
        distances = np.linalg.norm(train_X - test_sample, axis=1)

        # Labels of the k nearest training samples
        neighbor_labels = train_y[np.argsort(distances)[:k]]

        # Majority vote. np.unique returns labels in sorted order and argmax
        # picks the first maximum, so vote ties resolve to the smallest label.
        # This avoids scipy.stats.mode, whose return shape differs between
        # SciPy releases and which rejects non-numeric labels in recent ones.
        labels, counts = np.unique(neighbor_labels, return_counts=True)
        test_predictions[i] = labels[np.argmax(counts)]

    return test_predictions

In [11]:
def compute_error_rate(predictions, targets):
    """
    Computes the error rate of predictions against the true targets.

    Parameters:
        predictions (array-like): Predicted labels
        targets (array-like): Actual labels, same shape as predictions

    Returns:
        error_rate (float): Error rate calculated as number of errors divided by number of samples

    Raises:
        ValueError: If predictions and targets have different shapes, or if
            there are no samples to score.
    """
    # Convert up front: `!=` between two plain Python lists yields a single
    # bool rather than an element-wise result, which would miscount errors.
    predictions = np.asarray(predictions)
    targets = np.asarray(targets)

    # Differently shaped arrays would either broadcast or fail to compare
    # element-wise, so reject them explicitly.
    if predictions.shape != targets.shape:
        raise ValueError("predictions and targets must have the same shape")

    # The error rate of zero samples is undefined (0 / 0).
    if targets.size == 0:
        raise ValueError("Cannot compute an error rate for an empty set of targets")

    # Mean of the element-wise mismatch mask == errors / number of samples.
    return float(np.mean(predictions != targets))

In [None]:
# Number of neighbors that vote on each prediction
k = 10

# Predict a label for every test sample from its k nearest training samples
predictions = knn_classify(
    train_X=train_X,
    train_y=train_y,
    test_X=test_X,
    k=k,
)

# Score the predictions against the test labels and report the result
error_rate = compute_error_rate(predictions=predictions, targets=test_y)
print(f"Error Rate: {error_rate}")

## Task 3