In [2]:
from collections import Counter

# Step 1: Distance Function
def euclidean_distance(point1, point2):
    """Return the Euclidean (straight-line) distance between two points.

    Coordinates are paired positionally with ``zip``, so if the points have
    different lengths only the leading coordinates they share are compared.
    """
    squared_differences = [(a - b) ** 2 for a, b in zip(point1, point2)]
    return sum(squared_differences) ** 0.5

# Step 2: k-NN algorithm
def knn(data, query, k, distance_fn, choice_fn):
    """Predict a value for ``query`` from its ``k`` nearest training examples.

    Args:
        data: Indexable sequence of examples. For each example, element
            ``[0]`` is the feature vector and element ``[-1]`` is the label.
        query: Feature vector to predict for; passed as the second argument
            to ``distance_fn``.
        k: Number of nearest neighbors to consult. Must be at least 1. If it
            exceeds the number of examples, every example is used.
        distance_fn: Callable ``(features, query) -> distance``. The returned
            distances must be comparable with each other.
        choice_fn: Callable that receives the list of neighbor labels,
            ordered from nearest to farthest, and returns the prediction.

    Returns:
        Whatever ``choice_fn`` returns for the selected neighbor labels.

    Raises:
        ValueError: If ``k`` is less than 1 or ``data`` contains no examples.
    """
    if k < 1:
        # A negative k would turn the slice below into "drop the last |k|
        # neighbors" and silently vote with the wrong set; k == 0 would vote
        # on nothing at all.
        raise ValueError("k must be a positive integer, got {!r}".format(k))

    # Step 3: Find the distance of the query point to all other points
    # example[0] holds the features; the label is read later via data[i][-1].
    neighbor_distances_and_indices = [
        (distance_fn(example[0], query), index)
        for index, example in enumerate(data)
    ]
    if not neighbor_distances_and_indices:
        raise ValueError("data must contain at least one example")

    # Step 4: Sort by distance and select k-nearest neighbors
    # Tuples compare by distance first; equal distances fall back to the
    # example's index, which keeps the selection deterministic.
    k_nearest_distances_and_indices = sorted(neighbor_distances_and_indices)[:k]

    # Step 5: Get the labels of the selected k neighbors
    k_nearest_labels = [data[i][-1] for _, i in k_nearest_distances_and_indices]

    # Step 6: Choice function to make a decision based on neighbors
    return choice_fn(k_nearest_labels)

# Majority voting
def majority_vote(labels):
    """Return the label that occurs most often in ``labels``.

    The result is the first element of the single ``(label, count)`` pair
    reported by ``Counter.most_common(1)``. Indexing that list raises
    ``IndexError`` when ``labels`` is empty.
    """
    return Counter(labels).most_common(1)[0][0]

# Example usage. Each training example is a (features, label) tuple: a list
# of numeric features followed by the class label.
training_data = [
    ([1, 2], 'Class1'), ([3, 1], 'Class2'),
    ([2, 3], 'Class1'), ([5, 3], 'Class2'),
    ([3, 5], 'Class1'), ([4, 4], 'Class2'),
]

# Classify a new point with features [3, 3] by consulting its 3 nearest
# neighbors.
query, k = [3, 3], 3

# Run k-NN with Euclidean distance and a majority vote over neighbor labels.
predicted_class = knn(
    training_data,
    query,
    k,
    distance_fn=euclidean_distance,
    choice_fn=majority_vote,
)
print(predicted_class)


Class2
