In [2]:
import math

# Function to calculate Euclidean distance between two vectors
def euclidean_distance(vector1, vector2):
    distance = math.sqrt(sum((x - y) ** 2 for x, y in zip(vector1, vector2)))
    return distance

# Function to calculate Manhattan distance between two vectors
def manhattan_distance(vector1, vector2):
    distance = sum(abs(x - y) for x, y in zip(vector1, vector2))
    return distance

# Function to implement k-NN classifier
def k_nn_classifier(training_data, test_instance, k):
    distances = [(euclidean_distance(test_instance, x[0]), x[1]) for x in training_data]
    sorted_distances = sorted(distances, key=lambda x: x[0])
    k_nearest_neighbors = sorted_distances[:k]
    labels = [neighbor[1] for neighbor in k_nearest_neighbors]
    # Assuming labels are categorical and selecting the most common label as prediction
    predicted_label = max(set(labels), key=labels.count)
    return predicted_label

# Function to convert categorical variables to numeric using label encoding
def label_encoding_categorical(data):
    unique_values = list(set(data))
    encoding_dict = {value: index for index, value in enumerate(unique_values)}
    encoded_data = [encoding_dict[value] for value in data]
    return encoded_data

# Function to convert categorical variables to numeric using One-Hot encoding
def one_hot_encoding_categorical(data):

    unique_values = list(set(data))
    one_hot_encoded_data = [[1 if value == val else 0 for val in unique_values] for value in data]
    return one_hot_encoded_data

# Main program
if __name__ == "__main__":
    # Take input from the user
    vector1 = [float(x) for x in input("Enter the first vector values (comma-separated): ").split(',')]
    vector2 = [float(x) for x in input("Enter the second vector values (comma-separated): ").split(',')]
    k_value = int(input("Enter the value of k for k-NN: "))
    categorical_data = input("Enter categorical data (comma-separated): ").split(',')

    # Euclidean distance
    euclidean_dist = euclidean_distance(vector1, vector2)
    print(f"Euclidean Distance: {euclidean_dist}")

    # Manhattan distance
    manhattan_dist = manhattan_distance(vector1, vector2)
    print(f"Manhattan Distance: {manhattan_dist}")

    # k-NN classifier
    training_data = [([1, 2], 'A'), ([2, 3], 'B'), ([3, 4], 'A'), ([4, 5], 'B')]  # Example training data
    predicted_label = k_nn_classifier(training_data, vector1, k_value)
    print(f"Predicted Label using k-NN: {predicted_label}")

    # Label encoding
    label_encoded_data = label_encoding_categorical(categorical_data)
    print(f"Label Encoded Data: {label_encoded_data}")

    # One-Hot encoding
    one_hot_encoded_data = one_hot_encoding_categorical(categorical_data)
    print(f"One-Hot Encoded Data: {one_hot_encoded_data}")


Enter the first vector values (comma-separated): 1,2
Enter the second vector values (comma-separated): 2,3
Enter the value of k for k-NN: 3
Enter categorical data (comma-separated): 'apple', 'banana', 'orange', 'apple', 'orange', 'banana'
Euclidean Distance: 1.4142135623730951
Manhattan Distance: 2.0
Predicted Label using k-NN: A
Label Encoded Data: [0, 3, 1, 2, 1, 3]
One-Hot Encoded Data: [[1, 0, 0, 0], [0, 0, 0, 1], [0, 1, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 0, 1]]
