In [19]:
#Write a function to calculate the Euclidean distance and Manhattan distance between two vectors. The vectors dimension is variable. Please don’t use any distance calculation functions available in Python.

import math

def euclidean_distance(vector1, vector2):
    if len(vector1) != len(vector2):
        raise ValueError("Vectors must have the same dimension")
    sum_of_squares = sum((v1 - v2) ** 2 for v1, v2 in zip(vector1, vector2))
    return math.sqrt(sum_of_squares)

def manhattan_distance(vector1, vector2):
    if len(vector1) != len(vector2):
        raise ValueError("Vectors must have the same dimension")
    sum_of_absolute_differences = sum(abs(v1 - v2) for v1, v2 in zip(vector1, vector2))
    return sum_of_absolute_differences

def get_user_input():
    vector_a = [float(x) for x in input("Enter values for vector A (comma-separated): ").split(',')]
    vector_b = [float(x) for x in input("Enter values for vector B (comma-separated): ").split(',')]
    return vector_a, vector_b

if __name__ == "__main__":
    vector_a, vector_b = get_user_input()

    euclidean_dist = euclidean_distance(vector_a, vector_b)
    print(f"Euclidean Distance: {euclidean_dist}")

    manhattan_dist = manhattan_distance(vector_a, vector_b)
    print(f"Manhattan Distance: {manhattan_dist}")


Enter values for vector A (comma-separated): 1,2,3,4
Enter values for vector B (comma-separated): 2,3,4,5
Euclidean Distance: 2.0
Manhattan Distance: 4.0


In [26]:
#Write a function to implement k-NN classifier. k is a variable and based on that the count of neighbors should be selected.
def euclidean_distance(point1, point2):
    return ((point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2) ** 0.5

def find_neighbors(training_data, labels, test_point, k):
    distances = [(i, euclidean_distance(test_point, data_point)) for i, data_point in enumerate(training_data)]
    distances.sort(key=lambda x: x[1])
    k_nearest_neighbors = distances[:k]
    neighbor_labels = [labels[i] for i, _ in k_nearest_neighbors]
    return neighbor_labels

def kNN_classifier(training_data, labels, test_point, k):
    neighbor_labels = find_neighbors(training_data, labels, test_point, k)
    predicted_label = max(set(neighbor_labels), key=neighbor_labels.count)
    return predicted_label

if __name__ == "__main__":
    training_data = [[1, 2], [2, 3], [3, 4], [4, 5]]
    labels = ['A', 'A', 'B', 'B']
    test_point = [2.5, 3.5]
    k = 3

    predicted_label = kNN_classifier(training_data, labels, test_point, k)
    print(f"Predicted Label: {predicted_label}")





Predicted Label: A


In [27]:
#Write a function to convert categorical variables to numeric using label encoding. Don’t use any existing functionalities.
def label_encode_categorical(data, column_name):
    unique_labels = list(set(data[column_name]))
    label_encoder = {label: idx for idx, label in enumerate(unique_labels)}

    data[column_name] = [label_encoder[label] for label in data[column_name]]
    return data, label_encoder

def main():
    my_data = {
        "Gender": ['F', 'M', 'M', 'F', 'M', 'F', 'M', 'F', 'F', 'M'],
        "Name": ['Cindy', 'Carl', 'Johnny', 'Stacey', 'Andy', 'Sara', 'Victor', 'Martha', 'Mindy', 'Max']
    }

    print("Original Data:\n")
    for key, values in my_data.items():
        print(f"{key}: {values}")

    my_data, label_encoder = label_encode_categorical(my_data, 'Gender')

    print("\nUnique Labels:", set(my_data['Gender']))
    print("\nData after Label Encoding:\n")
    for key, values in my_data.items():
        print(f"{key}: {values}")

if __name__ == "__main__":
    main()





Original Data:

Gender: ['F', 'M', 'M', 'F', 'M', 'F', 'M', 'F', 'F', 'M']
Name: ['Cindy', 'Carl', 'Johnny', 'Stacey', 'Andy', 'Sara', 'Victor', 'Martha', 'Mindy', 'Max']

Unique Labels: {0, 1}

Data after Label Encoding:

Gender: [0, 1, 1, 0, 1, 0, 1, 0, 0, 1]
Name: ['Cindy', 'Carl', 'Johnny', 'Stacey', 'Andy', 'Sara', 'Victor', 'Martha', 'Mindy', 'Max']


In [22]:
#Write a function to convert categorical variables to numeric using One-Hotencoding. Don’t use any existing functionalities.
import pandas as pd

def one_hot_encode_categorical(dataframe, column_name):
    unique_categories = list(set(dataframe[column_name]))

    for category in unique_categories:
        new_column_name = f"{column_name}_{category}"
        dataframe[new_column_name] = (dataframe[column_name] == category).astype(int)

    dataframe = dataframe.drop(column_name, axis=1)

    return dataframe

my_data = {
    'fruit_list': ['Apple', 'Banana', 'Orange', 'Grape', 'Kiwi'],
    'quantity_list': [120, 85, 150, 50, 75],
    'category': ['A', 'B', 'A', 'C', 'B']
}

blk = pd.DataFrame(my_data)

print("Original DataFrame:\n")
print(blk)

blk = one_hot_encode_categorical(blk, 'category')

print("\nDataFrame after One-Hot Encoding:\n")
print(blk)



Original DataFrame:

  fruit_list  quantity_list category
0      Apple            120        A
1     Banana             85        B
2     Orange            150        A
3      Grape             50        C
4       Kiwi             75        B

DataFrame after One-Hot Encoding:

  fruit_list  quantity_list  category_C  category_B  category_A
0      Apple            120           0           0           1
1     Banana             85           0           1           0
2     Orange            150           0           0           1
3      Grape             50           1           0           0
4       Kiwi             75           0           1           0
