In [1]:
import pandas as pd
import numpy as np

def euclidean_distance(instance1, instance2):
    """Return the Euclidean distance between two numeric arrays.

    The inputs are subtracted element-wise and the norm of the difference is
    taken with ``np.linalg.norm`` at its default order, which for 1-D inputs
    is the straight-line (L2) distance.

    Args:
        instance1: First array of numeric values.
        instance2: Second array, compatible in shape with ``instance1``.

    Returns:
        The norm of ``instance1 - instance2``.
    """
    difference = instance1 - instance2
    return np.linalg.norm(difference)

def manhattan_distance(instance1, instance2):
    """Return the Manhattan (city-block) distance between two numeric arrays.

    Args:
        instance1: First array of numeric values.
        instance2: Second array, compatible in shape with ``instance1``.

    Returns:
        The sum of the absolute element-wise differences.
    """
    absolute_gaps = np.abs(instance1 - instance2)
    return np.sum(absolute_gaps)

def cosine_similarity(instance1, instance2):
    """Return the cosine similarity between two numeric vectors.

    Computed as ``dot(a, b) / (||a|| * ||b||)``.

    Args:
        instance1: First vector of numeric values.
        instance2: Second vector, same length as ``instance1``.

    Returns:
        The similarity, limited to the range [-1.0, 1.0]. If either vector
        has zero length (norm of 0) the cosine is mathematically undefined;
        0.0 is returned in that case instead of dividing by zero.
    """
    norm1 = np.linalg.norm(instance1)
    norm2 = np.linalg.norm(instance2)

    # A zero-norm vector has no direction; dividing here would produce NaN
    # together with a RuntimeWarning, so report "no similarity" explicitly.
    if norm1 == 0 or norm2 == 0:
        return 0.0

    similarity = np.dot(instance1, instance2) / (norm1 * norm2)

    # Floating-point rounding can land marginally outside [-1, 1] for
    # (anti-)parallel vectors; clip so the value is always a valid cosine.
    return np.clip(similarity, -1.0, 1.0)

# Relative path of the CSV file that holds the instances to compare.
file_path = 'homeprices.csv'
df = pd.read_csv(file_path)

# Echo the whole frame so its row positions are visible in the output.
print("Dataset loaded successfully:", df, sep="\n")

# Menu of available measures: menu key -> (display name, implementation).
distance_measures = {
    "1": ("Euclidean", euclidean_distance),
    "2": ("Manhattan", manhattan_distance),
    "3": ("Cosine Similarity", cosine_similarity)
}

print("\nSelect a distance measure:")
for key, (measure_name, _) in distance_measures.items():
    print(f"{key}. {measure_name}")

# The prompt offers "index or name", so both are resolved: a menu key is used
# as-is, and a display name is matched case-insensitively and mapped to its key.
keys_by_name = {name.lower(): menu_key for menu_key, (name, _) in distance_measures.items()}
selection = input("Enter the index or name of the distance measure: ").strip()
if selection in distance_measures:
    selected_key = selection
else:
    selected_key = keys_by_name.get(selection.lower())

if selected_key is None:
    print("Invalid distance measure selection. Please choose from the available options.")
    # Raise SystemExit directly instead of calling exit(): exit() is a helper
    # intended for the interactive interpreter and is not meant for programs.
    # NOTE(review): in a notebook, IPython's exit() is believed to only request
    # a kernel shutdown without stopping the running cell - confirm.
    raise SystemExit

# Names consumed by the code that follows: the callable and its display name.
selected_measure_name, selected_measure = distance_measures[selected_key]

# Read the two row positions. int() raises ValueError on non-numeric text; that
# is reported with the same message as an out-of-range index instead of
# surfacing as a traceback.
try:
    index1 = int(input(f"Enter index of the first instance (0 to {len(df)-1}): "))
    index2 = int(input(f"Enter index of the second instance (0 to {len(df)-1}): "))
except ValueError:
    print(f"Invalid indices. Please enter indices between 0 and {len(df)-1}.")
else:
    if 0 <= index1 < len(df) and 0 <= index2 < len(df):
        # Feature vector = every column except the last one.
        instance1 = df.iloc[index1, :-1].values
        instance2 = df.iloc[index2, :-1].values

        # A missing value in either row would silently turn the result into
        # NaN, so name the affected columns and skip the computation instead.
        missing = pd.isna(instance1) | pd.isna(instance2)
        if missing.any():
            missing_columns = ", ".join(map(str, df.columns[:-1][missing]))
            print(f"Cannot compare instance {index1} and instance {index2}: "
                  f"missing values in column(s): {missing_columns}.")
        else:
            distance = selected_measure(instance1, instance2)
            # A similarity is not a distance; only append the word "distance"
            # when the selected measure's name does not already say otherwise.
            if selected_measure_name.lower().endswith("similarity"):
                result_label = selected_measure_name
            else:
                result_label = f"{selected_measure_name} distance"
            print(f"{result_label} between instance {index1} and instance {index2}: {distance}")
    else:
        print(f"Invalid indices. Please enter indices between 0 and {len(df)-1}.")


Dataset loaded successfully:
   area  bedrooms  age   price
0  2600       3.0   20  550000
1  3000       4.0   15  565000
2  3200       NaN   18  610000
3  3600       3.0   30  595000
4  4000       5.0    8  760000
5  4100       6.0    8  810000

Select a distance measure:
1. Euclidean
2. Manhattan
3. Cosine Similarity
Enter the index or name of the distance measure: 1
Enter index of the first instance (0 to 5): 0
Enter index of the second instance (0 to 5): 1
Euclidean distance between instance 0 and instance 1: 400.03249867979474
