In [None]:
import numpy as np
import matplotlib.pyplot as plt
from itertools import combinations

In [None]:
# Dimensions to iterate over: the powers of two 2**0, 2**1, ..., 2**num_dimensions
num_dimensions = 10
# Misspelled alias retained so existing references to the old name keep working
num_dimesions = num_dimensions
dimensions = [2**i for i in range(num_dimensions + 1)]

# Function to calculate squared Euclidean distance
def squared_euclidean_distance(x, y):
    """Return the squared Euclidean distance between two points.

    Parameters
    ----------
    x, y : array_like
        Coordinates of the two points. NumPy arrays are used as they are;
        lists, tuples and other array-likes are converted first.

    Returns
    -------
    NumPy scalar
        The sum of the squared element-wise differences. No square root is
        taken, so this is the *squared* distance.
    """
    # asanyarray lets plain sequences through (list - list would raise
    # TypeError) while passing ndarrays and their subclasses on unchanged.
    difference = np.asanyarray(x) - np.asanyarray(y)
    return np.sum(difference ** 2)

# Function to calculate Manhattan distance
def manhattan_distance(x, y):
    """Return the Manhattan (L1) distance between two points.

    Parameters
    ----------
    x, y : array_like
        Coordinates of the two points. NumPy arrays are used as they are;
        lists, tuples and other array-likes are converted first.

    Returns
    -------
    NumPy scalar
        The sum of the absolute element-wise differences.
    """
    # asanyarray lets plain sequences through (list - list would raise
    # TypeError) while passing ndarrays and their subclasses on unchanged.
    difference = np.asanyarray(x) - np.asanyarray(y)
    return np.sum(np.abs(difference))

# Function to sample points and calculate average distance and standard deviation
def calculate_distances(dim, distance_func, num_trials=100, num_points=100, rng=None):
    """Estimate the mean and standard deviation of pairwise distances.

    In each trial ``num_points`` points are drawn uniformly from the unit cube
    ``[0, 1)**dim`` and ``distance_func`` is evaluated on every unordered pair
    of them. The distances of all trials are pooled before the statistics are
    computed.

    Parameters
    ----------
    dim : int
        Dimension of the sampled points.
    distance_func : callable
        Called as ``distance_func(p1, p2)`` with two 1-D arrays of length
        ``dim``; must return a number.
    num_trials : int, optional
        Number of independent point sets to draw (default 100).
    num_points : int, optional
        Number of points per set (default 100); each set contributes
        ``num_points * (num_points - 1) / 2`` distances.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, the global ``np.random`` state is
        used, so ``np.random.seed`` controls the result.

    Returns
    -------
    tuple
        ``(avg_distance, std_deviation)`` over all pooled pair distances.

    Raises
    ------
    ValueError
        If ``num_trials < 1`` or ``num_points < 2``; no pair would exist and
        the statistics would be NaN.
    """
    if num_trials < 1:
        raise ValueError("num_trials must be at least 1")
    if num_points < 2:
        raise ValueError("num_points must be at least 2 to form a pair")

    distances = []
    for _ in range(num_trials):
        # Draw one point set in the unit cube for the given dimension
        if rng is None:
            points = np.random.rand(num_points, dim)
        else:
            points = rng.random((num_points, dim))
        # Distances between all unordered pairs of rows in this set
        distances.extend(distance_func(p1, p2) for p1, p2 in combinations(points, 2))

    # Statistics over the pooled distances of all trials
    avg_distance = np.mean(distances)
    std_deviation = np.std(distances)
    return avg_distance, std_deviation

In [None]:
# Record average distances and standard deviations for each dimension for both distance metrics

# The point sampling draws from NumPy's global random state, so seed it here:
# a fresh "Restart & Run All" then reproduces exactly the same numbers.
RANDOM_SEED = 0
np.random.seed(RANDOM_SEED)

results_euclidean = {'avg': [], 'std': []}
results_manhattan = {'avg': [], 'std': []}

for dim in dimensions:
    # Each metric is evaluated on its own freshly sampled point sets
    avg_dist_euclidean, std_dev_euclidean = calculate_distances(dim, squared_euclidean_distance)
    avg_dist_manhattan, std_dev_manhattan = calculate_distances(dim, manhattan_distance)
    results_euclidean['avg'].append(avg_dist_euclidean)
    results_euclidean['std'].append(std_dev_euclidean)
    results_manhattan['avg'].append(avg_dist_manhattan)
    results_manhattan['std'].append(std_dev_manhattan)


In [None]:
# Print one table per metric: dimension, average distance, standard deviation.
# The first table is headed "Squared Euclidean" because its values are produced
# by squared_euclidean_distance (no square root is taken).
for heading, results in (
    ("Squared Euclidean Distance:", results_euclidean),
    ("\nManhattan Distance:", results_manhattan),
):
    print(heading)
    print("Dimension\tAverage Distance\tStandard Deviation")
    # Walk the dimensions list directly instead of recomputing 2**index
    for dimension, avg, std in zip(dimensions, results['avg'], results['std']):
        print(f"{dimension}\t\t{avg}\t\t{std}")

In [None]:
# Plotting: one panel per metric, average and standard deviation against dimension
fig, axes = plt.subplots(1, 2, figsize=(12, 6))

# The first panel is titled "Squared Euclidean" because its values are produced
# by squared_euclidean_distance (no square root is taken).
panels = (
    ('Squared Euclidean Distance', results_euclidean),
    ('Manhattan Distance', results_manhattan),
)

for ax, (title, results) in zip(axes, panels):
    ax.plot(dimensions, results['avg'], label='Average Distance', marker='o')
    ax.plot(dimensions, results['std'], label='Standard Deviation', marker='x')
    ax.set_title(title)
    ax.set_xlabel('Dimension (d)')
    ax.set_ylabel('Distance')
    # ax.set_xscale('log')  # optional: spreads out the small dimensions
    ax.legend()

fig.tight_layout()
plt.show()