In [13]:
import numpy as np
from scipy.stats import entropy

# Given data
X = np.array([1.2, 2.0, 3.5, 4.8, 5.1, 6.2, 7.5, 8.0, 9.3, 10.1])
y = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1, 0])

# Step 1: Calculate H(y)
p_0 = np.sum(y == 0) / len(y)
p_1 = np.sum(y == 1) / len(y)
H_y = entropy([p_0, p_1], base=2)

# Step 2: Calculate H(X)
hist, bin_edges = np.histogram(X, bins='auto', density=True)
bin_centers = 0.5 * (bin_edges[:-1] + bin_edges[1:])
H_X = -np.sum(hist * np.log2(hist + 1e-10))  # Adding a small constant to avoid log(0)

# Step 3: Calculate H(X, y) using a 2D histogram
H_XY = 0
for label in [0, 1]:
    indices = (y == label)
    hist_xy, _, _ = np.histogram2d(X[indices], y[indices], bins=[bin_edges, [0, 1]], density=True)
    hist_xy_normalized = hist_xy / np.sum(hist_xy)
    H_XY -= np.sum(hist_xy_normalized * np.log2(hist_xy_normalized + 1e-10))

# Step 4: Calculate I(X; y)
I_XY = H_X + H_y - H_XY

# Step 5: Calculate MI(X, y)
MI_XY = I_XY / np.sqrt(H_X * H_y)

# Display the results
print(f"H(y): {H_y}")
print(f"H(X): {H_X}")
print(f"H(X, y): {H_XY}")
print(f"I(X; y): {I_XY}")
print(f"MI(X, y): {MI_XY}")


H(y): 1.0
H(X): 1.7293913396509493
H(X, y): 4.243856188476299
I(X; y): -1.5144648488253494
MI(X, y): -1.151628907593478


In [12]:
import numpy as np
from sklearn.neighbors import NearestNeighbors
from scipy.stats import entropy

# Given data
X = np.array([1.2, 2.0, 3.5, 4.8, 5.1, 6.2, 7.5, 8.0, 9.3, 10.1])
y = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1, 0])

# Convert X to a 2D array for compatibility with NearestNeighbors
X_2D = X.reshape(-1, 1)

# Step 3: Calculate H(X, y) using k-nearest neighbors
n_neighbors = 3
nn = NearestNeighbors(n_neighbors=n_neighbors)
nn.fit(X_2D)

distances, indices = nn.kneighbors(X_2D)

# Estimate joint probabilities using distances (inverse distance weighting)
joint_probs = np.exp(-distances**2 / 2)
joint_probs /= np.sum(joint_probs, axis=1, keepdims=True)

# Calculate H(X, y) using KNN
H_XY_knn = -np.sum(joint_probs * np.log2(joint_probs + 1e-10))

# Step 1: Calculate H(y)
p_0 = np.sum(y == 0) / len(y)
p_1 = np.sum(y == 1) / len(y)
H_y = entropy([p_0, p_1], base=2)

# Step 2: Calculate H(X)
hist, bin_edges = np.histogram(X, bins='auto', density=True)
bin_centers = 0.5 * (bin_edges[:-1] + bin_edges[1:])
H_X = -np.sum(hist * np.log2(hist + 1e-10))  # Adding a small constant to avoid log(0)

# Step 4: Calculate I(X; y)
I_XY = H_X + H_y - H_XY_knn  # Use H_XY_knn instead of H_XY

# Step 5: Calculate MI(X, y)
MI_XY = I_XY / np.sqrt(H_X * H_y)

# Display the results
print(f"H(y): {H_y}")
print(f"H(X): {H_X}")
print(f"H(X, y) using k-nearest neighbors: {H_XY_knn}")
print(f"I(X; y) using k-nearest neighbors: {I_XY}")
print(f"MI(X, y) using k-nearest neighbors: {MI_XY}")


H(y): 1.0
H(X): 1.7293913396509493
H(X, y) using k-nearest neighbors: 14.342238661523512
I(X; y) using k-nearest neighbors: -11.612847321872563
MI(X, y) using k-nearest neighbors: -8.830637888829747
