In [2]:
import numpy as np
from sklearn.neighbors import NearestNeighbors

def mean_knn_distance(points, k=4):
    """Return, for every point, the mean distance to its k nearest neighbors.

    Parameters
    ----------
    points : array-like of shape (n_samples, n_features)
        Coordinates of the points; each row is one point.
    k : int, default=4
        Number of neighbors to average over. Must be at least 1 and smaller
        than the number of points.

    Returns
    -------
    numpy.ndarray of shape (n_samples,)
        Mean distance from each point to its k nearest *other* points,
        measured with the default metric of ``NearestNeighbors``.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1 (averaging over zero neighbors would
        otherwise silently produce NaN). scikit-learn raises the same
        exception type when ``k`` is not smaller than the number of points.
    """
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")

    knn_model = NearestNeighbors(n_neighbors=k)
    knn_model.fit(points)

    # Querying without arguments returns the neighbors of the fitted points
    # themselves and leaves each point out of its own neighbor list, so there
    # is no need to ask for k + 1 neighbors and drop the first column.
    distances, _ = knn_model.kneighbors()

    return np.mean(distances, axis=1)

# Reproducible demo data: 100 random points drawn uniformly from the cube [0, 10)^3
np.random.seed(42)
num_points = 100
points = 10 * np.random.rand(num_points, 3)

# Per-point mean distance to the 4 nearest neighbors, then the average over all points
mean_distances = mean_knn_distance(points, k=4)
total_mean = np.mean(mean_distances)

print("Mean distance of 4 nearest neighbors for each data point:")
print(mean_distances)
print("The total mean is", total_mean)


Mean distance of 4 nearest neighbors for each data point:
[2.17776641 1.50317672 1.85707984 3.18310406 1.31087489 1.75991853
 1.72106418 1.58782663 2.08077509 1.95451991 1.54564448 1.94294474
 1.5664224  1.69678522 2.19057125 2.07628753 2.27053596 1.70346768
 2.10550156 2.09627597 1.7496683  1.6384475  2.11673919 2.49701347
 1.84537417 1.92804828 1.17508773 1.77987325 1.53328518 2.05185862
 1.86713364 1.95468249 1.69065111 2.40038326 2.12626451 1.52075138
 2.03634185 2.27701253 1.93882087 1.98662494 1.77266283 1.69451449
 2.36568677 2.09586424 2.34609583 1.87355489 2.02298954 2.27301147
 1.49969629 2.09517437 1.393114   2.12279624 2.01415716 1.94719866
 1.82371307 1.05983246 1.81230143 2.06467907 2.04719626 2.0921844
 1.1552081  2.28476557 1.60737833 1.38421041 2.19360727 2.46638814
 1.49384756 2.25001185 1.53565046 1.62674488 2.14361697 1.63169983
 2.29788604 1.40196593 1.81129019 2.425124   1.76045266 2.44648139
 1.7274011  1.97350673 1.96557384 1.5597563  2.10506293 2.06463315
 1.07

### Sample Data

In [None]:
import numpy as np
import plotly.express as px

# Radially symmetric ripple surface: a sine of the distance from the origin (not a cone)
def f(x, y):
    """Evaluate z = sin(sqrt(x^2 + y^2)) element-wise for coordinates x and y."""
    radius = np.sqrt(x**2 + y**2)
    return np.sin(radius)

# Fixed seed so the sampled surface is identical on every run, whatever the
# order in which cells are executed
np.random.seed(42)

# Sample 1000 points in polar form: angle in [0, 2*pi), radius in [0, 6)
theta = 2 * np.pi * np.random.random(1000)
r = 6 * np.random.random(1000)

# Polar -> Cartesian; the products are already 1-D, so no flattening is needed
x = r * np.sin(theta)
y = r * np.cos(theta)
z = f(x, y)

# Interactive 3-D scatter, colored by height
# NOTE(review): size_max presumably only takes effect together with `size` - confirm
fig = px.scatter_3d(x=x, y=y, z=z, color=z, opacity=0.7, size_max=5)
fig.update_layout(scene=dict(xaxis_title='X', yaxis_title='Y', zaxis_title='Z'))
fig.show()


In [1]:
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from sklearn.neighbors import NearestNeighbors

def f(x, y):
    """Evaluate z = sin(sqrt(x^2 + y^2)) element-wise for coordinates x and y."""
    radius = np.sqrt(x**2 + y**2)
    return np.sin(radius)

# Fixed seed so the sampled surface (and the neighbor statistics computed from
# it) are reproducible, whatever the order in which cells are executed
np.random.seed(42)

# Sample 1000 points in polar form: angle in [0, 2*pi), radius in [0, 6)
theta = 2 * np.pi * np.random.random(1000)
r = 6 * np.random.random(1000)

# Polar -> Cartesian; the products are already 1-D, so no flattening is needed
x = r * np.sin(theta)
y = r * np.cos(theta)
z = f(x, y)

# Stack the coordinates into one 2-D array of shape (1000, 3):
# one row per point, columns x, y, z
points = np.column_stack((x, y, z))

def mean_knn_distance(points, k=4):
    """Return, for every point, the mean distance to its k nearest neighbors.

    Parameters
    ----------
    points : array-like of shape (n_samples, n_features)
        Coordinates of the points; each row is one point.
    k : int, default=4
        Number of neighbors to average over. Must be at least 1 and smaller
        than the number of points.

    Returns
    -------
    numpy.ndarray of shape (n_samples,)
        Mean distance from each point to its k nearest *other* points,
        measured with the default metric of ``NearestNeighbors``.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1 (averaging over zero neighbors would
        otherwise silently produce NaN). scikit-learn raises the same
        exception type when ``k`` is not smaller than the number of points.
    """
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")

    knn_model = NearestNeighbors(n_neighbors=k)
    knn_model.fit(points)

    # Querying without arguments returns the neighbors of the fitted points
    # themselves and leaves each point out of its own neighbor list, so there
    # is no need to ask for k + 1 neighbors and drop the first column.
    distances, _ = knn_model.kneighbors()

    return np.mean(distances, axis=1)

# Mean distance to the 25 nearest neighbors of every point, and the average of
# those values over all points
mean_distances = mean_knn_distance(points, k=25)
total_mean = mean_distances.mean()

# True where a point's mean neighbor distance exceeds the overall average,
# i.e. where the point sits in a locally sparser region
mask = mean_distances > total_mean

# Coordinates of the points selected by the mask
highlighted_points_x, highlighted_points_y, highlighted_points_z = x[mask], y[mask], z[mask]

fig = go.Figure()

# Base layer: every point, in red
fig.add_trace(
    go.Scatter3d(
        x=x,
        y=y,
        z=z,
        mode='markers',
        marker={'size': 5, 'color': 'red'},
        name='Points',
    )
)

# Overlay: the above-average points, in blue (skipped when nothing is selected)
if highlighted_points_x.size:
    fig.add_trace(
        go.Scatter3d(
            x=highlighted_points_x,
            y=highlighted_points_y,
            z=highlighted_points_z,
            mode='markers',
            marker={'size': 5, 'color': 'blue'},
            name='Points with Distance > Mean Distance',
        )
    )

# Axis labels and figure title in a single layout update
fig.update_layout(
    scene={'xaxis_title': 'X', 'yaxis_title': 'Y', 'zaxis_title': 'Z'},
    title='3D Scatter Plot of 1000 Points',
)
fig.show()
