In [1]:
import numpy as np
import pandas as pd
from scipy.sparse.csgraph import connected_components
from scipy.spatial.distance import pdist, squareform

# Two points are linked when they are closer than this (Euclidean distance).
DISTANCE_THRESHOLD = 0.5


def label_connected_groups(adjacency):
    """Return one group ID per point from a symmetric adjacency matrix.

    Points belong to the same group when they are connected through any
    chain of adjacent points (connected components of the graph), not only
    when they are direct neighbours.  Each group is labelled with the
    highest row index among its members.

    Args:
        adjacency: Square, symmetric array-like; a non-zero entry ``[i, j]``
            means points ``i`` and ``j`` are linked.

    Returns:
        1-D integer array with one group ID per point; empty when
        ``adjacency`` is empty.
    """
    adjacency = np.asarray(adjacency)
    n_points = adjacency.shape[0]
    if n_points == 0:
        return np.empty(0, dtype=np.intp)

    _, component = connected_components(adjacency, directed=False)

    # Name every component after its highest member index so the IDs refer
    # to actual rows of the input rather than arbitrary component numbers.
    highest_member = np.zeros(component.max() + 1, dtype=np.intp)
    np.maximum.at(highest_member, component, np.arange(n_points))
    return highest_member[component]


# Step 1: Build a small sample dataset of 2-D points
data = {
    'x': [0.15, 0.35, 0.4, 0.9, 1.1, 1.25],
    'y': [0.25, 0.45, 0.55, 0.85, 0.95, 1.15],
}
coordinates = pd.DataFrame(data)

# Step 2: Compute the full symmetric matrix of pairwise Euclidean distances
distance_matrix = squareform(pdist(coordinates[['x', 'y']], metric='euclidean'))

# Step 3: Link points closer than the threshold, then group linked points.
# Taking only the last direct neighbour of each point would split chains
# (A near B, B near C, A far from C), so the grouping is done transitively.
group_labels = (distance_matrix < DISTANCE_THRESHOLD).astype(int)
group_ids = label_connected_groups(group_labels)

# Assign the group IDs back to the DataFrame
coordinates['group'] = group_ids

# Output the resulting DataFrame
print(coordinates)

      x     y  group
0  0.15  0.25      2
1  0.35  0.45      2
2  0.40  0.55      2
3  0.90  0.85      5
4  1.10  0.95      5
5  1.25  1.15      5
