# Counting pair algorithm

* KD-tree implementation

In [9]:
import numpy as np
from scipy.spatial import KDTree

def counting_pairs(points, max_distance): #max_distance is a maximum distance between a couple of two points
    # Build KD-tree
    tree = KDTree(points)
    
    # Query the tree to find all pairs within the distance threshold
    pairs = tree.query_pairs(max_distance)
    
    return pairs

# Example usage:
# Generate some random points
points = np.random.rand(100, 2) #crate array [.,.] between 0 and 1
#points = [0,1,2,3,4,5,6,7,8,9]
#print(points)

# Set the distance threshold
max_distance = 0.1

# Find pairs within the distance threshold
pairs = counting_pairs(points, max_distance)

print("Pairs within distance threshold:")
for pair in pairs:
    print(points[pair[0]], points[pair[1]])


Pairs within distance threshold:
[0.80624686 0.28005784] [0.8596703  0.33934299]
[0.73347621 0.30012908] [0.80624686 0.28005784]
[0.31163902 0.82263839] [0.36340315 0.89979   ]
[0.25107262 0.49085455] [0.30112547 0.42389012]
[0.73969026 0.09224683] [0.64921421 0.11945347]
[0.86151034 0.5442449 ] [0.82516654 0.47296848]
[0.62175261 0.45312447] [0.67916295 0.49203126]
[0.56971213 0.14426275] [0.54891969 0.06275564]
[0.40417569 0.33976483] [0.46646446 0.31652731]
[0.61715927 0.9451949 ] [0.58759487 0.92614309]
[0.61933652 0.08038952] [0.59461227 0.07464103]
[0.98884383 0.73375209] [0.90980494 0.73014818]
[0.03706311 0.3445396 ] [0.11347314 0.35013729]
[0.71934574 0.23998698] [0.7304506  0.26751575]
[0.85447273 0.81957392] [0.8026333  0.87598669]
[0.59592806 0.70640096] [0.69142366 0.69617398]
[0.20278151 0.32893417] [0.11347314 0.35013729]
[0.87060596 0.58698728] [0.86151034 0.5442449 ]
[0.4366259  0.82192328] [0.39490073 0.79124797]
[0.7129189  0.55334284] [0.78387682 0.54987607]
[0.2165

* Landy-Szalay estimator
It is a method to estimate the two-point correlation function o a distribution of points.

$$
\xi(r) = \frac{{DD(r) - 2DR(r) + RR(r)}}{{RR(r)}}
$$

DD(r) is the number of a data pairs separated by distance a r <br>
RR(r) is the number of a random pairs separated by distance a r <br>
DR(r) is the number of a data-random pairs separated by distance a r <br>

 


In [10]:
import numpy as np
from scipy.spatial import KDTree

def landy_szalay_estimator(data_points, random_points, r_bins):
    # Build KD-tree for data and random points
    data_tree = KDTree(data_points)
    random_tree = KDTree(random_points)
    
    # Initialize arrays to store pair counts
    dd_counts = np.zeros(len(r_bins) - 1)
    dr_counts = np.zeros(len(r_bins) - 1)
    rr_counts = np.zeros(len(r_bins) - 1)
    
    # Count pairs for data-data, data-random, and random-random pairs
    for i in range(len(r_bins) - 1):
        dd_counts[i] = len(data_tree.query_pairs(r_bins[i], r_bins[i + 1]))
        dr_counts[i] = len(data_tree.query_ball_tree(random_tree, r_bins[i], r_bins[i + 1]))
        rr_counts[i] = len(random_tree.query_pairs(r_bins[i], r_bins[i + 1]))
    
    # Compute Landy-Szalay estimator
    xi = (dd_counts - 2 * dr_counts + rr_counts) / rr_counts
    
    return xi

# Example usage:
# Generate some random data points and random points
data_points = np.random.rand(100, 2)
random_points = np.random.rand(200, 2)

# Define bins for separation distances
r_bins = np.linspace(0, 1, 20)

# Compute Landy-Szalay estimator
xi = landy_szalay_estimator(data_points, random_points, r_bins)

print("Landy-Szalay estimator values:")
print(xi)


Landy-Szalay estimator values:
[        -inf         -inf -99.         -12.26666667  -1.91935484
   0.34449761   0.88514851   1.05024155   1.13242784   1.17389719
   1.195586     1.19867675   1.20835165   1.20805921   1.20874122
   1.21379428   1.21987634   1.22209421   1.2241787 ]


  xi = (dd_counts - 2 * dr_counts + rr_counts) / rr_counts
