# Ripley 1D Test

In [None]:
import plotly.graph_objects as go
import pandas as pd
import numpy as np
from pathlib import Path
from dataclasses import dataclass

In [None]:
@dataclass
class Parameters:
    """
    Settings for one Ripley simulation run, as consumed by runRipley
    """
    REP: int  # number of independent repetitions (one curve each)
    NPTS: int  # number of points placed on the line per repetition
    NPTS_cl: int  # how many of those points are drawn from the cluster instead
    cl_std: float  # standard deviation of the cluster
    cl_loc: float  # position of the cluster centre on the line
    LINELENGTH: float  # length of the line

    def NDST(self) -> int:
        """
        The number of ordered point pairs (i, j) with i != j, i.e. N^2 - N
        """
        n_points = self.NPTS
        return n_points * (n_points - 1)

    def DENS(self) -> float:
        """
        The number of distances (see NDST) per unit of line length
        """
        n_distances = self.NDST()
        return n_distances / self.LINELENGTH


# Functions
$K(r) = \frac{1}{N^2-N} \sum \limits_{i=1}^{N} \sum \limits_{j=1,\, j \neq i}^{N} \mathbf{1}\left[D(i,j) < r\right]$

* `K(r)` = Ripley's K-function
* `D(i,j)` = Distance between i and j
* `N` = Number of points
* $\mathbf{1}[\cdot]$ = 1 if the condition holds, 0 otherwise

$L(r) = L \cdot K(r) - 2r$

* `L(r)` = Ripley's L-function
* `L` = Line Length

The `K(r)` function is a cumulative count of the number of distances within a radius (`r`), normalized to the total number of distances. It ranges from `K(0) = 0` to `K(inf) = 1` if no edge correction is applied.

The `L(r)` function normalizes `K(r)` against the number of distances expected for a random distribution. It has an expected value of `0` for randomly distributed points.

# Edge correction: Loop the line
Every distance is counted twice, once directly and once presuming the endpoints of the line form a loop.

In [None]:
def runRipley(p):
    """
    Simulate point patterns on a line and compute Ripley's L-function for each

    For every one of ``p.REP`` repetitions, ``p.NPTS`` points are drawn
    uniformly on the line; the first ``p.NPTS_cl`` of them are then replaced
    by normally distributed points around ``p.cl_loc`` with standard deviation
    ``p.cl_std``. Each unordered pair of points contributes two distances: the
    direct one and the one going the other way around, treating the line as a
    closed loop (edge correction).

    Parameters
    ----------
    p : object exposing REP, NPTS, NPTS_cl, cl_std, cl_loc, LINELENGTH and a
        DENS() method (e.g. a ``Parameters`` instance)

    Returns
    -------
    distances : list with one sorted 1-D array of distances per repetition
    ripley_lrs : list with one list of L(r) values per repetition, aligned
        element-wise with the matching entry of ``distances``
    fig : plotly figure with one line trace per repetition
    """
    line_length = p.LINELENGTH
    density = p.DENS()  # loop-invariant, so evaluate it only once
    distances = []
    ripley_lrs = []
    for _ in range(p.REP):
        data = np.random.rand(p.NPTS) * line_length
        data[0:p.NPTS_cl] = p.cl_loc + np.random.randn(p.NPTS_cl) * p.cl_std
        # Cluster points are unbounded; wrap them onto the loop so that the
        # "other side" distance below can never become negative.
        data = np.mod(data, line_length)

        # Each unordered pair (i < j) is measured once since a->b = b->a;
        # this is compensated below when calculating ripley_lr.
        first, second = np.triu_indices(p.NPTS, k=1)
        direct = np.abs(data[first] - data[second])
        around = line_length - direct  # loop via other side
        dist = np.sort(np.concatenate((direct, around)))

        # (2*x+2) because every distance is present twice (a->b and b->a)
        pair_counts = 2 * np.arange(dist.size) + 2
        ripley_lr = list(pair_counts / density - 2 * dist)

        distances.append(dist)
        ripley_lrs.append(ripley_lr)

    fig = go.Figure()
    for number, (dist, ripley_lr) in enumerate(zip(distances, ripley_lrs), start=1):
        fig.add_trace(go.Scatter(x=dist, y=ripley_lr,
                                 mode='lines',
                                 name=f'trace {number}'))
    fig.update_layout(xaxis_range=[0, line_length])
    return distances, ripley_lrs, fig


In [None]:
# Baseline run: NPTS_cl=0, so every point is drawn uniformly on the line.
parameters = Parameters(
    REP=20,
    NPTS=100,
    NPTS_cl=0,
    cl_std=0.05,
    cl_loc=0.5,
    LINELENGTH=10,
)
x, y, fig = runRipley(parameters)
# Only half the line is shown on the x-axis.
fig.update_layout(
    title='Random Points',
    yaxis_range=[-1, 3],
    xaxis_range=[0, parameters.LINELENGTH / 2],
)
fig.show()
# Optional: export every curve to its own CSV file in the working directory.
# for i in range(parameters.REP):
#     df = pd.DataFrame({'r':x[i], 'ripley_l(r)':y[i]})
#     pth = Path(Path.cwd(), f"curve_rand{i}.csv")
#     df.to_csv(pth)

In [None]:
# Clustered run: reuse the existing settings, but draw 50 of the points from
# the cluster around cl_loc.
parameters.NPTS_cl = 50
x, y, fig = runRipley(parameters)
# Only half the line is shown on the x-axis.
fig.update_layout(
    title='Clustered Points',
    yaxis_range=[-1, 3],
    xaxis_range=[0, parameters.LINELENGTH / 2],
)
fig.show()
# Optional: export every curve to its own CSV file in the working directory.
# for i in range(parameters.REP):
#     df = pd.DataFrame({'r':x[i], 'ripley_l(r)':y[i]})
#     pth = Path(Path.cwd(), f"curve_clustered{i}.csv")
#     df.to_csv(pth)