# Trigonometry (1) | Exploring the unit circle

## Table of Contents

## Background

In [None]:
# =============================================================================
# Sampling behaviour & structure — Random vs Sobol (2D)
# -----------------------------------------------------------------------------
# Function:
#   - Show how sampling "fills space" differently (coverage, gaps, clustering)
#
# What you get:
#   (A) Scatter: Random vs Sobol (side-by-side)
#   (B) 2D bin-count heatmaps: where samples concentrate
#   (C) Nearest-neighbour distance histogram: clustering signature
#   (D) Simple metrics: discrepancy proxy + min NN distance stats
#
# Notes:
#   - Sobol uses SciPy's QMC module (scipy>=1.7). The import below is
#     unconditional, so no Halton fallback is actually implemented here.
#   - Designed to match repo style: compact header, explain-in-plot callout,
#     widget controls in a single row.
# =============================================================================

import matplotlib.pyplot as plt
import numpy as np
from ipywidgets import IntSlider, interact
from scipy.spatial import cKDTree
from scipy.stats import gaussian_kde, qmc

# Global figure typography: serif text, Computer Modern for mathtext, and an
# ASCII hyphen instead of the Unicode minus sign on tick labels.
plt.rcParams["font.family"] = "serif"
plt.rcParams["mathtext.fontset"] = "cm"
plt.rcParams["axes.unicode_minus"] = False

%matplotlib inline


In [None]:
# =============================================================================
# Beautiful "LaTeX-style" sampling comparison in Jupyter
#   1) np.random (uniform)
#   2) Sobol (unscrambled)
#   3) Sobol (scrambled)  <-- slider controls its seed
#   4) Nearest-neighbour distance distribution
# =============================================================================

# -----------------------------
# Config
# -----------------------------
# Default base-2 exponent (also the initial position of the "m" slider) and
# the matching sample count N = 2**m_power.
m_power = 8
N = 1 << m_power


# -----------------------------
# Helpers
# -----------------------------
def style_square(ax, title):
    """Format *ax* as a framed unit-square panel headed by *title*.

    Fixes both axes to [0, 1] with equal aspect, places ticks at 0, 0.5
    and 1, enables a faint grid and outlines the square.
    """
    unit_ticks = [0, 0.5, 1.0]
    frame_x = [0, 1, 1, 0, 0]
    frame_y = [0, 0, 1, 1, 0]

    ax.set_title(title, fontsize=16, pad=10)
    ax.set_aspect("equal", adjustable="box")

    # Same range and the same ticks on x and y.
    for set_limits in (ax.set_xlim, ax.set_ylim):
        set_limits(0, 1)
    for set_ticks in (ax.set_xticks, ax.set_yticks):
        set_ticks(list(unit_ticks))

    ax.grid(True, alpha=0.20)
    # Closed polyline tracing the boundary of the unit square.
    ax.plot(frame_x, frame_y, lw=2)
    ax.tick_params(labelsize=11)

def hist_with_kde(ax, data, bins, label=None):
    """Draw a density-normalised histogram of *data* with a Gaussian KDE curve.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw on.
    data : array_like
        1-D sample values.
    bins : int or sequence
        Forwarded to ``ax.hist``.
    label : str, optional
        If given, used as the axes title.

    Notes
    -----
    The KDE curve is skipped (the histogram is still drawn) when the data
    cannot support one: fewer than two values, or all values identical.
    ``gaussian_kde`` raises in those cases, which would otherwise abort the
    whole figure.
    """
    data = np.asarray(data, dtype=float)

    # Histogram (normalised so it is comparable with the KDE curve)
    ax.hist(data, bins=bins, density=True, alpha=0.6)

    # KDE (smooth density); degenerate input yields no estimator.
    try:
        kde = gaussian_kde(data) if np.ptp(data) > 0 else None
    except (ValueError, np.linalg.LinAlgError):
        # ValueError: empty / single-value input; LinAlgError: singular covariance.
        kde = None

    if kde is not None:
        x = np.linspace(data.min(), data.max(), 400)
        ax.plot(x, kde(x), linewidth=2)

    if label is not None:
        ax.set_title(label, fontsize=14)

def nearest_neighbor_distances(X):
    """Return each point's Euclidean distance to its nearest *other* point.

    Parameters
    ----------
    X : array_like, shape (N, d)
        Point coordinates, one row per point.

    Returns
    -------
    numpy.ndarray, shape (N,)
        Nearest-neighbour distance of every row. Exact duplicates give 0.0;
        a lone point has no neighbour and gives ``inf``.

    Notes
    -----
    Uses a KD-tree instead of the dense N x N x d difference tensor, which
    costs hundreds of megabytes per call at the slider maximum (N = 4096).
    """
    X = np.asarray(X, dtype=float)
    # k=2: the closest hit for every query point is the point itself at
    # distance 0, so the second column holds the true nearest neighbour.
    distances, _ = cKDTree(X).query(X, k=2)
    return distances[:, 1]


def style_nn(ax, title, xlim=(0.0, 0.12), ylim=(0, 100)):
    """Apply the shared look of a nearest-neighbour (NN) distance panel.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to format.
    title : str
        Panel title.
    xlim, ylim : tuple of (low, high) or None, optional
        Axis limits. The defaults are the fixed limits this helper has always
        applied. Pass other values when the sample count changes the scale of
        the distances, or ``None`` to leave that axis on autoscale.
    """
    ax.set_title(title, fontsize=14, pad=8)
    ax.set_xlabel(r"$d_{\mathrm{NN}}$", fontsize=12)
    ax.set_ylabel("Density", fontsize=12)
    if xlim is not None:
        ax.set_xlim(*xlim)
    if ylim is not None:
        ax.set_ylim(*ylim)
    ax.grid(True, alpha=0.25)
    ax.tick_params(labelsize=10)


# =============================================================================
# Main plotting function (called by the slider)
# =============================================================================
def plot_sampling(m_power=8, scramble_seed=1):
    """Draw the sampler comparison figure for ``N = 2**m_power`` points.

    The figure has 2 rows x 4 columns:

    * top row    -- scatter plots of (A) ``np.random``, (B) unscrambled Sobol
      and (C) scrambled Sobol, plus (D) the three nearest-neighbour (NN)
      distance histograms overlaid;
    * bottom row -- the NN histogram + KDE under each scatter, with the mean
      NN distance marked, plus a text panel with reading notes.

    Parameters
    ----------
    m_power : int
        Base-2 exponent of the sample count.
    scramble_seed : int
        Seed for both the ``np.random`` generator and the Sobol scrambling.
    """
    m_power = int(m_power)
    scramble_seed = int(scramble_seed)
    N = 2 ** m_power

    # -----------------------------
    # Samples
    # -----------------------------
    rng = np.random.default_rng(scramble_seed)
    X_rand = rng.random((N, 2))
    X_sobol = qmc.Sobol(d=2, scramble=False).random_base2(m=m_power)
    X_scr = qmc.Sobol(d=2, scramble=True, seed=scramble_seed).random_base2(m=m_power)

    # NN distances
    d_rand = nearest_neighbor_distances(X_rand)
    d_sob = nearest_neighbor_distances(X_sobol)
    d_scr = nearest_neighbor_distances(X_scr)

    # Common bins for comparability
    d_all = np.concatenate([d_rand, d_sob, d_scr])
    bins = np.linspace(d_all.min(), d_all.max(), 45)

    # style_nn applies limits chosen for N = 256. Typical NN spacing scales
    # as N**-0.5 (so densities scale as N**0.5); rescale the limits so the
    # panels stay readable at every slider position. At N = 256 this gives
    # exactly (0, 0.12) x (0, 100).
    density_scale = np.sqrt(N / 256)
    nn_xlim = (0.0, 0.12 / density_scale)
    nn_ylim = (0.0, 100 * density_scale)

    # -----------------------------
    # Figure layout: 2 rows x 4 columns
    # -----------------------------
    fig = plt.figure(figsize=(22, 10.2))
    gs = fig.add_gridspec(
        nrows=2, ncols=4,
        left=0.04, right=0.98,
        bottom=0.10, top=0.86,
        wspace=0.18, hspace=0.28
    )

    fig.suptitle(
        rf"Uniform Sampling in $[0,1]^2$  (N = {N})  —  Scrambled seed = {int(scramble_seed)}",
        fontsize=20,
        y=0.95
    )

    # -----------------------------
    # Columns A-C: scatter on top, NN distribution underneath
    # -----------------------------
    panels = (
        ("A", r"(A) $\mathrm{np.random}$ (pseudo-random)", X_rand, d_rand),
        ("B", r"(B) Sobol (unscrambled)", X_sobol, d_sob),
        ("C", r"(C) Sobol (scrambled)", X_scr, d_scr),
    )
    for col, (tag, scatter_title, points, dists) in enumerate(panels):
        ax_pts = fig.add_subplot(gs[0, col])
        ax_pts.scatter(points[:, 0], points[:, 1], s=14, alpha=0.85)
        style_square(ax_pts, scatter_title)

        ax_nn = fig.add_subplot(gs[1, col])
        hist_with_kde(ax_nn, dists, bins)
        # Same labelled mean marker on every panel, surfaced via the legend.
        ax_nn.axvline(
            np.mean(dists), linestyle="--", linewidth=2,
            label=r"$\langle d_{\mathrm{NN}} \rangle$",
        )
        style_nn(ax_nn, rf"NN distances for ({tag})")
        ax_nn.set_xlim(*nn_xlim)
        ax_nn.set_ylim(*nn_ylim)
        ax_nn.legend(frameon=True, fontsize=10)

    # -----------------------------
    # Top-right panel: overlay NN distributions (kept simple)
    # -----------------------------
    axD = fig.add_subplot(gs[0, 3])
    axD.hist(d_rand, bins=bins, density=True, alpha=0.60, label="Random")
    axD.hist(d_sob,  bins=bins, density=True, alpha=0.60, label="Sobol")
    axD.hist(d_scr,  bins=bins, density=True, alpha=0.60, label="Scrambled Sobol")
    axD.set_title(r"(D) NN distance (overlay)", fontsize=16, pad=10)
    axD.set_xlabel(r"$d_{\mathrm{NN}}$")
    axD.set_ylabel("Density")
    axD.grid(True, alpha=0.25)
    axD.legend(frameon=True)

    # -----------------------------
    # Bottom-right: notes panel (no axes drawn)
    # -----------------------------
    axD_nn = fig.add_subplot(gs[1, 3])
    axD_nn.axis("off")
    axD_nn.text(
        0.02, 0.98,
        "Interpretation:\n"
        r"• Random → heavier left tail (very small $d_{\mathrm{NN}}$ clusters)"
        "\n"
        r"• Sobol → suppresses tiny $d_{\mathrm{NN}}$ while staying space-filling"
        "\n"
        r"• Scrambling keeps low-discrepancy but removes visible lattice structure",
        va="top", ha="left",
        fontsize=12,
        bbox=dict(boxstyle="round,pad=0.5", alpha=0.10)
    )

    fig.text(
        0.5, 0.03,
        r"Unscrambled Sobol is deterministic/structured; scrambling randomizes while preserving low-discrepancy.",
        ha="center",
        fontsize=12,
        alpha=0.85
    )

    plt.show()


# =============================================================================
# Sliders: ONLY added m slider (plus existing seed slider)
# =============================================================================
# Wire plot_sampling to two sliders. "m" redraws only on release; "seed"
# redraws continuously while dragging. The trailing semicolon suppresses the
# cell's return-value echo.
interact(
    plot_sampling,
    m_power=IntSlider(description="m", value=m_power, min=4, max=12, step=1, continuous_update=False),
    scramble_seed=IntSlider(description="seed", value=1, min=0, max=999, step=1, continuous_update=True),
);

interactive(children=(IntSlider(value=8, continuous_update=False, description='m', max=12, min=4), IntSlider(v…

## Interpretation

At first glance, **scrambled Sobol appears to cover the space more evenly**, while `np.random` concentrates much of its nearest-neighbour (NN) distribution around a characteristic mean (as seen in the NN plots). However, increasing **m** shifts the NN distances to the left in a similar way for *all* samplers.

This naturally raises the question:

### Does increasing **m** make nearest-neighbour distances worse?

The short answer is **no** — although it can look that way if NN distances are interpreted incorrectly.

Below is the correct way to read what is happening.

### What increasing **m** actually does

In this notebook, the number of sample points is defined as

$$
N = 2^m
$$

As **m** increases:

- The number of points grows exponentially  
- All points remain within the fixed domain $[0,1]^2$  
- Points are therefore forced to pack closer together  

From geometry alone, the characteristic nearest-neighbour distance in 2D scales as

$$
d_{\mathrm{NN}} \sim N^{-1/2} = 2^{-m/2}
$$

Therefore, **NN distances must decrease as m increases** — for *any* sampling method.

This behaviour is unavoidable and expected.

### Why this is *not* counter-productive

A smaller NN distance at higher **m** does **not** indicate poorer sampling.

It simply reflects:

- Higher sampling resolution  
- Finer filling of the same domain  

This occurs for:

- `np.random`  
- Sobol (unscrambled)  
- Sobol (scrambled)  
- Any reasonable space-filling sampler  

As a result, the *absolute* value of NN distance is **not** a standalone quality metric.

### What NN distance is actually telling us

Nearest-neighbour distance is a **structural diagnostic**, not an absolute score.

It should be used to compare:

- Different sampling methods **at the same value of m**  
- The **shape** of the NN distribution, not its raw scale  

At fixed **m**, the meaningful distinctions are:

- **Random sampling**
  - Heavy left tail  
  - Very small NN distances (clustering)
- **Sobol (unscrambled)**
  - Suppressed left tail  
  - More regular spacing
- **Sobol (scrambled)**
  - Similar spacing guarantees  
  - Reduced visible lattice structure  

These qualitative differences persist for all values of **m**.

### What *would* be a mistake

It would be incorrect to compare NN distances across different values of **m** and conclude:

> “This sampler is worse because its NN distances are smaller.”

Such a conclusion ignores the intrinsic density scaling built into the problem.

### How increasing **m** should be interpreted

Increasing **m** does not degrade NN distances — it increases **resolution**.

The correct question is:

> As resolution increases, does the sampling preserve its structural character?

Good samplers:

- Scale predictably as $N$ increases  
- Maintain the shape of their NN distributions  
- Suppress extreme clustering  

Poor samplers:

- Develop heavy left tails  
- Exhibit unstable scaling behaviour  
- Produce voids and clumps  

From this perspective, increasing **m** is not counter-productive — it is **diagnostically useful**.

### (Advanced) Making NN distances comparable across **m**

To compare NN structure *across different values of m*, the trivial density scaling should be removed by normalisation:

$$
\tilde d_{\mathrm{NN}} = d_{\mathrm{NN}} \sqrt{N}
$$

This produces a scale-free NN distance:

- Differences reflect structure rather than density  
- Distributions become comparable across resolutions  

This is the mathematically correct approach once you move beyond demonstrations.

### Summary: how to read the plots

- Smaller NN distances at higher **m** are expected  
- NN distance is meaningful only when comparing samplers at the same **m**  
- Increasing **m** improves resolution; it does not degrade sampling  
- Structural differences live in the **distribution shape**, not the raw scale  

If NN distances shrink *while structure is preserved*, the sampler is behaving correctly.


In [19]:
# =============================================================================
# Scale-free nearest-neighbour distribution:  d̃_NN = d_NN * sqrt(N)
# -----------------------------------------------------------------------------
# ONE plot only:
#   - Overlay KDE(+hist) of normalized NN distances for:
#       (A) Random
#       (B) Sobol (unscrambled)
#       (C) Sobol (scrambled; controlled by seed)
#
# Sliders:
#   - m_power  (N = 2^m_power)
#   - seed     (controls RNG + scrambled Sobol seed)
# =============================================================================

def nearest_neighbor_distances(X):
    """Return each point's Euclidean distance to its nearest *other* point.

    Parameters
    ----------
    X : array_like, shape (N, d)
        Point coordinates, one row per point.

    Returns
    -------
    numpy.ndarray, shape (N,)
        Nearest-neighbour distance of every row. Exact duplicates give 0.0;
        a lone point has no neighbour and gives ``inf``.

    Notes
    -----
    Uses a KD-tree instead of the dense N x N x d difference tensor, which
    costs hundreds of megabytes per call at the slider maximum (N = 4096).
    """
    X = np.asarray(X, dtype=float)
    # k=2: the closest hit for every query point is the point itself at
    # distance 0, so the second column holds the true nearest neighbour.
    distances, _ = cKDTree(X).query(X, k=2)
    return distances[:, 1]


def plot_scale_free_nn(m_power=8, seed=1):
    """Overlay the scale-free NN distance distributions of the three samplers.

    Nearest-neighbour distances are multiplied by ``sqrt(N)`` so that the
    distributions can be compared across different sample counts. Each
    sampler is drawn as a translucent histogram plus a KDE curve in the
    same colour.

    Parameters
    ----------
    m_power : int
        Base-2 exponent of the sample count (``N = 2**m_power``).
    seed : int
        Seed for both the ``np.random`` generator and the Sobol scrambling.
    """
    m_power = int(m_power)
    seed = int(seed)
    N = 2 ** m_power

    # --- Samples
    rng = np.random.default_rng(seed)
    X_rand = rng.random((N, 2))
    X_sobol = qmc.Sobol(d=2, scramble=False).random_base2(m=m_power)
    X_scr = qmc.Sobol(d=2, scramble=True, seed=seed).random_base2(m=m_power)

    # --- Normalised NN distances, each paired with a fixed colour.
    # hist() and plot() draw from the same colour cycle, so without explicit
    # colours the KDE curves would not match the histograms they summarise.
    root_n = np.sqrt(N)
    series = (
        ("Random", "C0", nearest_neighbor_distances(X_rand) * root_n),
        ("Sobol", "C1", nearest_neighbor_distances(X_sobol) * root_n),
        ("Scrambled Sobol", "C2", nearest_neighbor_distances(X_scr) * root_n),
    )

    # --- Shared x grid (KDE) and bin edges (histograms)
    all_d = np.concatenate([dists for _, _, dists in series])
    lo, hi = all_d.min(), all_d.max()
    x = np.linspace(lo, hi, 600)
    bins = np.linspace(lo, hi, 55)

    # --- Plot (single axis)
    fig, ax = plt.subplots(figsize=(10.8, 5.6))

    # Histograms (density); these carry the legend entries
    for label, color, dists in series:
        ax.hist(dists, bins=bins, density=True, alpha=0.30, color=color, label=label)

    # KDE curves, colour-matched to their histograms
    for _, color, dists in series:
        ax.plot(x, gaussian_kde(dists)(x), linewidth=2.2, color=color)

    ax.set_title(
        rf"Scale-free NN spacing in $[0,1]^2$: $\tilde d_{{NN}} = d_{{NN}}\sqrt{{N}}$  (N = {N}, seed = {seed})",
        fontsize=16,
        pad=12
    )
    ax.set_xlabel(r"$\tilde d_{\mathrm{NN}} = d_{\mathrm{NN}}\sqrt{N}$", fontsize=13)
    ax.set_ylabel("Density", fontsize=13)
    ax.grid(True, alpha=0.25)
    ax.legend(frameon=True)
    plt.show()


# Wire plot_scale_free_nn to two sliders. "m" redraws only on release; "seed"
# redraws continuously while dragging. The trailing semicolon suppresses the
# cell's return-value echo.
interact(
    plot_scale_free_nn,
    m_power=IntSlider(
        description="m",
        value=8, min=4, max=12, step=1,
        continuous_update=False,
    ),
    seed=IntSlider(
        description="seed",
        value=1, min=0, max=999, step=1,
        continuous_update=True,
    ),
);


interactive(children=(IntSlider(value=8, continuous_update=False, description='m', max=12, min=4), IntSlider(v…

## Conclusion

In conclusion, **scrambled Sobol explores the space more evenly at a fixed resolution**, suppressing very small nearest-neighbour distances and reducing clustering compared to `np.random`, which exhibits a heavier left tail in its NN distribution. The apparent left-shift of NN distances observed when increasing **m** occurs for *all* samplers and reflects a universal geometric effect of higher point density rather than a degradation in sampling quality. Consequently, NN distance should be interpreted as a **relative structural diagnostic at fixed m**, not as an absolute measure across different resolutions.