In [None]:
import numpy as np
import matplotlib.pyplot as plt
import numpy.typing as npt

from stellar_stream import StellarStream

In [None]:
def inject_substructure(phi1, phi2=None, vlos=None,
                        amount=1, scale=0.5,  # typical width scale (same units as phi1)
                        mode="point",         # "point" or "binned"
                        kind="gaussian",      # currently only gaussian supported
                        depths=None,          # gap depth(s): fraction removed at the feature centre (0..1)
                        amps=None,            # overdensity amplitude(s): fractional excess at the centre
                        rng=None,
                        binned_kwargs=None):
    """
    Inject `amount` Gaussian substructures into a stream along phi1.

    Parameters
    ----------
    phi1 : 1D array
        Along-stream coordinate of the points (same units as `scale`).
        Feature centres are drawn uniformly between its minimum and maximum.
    phi2 : 1D array or None
        Across-stream coordinate; rows are removed/duplicated together with phi1.
    vlos : 1D array or None
        Velocities; rows are removed/duplicated together with phi1.
    amount : int
        Number of features to inject.
    scale : float
        Nominal Gaussian width. Each feature's sigma is drawn from a lognormal
        centred on log(scale) with a scatter of 0.3.
    mode : {"point", "binned"}
        - "binned": multiply a supplied binned density by Gaussian factors.
        - anything else is handled as "point": every feature first removes
          points with probability depth * gaussian, then (when its amplitude
          exceeds 1e-12) duplicates surviving points k ~ Poisson(amp * gaussian)
          times.
    kind : str
        Accepted for interface compatibility; not read by the implementation.
    depths : scalar or array-like or None
        Gap depth per feature. A single value is shared by all features.
        If None, drawn from Uniform(0.1, 1.0).
    amps : scalar or array-like or None
        Overdensity amplitude per feature (0.5 means +50% at the centre).
        A single value is shared by all features. If None, drawn from
        Uniform(0.1, 1.0).
    rng : np.random.Generator or None
        Random generator; a fresh np.random.default_rng() is used when None.
    binned_kwargs : dict or None
        Required when mode == "binned". Keys: 'bins' (edges), 'density'
        (one value per bin) and optionally 'type' ("gap" by default; any
        other value selects an overdensity).

    Returns
    -------
    mode == "binned":
        density_new, injected_params
    otherwise:
        phi1_new, phi2_new, vlos_new, injected_params
        (phi2_new / vlos_new are None when the matching input was None;
        the catalog is returned in shuffled order).

    injected_params is a list of dicts with keys kind, center, sigma, depth
    and amp. In binned mode only the quantity that was actually applied is
    filled in (depth for gaps, amp for overdensities); the other one is None.

    Raises
    ------
    ValueError
        If mode == "binned" and `binned_kwargs` is None.
    """
    if rng is None:
        rng = np.random.default_rng()

    phi1 = np.asarray(phi1)
    has_phi2 = phi2 is not None
    has_vlos = vlos is not None
    if has_phi2:
        phi2 = np.asarray(phi2)
    if has_vlos:
        vlos = np.asarray(vlos)

    def per_feature(values, n):
        # One entry per feature: draw at random when unset, share a single value otherwise.
        if values is None:
            return rng.uniform(0.1, 1.0, size=n)
        arr = np.asarray(values)
        return np.broadcast_to(arr, (n,)) if arr.size == 1 else arr

    def gaussian(x, center, sigma):
        # Unit-height Gaussian profile evaluated at x.
        return np.exp(-0.5 * ((x - center) / sigma) ** 2)

    # Draw order (centres, sigmas, depths, amps) is kept fixed so that a seeded
    # generator keeps producing the same features.
    centers = rng.uniform(phi1.min(), phi1.max(), size=amount)
    sigmas = rng.lognormal(mean=np.log(scale), sigma=0.3, size=amount)  # lognormal scatter
    depths_arr = per_feature(depths, amount)
    amps_arr = per_feature(amps, amount)

    injected = []

    if mode == "binned":
        # Explicit raise instead of assert: asserts vanish under `python -O`.
        if binned_kwargs is None:
            raise ValueError("binned_kwargs required for mode='binned'.")
        feature_type = binned_kwargs.get("type", "gap")
        is_gap = feature_type == "gap"
        edges = np.asarray(binned_kwargs["bins"])
        xcenters = 0.5 * (edges[:-1] + edges[1:])
        density_new = np.array(binned_kwargs["density"], dtype=float)  # private float copy
        for i, (c, sigma) in enumerate(zip(centers, sigmas)):
            profile = gaussian(xcenters, c, sigma)
            if is_gap:
                depth, amp = float(depths_arr[i]), None
                density_new *= 1.0 - depth * profile
            else:
                depth, amp = None, float(amps_arr[i])
                density_new *= 1.0 + amp * profile
            # Record only the parameter that was applied; comparing against
            # "gap" (rather than testing the string's truthiness) keeps the
            # overdensity bookkeeping correct.
            injected.append(dict(kind=feature_type, center=float(c),
                                 sigma=float(sigma), depth=depth, amp=amp))
        return density_new, injected

    # Point mode: operate on private copies of the catalog columns.
    phi1_work = phi1.copy()
    phi2_work = phi2.copy() if has_phi2 else None
    vlos_work = vlos.copy() if has_vlos else None

    for i, (c, sigma) in enumerate(zip(centers, sigmas)):
        depth = depths_arr[i]
        amp = amps_arr[i]

        # --- gap: keep each point with probability 1 - depth * gaussian ---
        keep_prob = np.clip(1.0 - depth * gaussian(phi1_work, c, sigma), 0.0, 1.0)
        keep_mask = rng.random(size=keep_prob.size) < keep_prob
        phi1_work = phi1_work[keep_mask]
        if has_phi2:
            phi2_work = phi2_work[keep_mask]
        if has_vlos:
            vlos_work = vlos_work[keep_mask]

        # --- overdensity: duplicate survivors k ~ Poisson(amp * gaussian) times ---
        if amp > 1e-12:
            extra_counts = rng.poisson(amp * gaussian(phi1_work, c, sigma))
            # np.repeat with per-element counts skips zero-count rows by itself.
            # The side columns are extended first because extra_counts is
            # aligned with the catalog as it is before any append.
            if has_phi2:
                phi2_work = np.concatenate([phi2_work, np.repeat(phi2_work, extra_counts)])
            if has_vlos:
                vlos_work = np.concatenate([vlos_work, np.repeat(vlos_work, extra_counts)])
            phi1_work = np.concatenate([phi1_work, np.repeat(phi1_work, extra_counts)])

        injected.append(dict(kind="gaussian_gap_plus_over", center=float(c),
                             sigma=float(sigma),
                             depth=float(depth),
                             amp=float(amp)))

    # Shuffle so the appended duplicates do not sit in one block at the end.
    order = rng.permutation(len(phi1_work))
    phi1_new = phi1_work[order]
    phi2_new = phi2_work[order] if has_phi2 else None
    vlos_new = vlos_work[order] if has_vlos else None

    return phi1_new, phi2_new, vlos_new, injected


In [None]:
# Baseline catalog: 50k stars spread uniformly along phi1, with Gaussian
# scatter in phi2 and in vlos around a common mean.
mu = 0          # mean of the phi2 and vlos draws
phi2_sigma = 1  # standard deviation of phi2
vlos_sigma = 5  # standard deviation of vlos
size = 50000
rng = np.random.default_rng()
phi1 = rng.uniform(low=-300, high=300, size=size)
phi2 = rng.normal(loc=mu, scale=phi2_sigma, size=size)
vlos = rng.normal(loc=mu, scale=vlos_sigma, size=size)

# Same recipe with ten times as many stars, drawn from a fresh generator.
size = 50000 * 10
rng = np.random.default_rng()
phi1_big = rng.uniform(low=-300, high=300, size=size)
phi2_big = rng.normal(loc=mu, scale=phi2_sigma, size=size)
vlos_big = rng.normal(loc=mu, scale=vlos_sigma, size=size)



In [None]:
def make_streams(amount_low, amount_mid, amount_high, scale_low=100, scale_mid=10, scale_high=1):
    """Build the unperturbed base stream plus three perturbed copies of it.

    Every perturbed copy comes from one inject_substructure call on the
    module-level catalog (phi1, phi2, vlos), using that copy's own feature
    count (`amount_*`) and width scale (`scale_*`).

    Returns
    -------
    S, S_low, S_mid, S_high
        StellarStream objects labelled 'base stream', 'low stream',
        'mid stream' and 'high stream'.
    """
    settings = (
        ('low stream', amount_low, scale_low),
        ('mid stream', amount_mid, scale_mid),
        ('high stream', amount_high, scale_high),
    )

    # Run every injection first (low, mid, high), dropping the parameter list
    # that inject_substructure returns as its fourth element.
    catalogs = [
        inject_substructure(phi1, phi2=phi2, vlos=vlos, amount=n_features, scale=width)[:3]
        for _, n_features, width in settings
    ]

    # Then wrap each catalog. No selection is applied here; restrictions such as
    # .select("restrict_phi1", phi1_lim=(-100, 0)) or
    # .select("restrict_phi2", phi2_lim=6.0) are deliberately left out.
    S_low, S_mid, S_high = [
        StellarStream.from_catalog(*columns, label)
        for (label, _, _), columns in zip(settings, catalogs)
    ]
    S = StellarStream.from_catalog(phi1, phi2, vlos, 'base stream')

    return S, S_low, S_mid, S_high


# Ten features per stream, using make_streams' default widths (100 / 10 / 1).
S, S_low, S_mid, S_high = make_streams(amount_low=10, amount_mid=10, amount_high=10)

# Draw the three perturbed streams; the base stream is not plotted here.
for perturbed in (S_low, S_mid, S_high):
    perturbed.plot_stream()
plt.show()

In [None]:
# Repeat the injection on the ten-times-larger catalog: ten features each at
# width scales 100, 10 and 1.
phi1_low_big, phi2_low_big, vlos_low_big, _ = inject_substructure(phi1_big, phi2=phi2_big, vlos=vlos_big, amount=10, scale=100)
phi1_mid_big, phi2_mid_big, vlos_mid_big, _ = inject_substructure(phi1_big, phi2=phi2_big, vlos=vlos_big, amount=10, scale=10)
phi1_high_big, phi2_high_big, vlos_high_big, _ = inject_substructure(phi1_big, phi2=phi2_big, vlos=vlos_big, amount=10, scale=1)

# Wrap each catalog in a StellarStream. No selection is applied; restrictions
# such as .select("restrict_phi1", phi1_lim=(-100, 0)) or
# .select("restrict_phi2", phi2_lim=6.0) are deliberately left out.
S_low_big = StellarStream.from_catalog(phi1_low_big, phi2_low_big, vlos_low_big, 'low stream * 10')
S_mid_big = StellarStream.from_catalog(phi1_mid_big, phi2_mid_big, vlos_mid_big, 'mid stream * 10')
S_high_big = StellarStream.from_catalog(phi1_high_big, phi2_high_big, vlos_high_big, 'high stream * 10')
S_big = StellarStream.from_catalog(phi1_big, phi2_big, vlos_big, 'base stream * 10')

# Plot the dense streams built in this cell. Plotting S_low / S_mid / S_high
# here would only repeat the figures of the regular-size catalog.
S_low_big.plot_stream()
S_mid_big.plot_stream()
S_high_big.plot_stream()
plt.show()

In [None]:
# Power-spectrum comparison of the three perturbed streams and the base stream.
# NOTE(review): plot_power_spectrum lives in stellar_stream; presumably it draws
# one curve per stream passed and forwards `precision` to the spectrum
# computation — confirm against that module.
StellarStream.plot_power_spectrum(S_high, S_low, S_mid, S, precision=0.01)

In [None]:
# Density comparison of the regular base stream and its ten-times-larger twin.
# NOTE(review): plot_density lives in stellar_stream; presumably it overlays the
# phi1 density of every stream passed — confirm against that module.
StellarStream.plot_density(S, S_big)

In [None]:
# Power spectra of the regular and the ten-times-larger base stream.
freq, pwr = S.power_spectrum()
freq_big, pwr_big = S_big.power_spectrum()

# Each spectrum is masked on its own frequency grid: the two grids are not
# guaranteed to have the same length or spacing, so a mask built from `freq`
# must not be used to index `pwr_big`.
mask = (freq > 0.1)
mask_big = (freq_big > 0.1)

# Mean power above a frequency of 0.1 for both catalogs.
print(f'big: {np.mean(pwr_big[mask_big])}')
print(f'reg: {np.mean(pwr[mask])}')
plt.plot(freq, pwr, label="reg")
plt.plot(freq_big, pwr_big, label="big")
plt.xscale('log')
plt.yscale('log')
plt.legend()
plt.show()

In [None]:
from scipy.integrate import simpson

# Rebuild the streams, this time with a width scale of 4 for the finest features.
S, S_low, S_mid, S_high = make_streams(10, 10, 10, scale_low=100, scale_mid=10, scale_high=4)

# Binned power spectra at precision 1; the frequency axis of the base stream
# is reused for all curves.
p = 1
freq, ps = S.power_spectrum(use_bins=True, precision=p)
_, low_ps = S_low.power_spectrum(use_bins=True, precision=p)
_, mid_ps = S_mid.power_spectrum(use_bins=True, precision=p)
_, high_ps = S_high.power_spectrum(use_bins=True, precision=p)

# Positive frequencies only, as required by the log-scaled x axis.
mask = freq > 0

# Absolute departure of every perturbed spectrum from the base spectrum.
diff_high = np.absolute(ps - high_ps)
diff_low = np.absolute(ps - low_ps)
diff_mid = np.absolute(ps - mid_ps)

# The *_normalized names are plain aliases: dividing by
# simpson(diff, x=freq) is currently switched off.
diff_high_normalized = diff_high
diff_low_normalized = diff_low
diff_mid_normalized = diff_mid

plt.figure(figsize=(10, 6), dpi=300)
for curve, label in (
    (diff_high_normalized, 'High Frequency Difference'),
    (diff_low_normalized, 'Low Frequency Difference'),
    (diff_mid_normalized, 'Mid Frequency Difference'),
):
    plt.plot(freq[mask], curve[mask], label=label)
plt.xscale('log')
plt.yscale('log')
plt.legend()
plt.grid()
plt.show()

In [None]:
from scipy.integrate import simpson
from scipy.ndimage import gaussian_filter1d

def substructures_1(
    bins: npt.NDArray,
    dens: npt.NDArray,
    scales: "npt.ArrayLike | None" = None,
) -> "tuple[npt.NDArray, npt.NDArray]":
    """Scale profile of a binned density from cumulative Gaussian smoothing.

    The density is blurred repeatedly, once per entry of `scales`, each pass
    acting on the output of the previous one. For every pass the integrated
    absolute change (Simpson's rule over the bin index) is recorded, and the
    resulting curve is normalised to unit area over the scale axis.

    Parameters
    ----------
    bins
        Bin edges. Only the first bin width is read, so the bins are assumed
        to be uniform.
    dens
        Density per bin.
    scales
        Smoothing widths (sigma) in the units of `bins`. Defaults to
        0.01, 0.02, ..., 2.99.

    Returns
    -------
    scales, profile
        The scale axis and the normalised change per scale. The profile is
        all zeros when there is nothing to normalise (e.g. a flat zero
        density), instead of NaN from a 0/0 division.
    """
    bin_width = np.diff(bins)[0]  # uniform binning assumed

    if scales is None:
        scales = np.arange(1, 300) / 100
    scales = np.asarray(scales, dtype=float)

    # Work in floats so that integer count arrays are not truncated by the filter.
    current = np.asarray(dens, dtype=float)
    changes = []
    for width in scales:
        # gaussian_filter1d expects sigma in units of bins, not of phi1.
        blurred = gaussian_filter1d(current, sigma=width / bin_width)
        changes.append(simpson(np.abs(blurred - current)))
        current = blurred  # the next pass smooths the already smoothed density

    profile = np.asarray(changes, dtype=float)
    norm = simpson(profile, x=scales)  # area under the curve, not a frequency
    if np.isfinite(norm) and norm != 0:
        profile = profile / norm
    else:
        profile = np.zeros_like(profile)
    return scales, profile

def substructures_2(
    bins: npt.NDArray,
    dens: npt.NDArray,
    scales: "npt.ArrayLike | None" = None,
) -> "tuple[npt.NDArray, npt.NDArray]":
    """Scale profile of a binned density from independent Gaussian smoothings.

    The original density is blurred once per entry of `scales`, and the
    integrated absolute residual (Simpson's rule over the bin index) is
    recorded for each width. The returned curve is the increase of that
    residual from one scale to the next, normalised to unit area over the
    scale axis.

    Parameters
    ----------
    bins
        Bin edges. Only the first bin width is read, so the bins are assumed
        to be uniform.
    dens
        Density per bin.
    scales
        Smoothing widths (sigma) in the units of `bins`. Defaults to
        0.01, 0.02, ..., 2.99.

    Returns
    -------
    scales, profile
        The scale axis without its last entry (differencing removes one
        sample) and the normalised residual increments. The profile is all
        zeros when the normalising integral is zero or not finite, instead
        of NaN/inf from the division.
    """
    bin_width = np.diff(bins)[0]  # uniform binning assumed

    if scales is None:
        scales = np.arange(1, 300) / 100
    scales = np.asarray(scales, dtype=float)

    # Work in floats so that integer count arrays are not truncated by the filter.
    original = np.asarray(dens, dtype=float)
    residuals = np.array([
        # gaussian_filter1d expects sigma in units of bins, not of phi1.
        simpson(np.abs(gaussian_filter1d(original, sigma=width / bin_width) - original))
        for width in scales
    ], dtype=float)

    scale_axis = scales[:-1]
    profile = np.diff(residuals)

    norm = simpson(profile, x=scale_axis)
    if np.isfinite(norm) and norm != 0:
        profile = profile / norm
    else:
        profile = np.zeros_like(profile)
    return scale_axis, profile




# Scale profiles from substructures_2 for the base stream and the three
# perturbed streams. (substructures_1 can be swapped in for a side-by-side
# comparison of the two estimators.)
bins, results = substructures_2(*S.density_phi1())
bins_low, results_low = substructures_2(*S_low.density_phi1())
bins_mid, results_mid = substructures_2(*S_mid.density_phi1())
bins_high, results_high = substructures_2(*S_high.density_phi1())

# Only the perturbed streams are drawn, on linear axes.
for scale_axis, profile, label in (
    (bins_low, results_low, 'low freq'),
    (bins_mid, results_mid, 'mid freq'),
    (bins_high, results_high, 'high freq'),
):
    plt.plot(scale_axis, profile, label=label)
plt.legend()

# Mean of every normalised profile.
print(f"low mean: {results_low.mean()}")
print(f"mid mean: {results_mid.mean()}")
print(f"high mean: {results_high.mean()}")

In [None]:
# One million points: uniform in x, Gaussian in y (y is drawn but not used below).
size = 1000000
mu, phi2_sigma = 0, 1  # mean and standard deviation
rng = np.random.default_rng()
x = rng.uniform(low=-300, high=300, size=size)
y = rng.normal(loc=mu, scale=phi2_sigma, size=size)

# Histogram x into 50 equal-width bins spanning exactly the sampled range.
bins = 50
edges = np.linspace(x.min(), x.max(), num=bins + 1)
centers = 0.5 * (edges[:-1] + edges[1:])
counts, _ = np.histogram(x, bins=edges)

# Counts per unit length, then rescaled so that sum(dens * widths) == 1.
widths = edges[1:] - edges[:-1]
dens = counts / widths
area = (dens * widths).sum()
# A zero area (all-empty histogram) yields an all-zero density instead of a division.
dens = dens / area if area > 0 else np.zeros_like(dens)

plt.plot(centers, dens)
plt.show()

In [None]:
from scipy.integrate import simpson

# Power spectra with default settings. Each stream keeps its own frequency
# grid: the grids of different streams are not guaranteed to have the same
# length, so integrating or plotting a perturbed spectrum against the base
# stream's `freq` can fail or misalign.
freq, ps = S.power_spectrum()
freq_low, low_ps = S_low.power_spectrum()
freq_mid, mid_ps = S_mid.power_spectrum()
freq_high, high_ps = S_high.power_spectrum()

# Positive frequencies only, as required by the log-scaled x axis.
mask = (freq > 0)
mask_low = (freq_low > 0)
mask_mid = (freq_mid > 0)
mask_high = (freq_high > 0)

# Normalise every spectrum to unit area (Simpson's rule) over its own grid.
low_ps_normalized = low_ps / simpson(low_ps, x=freq_low)
mid_ps_normalized = mid_ps / simpson(mid_ps, x=freq_mid)
high_ps_normalized = high_ps / simpson(high_ps, x=freq_high)

# The curves are normalised spectra, not differences, so they are labelled
# the same way as in plot_power_spectra.
plt.figure(figsize=(10, 6), dpi=300)
plt.plot(freq_high[mask_high], high_ps_normalized[mask_high], label='High Frequency')
plt.plot(freq_low[mask_low], low_ps_normalized[mask_low], label='Low Frequency')
plt.plot(freq_mid[mask_mid], mid_ps_normalized[mask_mid], label='Mid Frequency')
plt.xscale('log')
plt.yscale('log')
plt.legend()
plt.grid()
plt.show()

In [None]:

from scipy.integrate import simpson

p = 0.01
x_min, x_max = 10**-2.3, 10**-1.2  # frequency band shown in the lower panel

freq, ps = S.power_spectrum(precision=p)
_, low_ps = S_low.power_spectrum(precision=p)
_, mid_ps = S_mid.power_spectrum(precision=p)
_, high_ps = S_high.power_spectrum(precision=p)

# The spectra may differ in length; truncate everything to the shortest one
# so that a single frequency axis can be shared.
smallest_len = min(len(ps), len(low_ps), len(mid_ps), len(high_ps))
min_len = slice(0, smallest_len)

low_ps_matched = low_ps[min_len]
mid_ps_matched = mid_ps[min_len]
high_ps_matched = high_ps[min_len]
freq_matched = freq[min_len]

# Upper panel: all positive frequencies (needed for the log-scaled axis).
mask = (freq_matched > 0)
low_ps = low_ps_matched[mask]
mid_ps = mid_ps_matched[mask]
high_ps = high_ps_matched[mask]
freq = freq_matched[mask]

# Lower panel: positive frequencies inside (x_min, x_max). All arrays are
# indexed with the same mask, so no further length matching is needed.
mask = mask & (freq_matched > x_min) & (freq_matched < x_max)
low_ps_filtered = low_ps_matched[mask]
mid_ps_filtered = mid_ps_matched[mask]
high_ps_filtered = high_ps_matched[mask]
freq_filtered = freq_matched[mask]

fig, ax = plt.subplots(2, 1, figsize=(10, 8), dpi=300)

# The curves are spectra, not differences, so they are labelled the same way
# as in plot_power_spectra.
ax[0].plot(freq, high_ps, label='High Frequency')
ax[0].plot(freq, low_ps, label='Low Frequency')
ax[0].plot(freq, mid_ps, label='Mid Frequency')
ax[0].axvline(x=x_min, color='red', linestyle='dotted', label='begin')
ax[0].axvline(x=x_max, color='red', linestyle='dotted', label='end')

ax[1].plot(freq_filtered, high_ps_filtered, label='High Frequency')
ax[1].plot(freq_filtered, low_ps_filtered, label='Low Frequency')
ax[1].plot(freq_filtered, mid_ps_filtered, label='Mid Frequency')

# pyplot-level calls (plt.xscale, plt.legend, ...) only reach the current,
# i.e. last, axes; configure both panels explicitly instead.
for panel in ax:
    panel.set_xscale('log')
    panel.set_yscale('log')
    panel.legend()
    panel.grid(True)
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import simpson
from typing import Tuple, Dict, Any

def plot_power_spectra(
    S, S_low, S_mid, S_high,
    precision: float = 0.01,
    window: str = 'boxcar',
    x_min: float = 10**-2.3,
    x_max: float = 10**-1.2,
    figsize: Tuple[int,int] = (10, 8),
    dpi: int = 300,
    compute_band_power: bool = True
) -> Dict[str, Any]:
    """
    Plot the power spectra of the three perturbed streams in two panels.

    Every argument stream must provide `.power_spectrum(precision=..., window=...)`
    returning `(freq, power)`. The frequency axis is taken from `S`; its power
    values are fetched but not drawn.

    Panels:
      1) all positive frequencies, with the band limits marked
      2) only x_min < freq < x_max

    Non-positive power values are replaced by NaN before plotting so that the
    log-scaled axes stay valid.

    Returns
    -------
    dict
        Empty when `compute_band_power` is False. Otherwise
        {'band_power': {'low': ..., 'mid': ..., 'high': ...}} holding the
        Simpson integral of each spectrum over the band (NaN when fewer than
        two finite samples fall inside it).
    """
    def spectrum_of(stream):
        return stream.power_spectrum(precision=precision, window=window)

    # Fetch in the order base, low, mid, high; only the base frequency axis is kept.
    freq, _base_power = spectrum_of(S)
    raw = {
        name: np.asarray(spectrum_of(stream)[1])
        for name, stream in (('low', S_low), ('mid', S_mid), ('high', S_high))
    }
    freq = np.asarray(freq)

    # Align everything to the shortest array, then drop freq <= 0 (log axis).
    n = min(len(freq), *(len(power) for power in raw.values()))
    freq = freq[:n]
    positive = freq > 0
    freq = freq[positive]

    def plottable(values):
        # Float copy in which non-positive entries become NaN.
        values = np.asarray(values, dtype=float)
        return np.where(values > 0, values, np.nan)

    full = {name: plottable(power[:n][positive]) for name, power in raw.items()}

    # Restrict to the requested band.
    in_band = (freq > x_min) & (freq < x_max)
    freq_band = freq[in_band]
    band = {name: curve[in_band] for name, curve in full.items()}

    # Drawing order and legend text shared by both panels.
    curves = (('high', 'High Frequency'), ('mid', 'Mid Frequency'), ('low', 'Low Frequency'))

    fig, (top, bottom) = plt.subplots(2, 1, figsize=figsize, dpi=dpi)

    for name, label in curves:
        top.plot(freq, full[name], label=label)
    top.axvline(x=x_min, color='red', linestyle='dotted', label='band start')
    top.axvline(x=x_max, color='red', linestyle='dotted', label='band end')
    top.set_xscale('log')
    top.set_yscale('log')
    top.set_title('Full Spectrum')
    top.legend()
    top.grid(True)

    for name, label in curves:
        bottom.plot(freq_band, band[name], label=label)
    bottom.set_xscale('log')
    bottom.set_yscale('log')
    bottom.set_xlim(x_min, x_max)
    bottom.set_title(f'Filtered Spectrum ({x_min:.2e} – {x_max:.2e})')
    bottom.legend()
    bottom.grid(True)

    plt.tight_layout()
    plt.show()

    results: Dict[str, Any] = {}
    if not compute_band_power:
        return results

    def band_integral(f, y):
        # Simpson integral over the finite samples; NaN if fewer than two exist.
        if len(f) < 2 or not np.isfinite(y).any():
            return np.nan
        usable = np.isfinite(f) & np.isfinite(y)
        if np.count_nonzero(usable) < 2:
            return np.nan
        return simpson(y[usable], x=f[usable])

    results['band_power'] = {
        name: band_integral(freq_band, band[name]) for name in ('low', 'mid', 'high')
    }
    return results


In [None]:
# Hann-windowed spectra at precision 0.05; band powers integrated over
# 0.005 < freq < 0.07.
band_integrals = plot_power_spectra(
    S, S_low, S_mid, S_high,
    precision=0.05,
    window='hann',
    x_min=0.005,
    x_max=0.07,
)
print(band_integrals)

In [None]:
# Same comparison with the band moved to 0.01 < freq < 0.1.
band_integrals = plot_power_spectra(
    S, S_low, S_mid, S_high,
    precision=0.05,
    window='hann',
    x_min=0.01,
    x_max=0.1,
)
print(band_integrals)

In [None]:
from typing import Callable, Tuple, Optional
import numpy.typing as npt

def stream_with_structure(
    size: int = 50000,
    mu: float = 0.0,
    phi2_sigma: float = 1.0,
    vlos_sigma: float = 5.0,
    f: Callable[[float], float] = lambda freq: freq,
    freqs: Optional[npt.NDArray] = None,
    rng: Optional[np.random.Generator] = None,
) -> "StellarStream":
    """
    Generate a synthetic stream and (optionally) inject substructure at the scales in `freqs`.

    Parameters
    ----------
    size
        Number of stars drawn before any injection.
    mu
        Mean of the phi2 and vlos draws.
    phi2_sigma
        Standard deviation for phi2.
    vlos_sigma
        Standard deviation for vlos.
    f
        Maps a scale to the number of features to inject at that scale. The
        value is rounded to the nearest integer (inject_substructure needs an
        integer count); scales whose count rounds below 1 are skipped.
    freqs
        Scales to loop over; each one is forwarded as the `scale` (Gaussian
        width) of inject_substructure. None or empty means no injection.
        Non-positive or non-finite entries are skipped with a warning, since
        inject_substructure takes np.log(scale).
    rng
        Optional numpy.random.Generator, used for the initial draws and
        forwarded to every injection so that a seeded generator makes the
        whole stream reproducible. If None, a default generator is created.

    Returns
    -------
    StellarStream
        Built with StellarStream.from_catalog from the final phi1, phi2 and
        vlos arrays, labelled 'stream with structure'.
    """
    import warnings

    if rng is None:
        rng = np.random.default_rng()

    # initialize the stream: uniform along phi1, Gaussian in phi2 and vlos
    phi1 = rng.uniform(-300.0, 300.0, size)
    phi2 = rng.normal(loc=mu, scale=phi2_sigma, size=size)
    vlos = rng.normal(loc=mu, scale=vlos_sigma, size=size)

    # normalize freqs to a flat float array (None -> empty -> no injection)
    if freqs is None:
        scales = np.array([], dtype=float)
    else:
        scales = np.asarray(freqs, dtype=float).ravel()

    # A width must be strictly positive: a zero, negative or NaN scale would
    # give NaN sigmas downstream and silently empty the catalog.
    usable = np.isfinite(scales) & (scales > 0)
    if not usable.all():
        warnings.warn(
            f"stream_with_structure: skipping {np.count_nonzero(~usable)} "
            "non-positive or non-finite scale(s)",
            stacklevel=2,
        )

    for width in scales[usable]:
        amount = int(round(float(f(width))))  # feature count must be an integer
        if amount < 1:
            continue
        phi1, phi2, vlos, _ = inject_substructure(
            phi1, phi2=phi2, vlos=vlos, amount=amount, scale=float(width), rng=rng
        )

    return StellarStream.from_catalog(phi1, phi2, vlos, 'stream with structure')


# Demo: 100 scales between -10 and 10, with a Gaussian-shaped feature count
# exp(-freq**2) that peaks at 1 for freq = 0.
# NOTE(review): half of these values are negative, yet each one is forwarded as
# the `scale` of inject_substructure, which takes np.log(scale) — confirm the
# intended range of scales.
stream_with_structure(freqs=np.linspace(-10, 10, 100), f=lambda freq: np.exp(-freq**2)).plot_stream()