In [1]:
"""
demo_workflow.py

A short script illustrating how to:
  1) Create simulated continuous‐rate signals (DataFrame)
  2) Convert them into a RecordingBundle via df_to_recording_bundle
  3) Add synthetic spike‐train arrays to the RecordingBundle
  4) Run to_decoder_batch to produce a time‐aligned DecoderBatch
  5) Slice and select parts of the DecoderBatch
  6) Validate that required sources exist

Requires:
    numpy, pandas, sklearn (for preprocessing), and the non_local_detector package
"""

import numpy as np
import pandas as pd

from non_local_detector.bundle import RecordingBundle, DecoderBatch, validate_sources
from non_local_detector.io.preprocessing import df_to_recording_bundle, to_decoder_batch

# ------------------------------------------------------------
# 1) Simulate continuous‐rate signals as a pandas DataFrame
# ------------------------------------------------------------
# Two analog channels ("lfp1", "lfp2") sampled uniformly at 500 Hz for 2 s.
fs = 500.0  # Hz
n_seconds = 2.0
n_samples = int(n_seconds * fs)
time_index = np.linspace(0, n_seconds, n_samples, endpoint=False)

# Use a dedicated, seeded Generator for the additive noise so that the values
# printed by this demo are reproducible from run to run. The unseeded legacy
# global state (np.random.randn) gives different numbers on every execution.
noise_rng = np.random.default_rng(seed=0)

# 10 Hz sine and 20 Hz cosine, each with Gaussian noise of standard deviation 0.1.
# Draw order is lfp1 first, then lfp2.
lfp1 = np.sin(2 * np.pi * 10 * time_index) + 0.1 * noise_rng.standard_normal(n_samples)
lfp2 = np.cos(2 * np.pi * 20 * time_index) + 0.1 * noise_rng.standard_normal(n_samples)

# The DataFrame index holds the sample times in seconds (float64).
df = pd.DataFrame(
    {
        "lfp1": lfp1,
        "lfp2": lfp2,
    },
    index=time_index.astype(float),
)

print("=== 1) DataFrame head ===")
print(df.head(), "\n")
print(f"Index dtype: {df.index.dtype}, length: {len(df)}\n")

# ------------------------------------------------------------
# 2) Convert DataFrame → RecordingBundle.signals
# ------------------------------------------------------------
# No sampling rate is passed explicitly: the demo relies on
# df_to_recording_bundle to work out the 500 Hz rate on its own.
rb_cont = df_to_recording_bundle(df)

print("=== 2) RecordingBundle.signals keys & metadata ===")
for name, series in rb_cont.signals.items():
    # Build the three metadata fields separately, then join them on one line.
    shape_part = f"data.shape={series.data.shape}"
    rate_part = f"sampling_rate_hz={series.sampling_rate_hz:.1f}"
    start_part = f"start_s={series.start_s:.3f}"
    print(f"  • {name!r}: {shape_part}, {rate_part}, {start_part}")
print()

# ------------------------------------------------------------
# 3) Add synthetic spike trains (two “neurons”)
# ------------------------------------------------------------
# Two synthetic units over the same 2-second interval: a fixed number of spike
# times per unit, drawn uniformly from [0, n_seconds) and then sorted.
n_spikes_neuron0 = 30
n_spikes_neuron1 = 20
rng = np.random.default_rng(seed=42)

# The generator is consumed in order: neuron 0 is drawn first, then neuron 1.
spike_times_0, spike_times_1 = (
    np.sort(rng.uniform(0, n_seconds, size=count))
    for count in (n_spikes_neuron0, n_spikes_neuron1)
)

# spike_times_s is given as a list with one numpy array per neuron. The
# continuous signals from step (2) are reused and no waveforms are attached.
rb = RecordingBundle(
    signals=rb_cont.signals,
    spike_waveforms=None,
    spike_times_s=[spike_times_0, spike_times_1],
)

print("=== 3) RecordingBundle summary ===")
print(f"  • Number of neurons: {len(rb.spike_times_s)}")
for unit in (0, 1):
    print(f"  • First 5 spike times (neuron {unit}): {rb.spike_times_s[unit][:5]}")
signal_names = list(rb.signals.keys())
print(f"  • Continuous signals: {signal_names}\n")

# ------------------------------------------------------------
# 4) Build a DecoderBatch (bin_width = 0.01 s = 10 ms)
# ------------------------------------------------------------
bin_width_s = 0.01  # 10 ms bins

# Resampling / fill options gathered in one place and unpacked into the call.
batch_options = dict(
    count_method="hist",  # default histogram
    float_downsample="mean",
    float_fill="ffill",
    int_fill="pad_zero",
    bool_fill="or",
    nan_policy="warn",
    one_hot_categories=False,  # no categorical streams in this demo
)
batch = to_decoder_batch(
    rb,
    bin_width_s=bin_width_s,
    # "counts" asks for the spike times to be binned alongside the two LFP channels.
    signals_to_use=["lfp1", "lfp2", "counts"],
    **batch_options,
)

print("=== 4) DecoderBatch contents ===")
print(f"  • n_time bins: {batch.n_time}")
print(f"  • bin_edges_s (first 5): {batch.bin_edges_s[:5]}")
counts_shape = batch.counts.shape
print(
    f"  • 'counts' array shape: {counts_shape}  (neurons = columns = {counts_shape[1]})"
)
print(f"  • 'lfp1' shape: {batch.signals['lfp1'].shape}")
print(f"  • 'lfp2' shape: {batch.signals['lfp2'].shape}\n")

# Peek at the spike counts in the first few time bins (rows = bins).
print("  Counts (first 10 bins):")
first_counts = batch.counts[:10, :]
print(first_counts, "\n")

# ------------------------------------------------------------
# 5) Slice and select parts of the DecoderBatch
# ------------------------------------------------------------
# a) Keep a window of bins. The "should be" value printed below is
#    stop_bin - start_bin, i.e. the stop index is treated as exclusive;
#    with 10 ms bins the window is roughly 0.50 s to 1.00 s.
start_bin, stop_bin = 50, 100
sliced = batch.slice(start_bin, stop_bin, slice_spikes=True)
expected_bins = stop_bin - start_bin
print(f"=== 5a) Sliced DecoderBatch (bins {start_bin}:{stop_bin}) ===")
print(f"  • New n_time = {sliced.n_time}  (should be {expected_bins})")
print(f"  • Sliced counts shape: {sliced.counts.shape}")
print(f"  • Sliced lfp1 shape: {sliced.signals['lfp1'].shape}\n")

# b) From the sliced batch, keep just the "lfp2" signal.
sub_signals = sliced.select_signals(["lfp2"])
remaining_keys = list(sub_signals.signals.keys())
print("=== 5b) select_signals(['lfp2']) on sliced ===")
print(f"  • Keys now: {remaining_keys}")
print(f"  • lfp2 shape: {sub_signals.signals['lfp2'].shape}\n")

# c) From the original (unsliced) batch, keep only the neuron at index 1.
selected_spikes = batch.select_spikes([1])
kept_trains = selected_spikes.spike_times_s
print("=== 5c) select_spikes([1]) ===")
print(f"  • Remaining neurons: {len(kept_trains)}")
print(f"  • First 5 spike times (neuron 1): {kept_trains[0][:5]}\n")


# ------------------------------------------------------------
# 6) Validate that a hypothetical model’s required sources are present
# ------------------------------------------------------------
class ExampleModel:
    """Stand-in model; "calcium" is a source this script never requests."""

    required_sources = ["counts", "lfp1", "lfp2", "calcium"]


# A ValueError from validate_sources is caught here and its message is shown.
try:
    validate_sources(batch, [ExampleModel()])
except ValueError as err:
    print("=== 6) validate_sources error ===", err, sep="\n")


# Both sources listed here ("counts", "lfp1") were requested when the batch
# was built, so the demo expects this validation to pass without raising.
class AnotherModel:
    """Stand-in model that lists only "counts" and "lfp1" as required."""

    required_sources = ["counts", "lfp1"]


satisfied_models = [AnotherModel()]
validate_sources(batch, satisfied_models)
print("=== 6) validate_sources success for ['counts','lfp1'] ===\n")

# ------------------------------------------------------------
# Done
# ------------------------------------------------------------

=== 1) DataFrame head ===
           lfp1      lfp2
0.000 -0.027190  0.912882
0.002  0.094207  0.957687
0.004  0.109566  1.029285
0.006  0.283764  0.950148
0.008  0.402577  0.510376 

Index dtype: float64, length: 1000

=== 2) RecordingBundle.signals keys & metadata ===
  • 'lfp1': data.shape=(1000,), sampling_rate_hz=500.0, start_s=0.000
  • 'lfp2': data.shape=(1000,), sampling_rate_hz=500.0, start_s=0.000

=== 3) RecordingBundle summary ===
  • Number of neurons: 2
  • First 5 spike times (neuron 0): [0.08760753 0.12763451 0.1883547  0.25622727 0.30857898]
  • First 5 spike times (neuron 1): [0.25984301 0.27950497 0.37894272 0.4538187  0.57665621]
  • Continuous signals: ['lfp1', 'lfp2']

=== 4) DecoderBatch contents ===
  • n_time bins: 201
  • bin_edges_s (first 5): [0.   0.01 0.02 0.03 0.04]
  • 'counts' array shape: (201, 2)  (neurons = columns = 2)
  • 'lfp1' shape: (201,)
  • 'lfp2' shape: (201,)

  Counts (first 10 bins):
[[0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]