# Mind-to-Script — End-to-End Demo (Synthetic)

This notebook demonstrates an end-to-end flow using synthetic EEG data:
1. Create a canonical synthetic ZuCo-style `.pkl` file
2. Build shards from canonical files (using the repository script)
3. Create and save a bridge checkpoint (encoder + projection)
4. Load the bridge into the runtime model with a dummy decoder/tokenizer
5. Run inference on a synthetic EEG epoch

This demo uses a lightweight dummy decoder and tokenizer to avoid downloading a large Hugging Face (HF) model.

In [None]:
import os
from pathlib import Path
import pickle
import numpy as np

# Absolute path of the current working directory, plus a 'Neuralinked' path
# located under the parent of the working directory.
BASE = Path('.').resolve()
ROOT = Path('..').resolve() / 'Neuralinked'

# Create the output folders that later cells write into (no-op if they exist).
for out_dir in ('data/canonical', 'models', 'data/shards'):
    os.makedirs(out_dir, exist_ok=True)

print('Working dir:', BASE)


In [None]:
# 1) Create a synthetic canonical pickle
# A fixed seed keeps the synthetic recording identical across runs, so a
# "Restart Kernel -> Run All" reproduces exactly the same canonical file.
SEED = 0
rng = np.random.default_rng(SEED)

sfreq = 500.0       # samples per second (n_samples = sfreq * duration_s)
n_channels = 8
duration_s = 2.0
n_samples = int(sfreq * duration_s)

# Gaussian noise of shape (n_channels, n_samples), stored as float32.
signals = rng.standard_normal((n_channels, n_samples)).astype(np.float32)

# One sentence with per-word onset/offset values.
# NOTE(review): onsets/offsets are presumably in seconds — confirm against the shard builder.
sentences = [
    {
        'text': 'hello world',
        'words': ['hello', 'world'],
        'onsets': [0.1, 1.0],
        'offsets': [0.6, 1.5]
    }
]
canonical = {
    'signals': signals,
    'sfreq': sfreq,
    'ch_names': [f'EEG{i}' for i in range(n_channels)],
    'sentences': sentences,
    'meta': {'subject': 'subj_synth'}
}

p = Path('data/canonical/subj_synth_file1.pkl')
# Keep this cell self-contained: create the target folder if it is missing.
p.parent.mkdir(parents=True, exist_ok=True)
with open(p, 'wb') as f:
    pickle.dump(canonical, f)
print('Wrote canonical pickle:', p)


In [None]:
# 2) Build shards using the repository script (invokes build_manifest_and_shards.py)
import subprocess
import sys

# Launch the script with the interpreter that runs this kernel (sys.executable)
# instead of whatever `python` resolves to on PATH, so the subprocess sees the
# same environment and installed packages as the notebook.
# The script path is relative, so the working directory must contain `scripts/`.
subprocess.run(
    [
        sys.executable, 'scripts/build_manifest_and_shards.py',
        '--canonical', 'data/canonical',
        '--out', 'data/shards',
        '--version', 'v0.0.1',
        '--shard-size', '16',
    ],
    check=True,  # raise CalledProcessError if the script exits with a non-zero status
)
print('Shards built under data/shards/v0.0.1')


In [None]:
# 3) Create and save a bridge checkpoint (encoder + projection)
from app.model import EEGEncoder
import torch

# Encoder hyper-parameters; the input width follows the synthetic recording.
in_channels, cnn_channels, lstm_hidden = n_channels, 32, 64

# The encoder is built before the projection, as in the original cell order.
encoder = EEGEncoder(
    in_channels=in_channels,
    cnn_channels=cnn_channels,
    lstm_hidden=lstm_hidden,
)
# Maps 2 * lstm_hidden features to the dummy d_model of 128 used by this demo.
# NOTE(review): the factor 2 presumably reflects a bidirectional LSTM — confirm in app.model.
projection = torch.nn.Linear(2 * lstm_hidden, 128)

# Bundle both state dicts with the configuration needed to rebuild the encoder.
ckpt = dict(
    encoder_state_dict=encoder.state_dict(),
    projection_state_dict=projection.state_dict(),
    config=dict(
        in_channels=in_channels,
        cnn_channels=cnn_channels,
        lstm_hidden=lstm_hidden,
        hf_model_name='dummy',
    ),
)
torch.save(ckpt, 'models/bridge.pt')
print('Saved bridge checkpoint to models/bridge.pt')


In [None]:
# 4) Load the bridge into the runtime model and attach dummy tokenizer/decoder
from app.model import MindToScriptModel
import torch

class DummyTokenizer:
    """Stand-in tokenizer that returns a fixed string for every sequence."""

    def batch_decode(self, sequences, skip_special_tokens=True):
        """Return one 'decoded text' entry per row of ``sequences``.

        ``sequences`` only needs a ``shape`` attribute whose first entry is the
        batch size; ``skip_special_tokens`` is accepted but ignored.
        """
        batch_size = sequences.shape[0]
        return ['decoded text' for _ in range(batch_size)]

class DummyDecoder:
    """Stand-in decoder that emits random token ids and scores instead of real text."""

    def __init__(self, d_model=128, vocab_size=100):
        """Store the vocabulary size and expose ``d_model`` through ``self.config``."""
        self.vocab_size = vocab_size
        # `config` is a throwaway class carrying a single `d_model` attribute.
        self.config = type('C', (), dict(d_model=d_model))

    def generate(self, encoder_outputs=None, max_length=32, num_beams=1, return_dict_in_generate=True, output_scores=True):
        """Return an object with random ``sequences`` and ``scores``.

        The batch size is read from ``encoder_outputs.last_hidden_state.shape[0]``.
        ``sequences`` has shape (batch, 5) with ids in [0, vocab_size); ``scores``
        is a list of 4 tensors of shape (batch, vocab_size). The remaining
        keyword arguments are accepted but not used.
        """
        n_items = encoder_outputs.last_hidden_state.shape[0]
        # Token ids are drawn first, then the per-step scores.
        token_ids = torch.randint(0, self.vocab_size, (n_items, 5))
        step_scores = []
        for _ in range(4):
            step_scores.append(torch.randn(n_items, self.vocab_size))
        result_cls = type('G', (), {'sequences': token_ids, 'scores': step_scores})
        return result_cls()

# Build the runtime model on CPU and attach the dummy tokenizer/decoder classes
# defined in this cell in place of real ones. d_model=128 is the same width as
# the demo projection layer created in step 3.
m = MindToScriptModel(device='cpu')
m.tokenizer = DummyTokenizer()
m.decoder = DummyDecoder(d_model=128)
# Load the bridge checkpoint manually: read the saved file onto CPU, then set
# the model's private bridge attributes directly.
ck = torch.load('models/bridge.pt', map_location='cpu')
m._bridge_config = ck.get('config', {})  # empty dict if the checkpoint has no 'config' entry
m._bridge_ckpt = 'models/bridge.pt'
# NOTE(review): presumably _ensure_encoder builds the encoder/projection from the
# attributes set above — confirm against app.model before reordering these lines.
m._ensure_encoder(in_channels)
print('Model bridge loaded (encoder + projection).')


In [None]:
# 5) Run inference on a synthetic epoch
# Draw one random epoch of shape (in_channels, 256) and pass it to the model.
sig = np.random.standard_normal(size=(in_channels, 256)).astype(float)
texts, confidences = m.predict(sig)

# Report the first prediction, then its confidence.
for label, values in (('Predicted text:', texts), ('Confidence:', confidences)):
    print(label, values[0])


You can now inspect `data/shards/v0.0.1/` for shard files and `models/bridge.pt` for the bridge checkpoint.
Extend this notebook to run multiple synthetic examples, or replace the dummy decoder with a real HF model if you have network access.