# Kaggle Inference Export Notebook — Brain-to-Text RNN

Instructions:
- In Kaggle, create a new Notebook with GPU enabled.
- Add these Datasets:
  - Dryad data bundle (e.g., `brain-to-text-dryad-bundle`)
  - This repository code (e.g., `nejm-brain-to-text`)
  - The Output from the Training notebook (contains `trained_models`)
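- Internet: Off (note: the dependency cell below runs `pip install` with pinned versions, which needs Internet On unless those packages are already available offline, e.g., from an attached wheels Dataset)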

This notebook loads the best checkpoint and exports per-session phoneme logits for the val and test splits, so that language-model (LM) decoding can be run outside Kaggle.



In [None]:
# Prepare repo and data from attached Datasets
# Stop at the first failed step instead of continuing with a half-prepared tree.
set -e

REPO=/kaggle/working/nejm-brain-to-text

# Copy the directory *contents* into the destination so that re-running this
# cell does not nest a second copy of the repo inside the existing one.
mkdir -p "$REPO"
cp -r /kaggle/input/nejm-brain-to-text/. "$REPO"/

mkdir -p "$REPO/data"
cp -r /kaggle/input/brain-to-text-dryad-bundle/hdf5_data_final "$REPO/data/"
cp /kaggle/input/brain-to-text-dryad-bundle/t15_copyTaskData_description.csv "$REPO/data/"

# Copy trained models from Training notebook output dataset
# Replace the placeholder value below with your actual output dataset slug.
# The slug lives in a quoted variable because a raw <...> placeholder inside a
# path would be parsed by the shell as input/output redirections.
TRAINING_OUTPUT_DATASET="REPLACE_WITH_TRAINING_OUTPUT_DATASET"
if [ "$TRAINING_OUTPUT_DATASET" = "REPLACE_WITH_TRAINING_OUTPUT_DATASET" ]; then
  echo "Set TRAINING_OUTPUT_DATASET to the slug of your Training notebook output dataset." >&2
  exit 1
fi
mkdir -p "$REPO/model_training"
cp -r "/kaggle/input/$TRAINING_OUTPUT_DATASET/trained_models" "$REPO/model_training/"

ls -la "$REPO/model_training/trained_models/baseline_rnn/checkpoint" | head -n 50


In [None]:
# Minimal dependencies for export
# h5py is pinned to 3.11.0 rather than 3.10.0: 3.11 is the first h5py release
# that supports NumPy 2.x, and NumPy is pinned to 2.0.2 here.
# These installs fetch from the package index, so they need network access
# unless the packages are already available offline.
pip install -q --no-cache-dir \
  numpy==2.0.2 pandas==2.2.2 \
  omegaconf==2.3.0 tqdm==4.66.4 h5py==3.11.0

# Editable install of the repo copy so `model_training` is importable.
pip install -q -e /kaggle/working/nejm-brain-to-text


In [None]:
# Export logits for val and test
import os, numpy as np, torch
from omegaconf import OmegaConf
import pandas as pd
from tqdm import tqdm
from pathlib import Path
from model_training.evaluate_model_helpers import load_h5py_file
from model_training.rnn_model import GRUDecoder

# Locations inside the working copy of the repository.
repo = "/kaggle/working/nejm-brain-to-text"
data_dir = f"{repo}/data/hdf5_data_final"
csv_path = f"{repo}/data/t15_copyTaskData_description.csv"
model_dir = f"{repo}/model_training/trained_models/baseline_rnn"
args_yaml = f"{model_dir}/checkpoint/args.yaml"
out_dir = f"{repo}/model_training/logits_export"

# Create the export directory (and any missing parents) up front; an
# already-existing directory is fine.
os.makedirs(out_dir, exist_ok=True)

# Configuration stored next to the checkpoint.
args = OmegaConf.load(args_yaml)

# Use the first CUDA device when one is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Build the decoder from the hyperparameters recorded in the loaded config.
model_cfg = args['model']
dataset_cfg = args['dataset']

model = GRUDecoder(
    neural_dim=model_cfg['n_input_features'],
    n_units=model_cfg['n_units'],
    # One entry per recording session listed in the config.
    n_days=len(dataset_cfg['sessions']),
    n_classes=dataset_cfg['n_classes'],
    rnn_dropout=model_cfg['rnn_dropout'],
    input_dropout=model_cfg['input_network']['input_layer_dropout'],
    n_layers=model_cfg['n_layers'],
    patch_size=model_cfg['patch_size'],
    patch_stride=model_cfg['patch_stride'],
)
# Move the parameters onto the selected device.
model = model.to(device)

# Load the best checkpoint onto the selected device.
# NOTE: weights_only=False unpickles arbitrary Python objects, so only load
# checkpoints that come from a source you trust (here: your own training run).
ckpt = torch.load(f"{model_dir}/checkpoint/best_checkpoint", map_location=device, weights_only=False)

# Strip wrapper prefixes from the parameter names so they match the bare model:
# "module." is typically added by (Distributed)DataParallel wrappers and
# "_orig_mod." by torch.compile. Both prefixes are removed in a single rename;
# popping the same key twice (once per prefix) raises KeyError as soon as the
# first rename has actually changed the key.
state_dict = ckpt['model_state_dict']
for key in list(state_dict.keys()):
    clean_key = key.replace("module.", "").replace("_orig_mod.", "")
    state_dict[clean_key] = state_dict.pop(key)

model.load_state_dict(state_dict)
# Inference mode: disables dropout and other training-only behaviour.
model.eval()

# Trial description table; passed to load_h5py_file when reading each session.
b2txt_csv_df = pd.read_csv(csv_path)

def _pack_trial_logits(trial_logits):
    """Pack a list of per-trial logit arrays into a single saveable array.

    When every trial has the same shape (or the list is empty) the result is
    the dense float32 array ``np.array(trial_logits, dtype=np.float32)``.
    When trial shapes differ, NumPy cannot build a rectangular array (and
    raises ``ValueError`` for ragged input), so a 1-D ``dtype=object`` array
    holding one float32 array per trial is returned instead.
    """
    if len({logits.shape for logits in trial_logits}) <= 1:
        return np.array(trial_logits, dtype=np.float32)
    packed = np.empty(len(trial_logits), dtype=object)
    for i, logits in enumerate(trial_logits):
        packed[i] = np.asarray(logits, dtype=np.float32)
    return packed


@torch.no_grad()
def export_split(eval_type):
    """Run the model on every trial of one split and save logits per session.

    For each session in ``args['dataset']['sessions']`` that has a
    ``data_{eval_type}.hdf5`` file under ``data_dir``, the trials are passed
    through ``model`` one at a time and the resulting logits are written to
    ``{out_dir}/{session}_{eval_type}_logits.npy``. Sessions without that file
    are skipped.

    Args:
        eval_type: Split name used in the file names, e.g. ``"val"`` or
            ``"test"``.

    Returns:
        None. The output is the set of ``.npy`` files. A session whose trials
        differ in shape is stored as an object array and must be read back
        with ``np.load(path, allow_pickle=True)``.
    """
    sessions = list(args['dataset']['sessions'])

    # First pass: load every available session so the progress bar knows the
    # total number of trials up front.
    export = {}
    total = 0
    for session in sessions:
        split_path = os.path.join(data_dir, session, f"data_{eval_type}.hdf5")
        # isfile also covers a missing session directory, which would make
        # os.listdir raise.
        if not os.path.isfile(split_path):
            continue
        data = load_h5py_file(split_path, b2txt_csv_df)
        export[session] = data
        total += len(data["neural_features"])

    # Feed inputs in the dtype of the model's own parameters. No autocast
    # context is active here, so a hard-coded reduced-precision input dtype
    # would not match weights that were loaded in a different dtype.
    # NOTE(review): confirm whether the training/evaluation pipeline applies
    # any input preprocessing that should be replicated before the model call.
    param_dtype = next(model.parameters()).dtype

    with tqdm(total=total, desc=f"Export {eval_type} logits", unit="trial") as pbar:
        for session, data in export.items():
            # The model is indexed by the session's position in the config list.
            day_idx = torch.tensor([sessions.index(session)], device=device)
            sess_logits = []
            for features in data['neural_features']:
                # Add a leading batch dimension of size 1.
                x = torch.tensor(np.expand_dims(features, axis=0), device=device, dtype=param_dtype)
                # [0] drops the batch dimension again.
                logits = model(x, day_idx)[0].float().cpu().numpy()  # T x C
                sess_logits.append(logits)
                pbar.update(1)
            np.save(
                os.path.join(out_dir, f"{session}_{eval_type}_logits.npy"),
                _pack_trial_logits(sess_logits),
            )

# Export both evaluation splits, then report where the files were written.
for split_name in ("val", "test"):
    export_split(split_name)
print("Saved to:", out_dir)



In [None]:
# Stage the exported logits under the Notebook Output directory for download
EXPORT_ROOT=/kaggle/working/export
LOGITS_DIR=/kaggle/working/nejm-brain-to-text/model_training/logits_export

mkdir -p "$EXPORT_ROOT"
cp -r "$LOGITS_DIR" "$EXPORT_ROOT"/

# Show (at most the first 60 lines of) what ended up in the export directory.
ls -R "$EXPORT_ROOT" | head -n 60
