- Time to concatenate 24 hours of LFP (12x 2hr files) = 30 minutes. 
- Size of 24 hours of LFP = ~160 GB

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import subprocess
from pathlib import Path
from datetime import datetime
from joblib import Parallel, delayed
from ecephys.sglx_utils.cat_gt import get_catGT_command

In [None]:
def get_run_specs(subject, condition_group, rerun_existing=True):
    """Build the list of CatGT run specifications for one subject.

    For each condition returned by ``paths.get_conditions(subject,
    condition_group)``, the condition's entry in the datapath YAML is read,
    its SGLX file stems are parsed, and the result is collapsed into a single
    run specification covering all triggers of that condition.

    Args:
        subject: Subject key used to index the datapath YAML.
        condition_group: Name of the condition group to expand.
        rerun_existing: When False, conditions whose output ``ap.meta`` file
            already exists are skipped.

    Returns:
        A list of ``(subject, exp, run, cfg)`` tuples, where ``cfg`` is a dict
        with string values under the keys ``'g'``, ``'prb'`` and ``'t'``
        (``'t'`` is a comma-separated, ascending list of trigger indices,
        e.g. ``"0,1,2,8"`` or ``"0"``).

    Raises:
        ValueError: If ``rerun_existing`` is False and the number of output
            ``ap.meta`` paths for a condition is not exactly one, or if a
            condition lists no file stems.
        AssertionError: If a condition spans several experiments, runs,
            probes or gates (not supported yet).
    """
    # Project imports are kept local to the function, as in the original.
    from ecephys_analyses.data import paths
    from ecephys.data.paths import parse_sglx_stem

    conditions = paths.get_conditions(subject, condition_group)
    datapath_dict = paths.load_datapath_yaml()
    all_run_specs = []
    for cond in conditions:
        if not rerun_existing:
            output_ap_meta = paths.get_sglx_style_datapaths(
                subject, cond, 'ap.meta', catgt_data=True
            )
            if len(output_ap_meta) != 1:
                raise ValueError(
                    f"Expected exactly one output ap.meta path for {subject} "
                    f"{cond}, found {len(output_ap_meta)}. "
                    f"Only run catgt on non-combined conditions."
                )
            if output_ap_meta[0].exists():
                print(
                    f"Rerun=False: pass {subject} {cond} "
                    f"(found ap.meta at {output_ap_meta[0]})"
                )
                continue

        cond_spec = datapath_dict[subject][cond]

        # Experiment: exactly one experiment key is supported per condition.
        assert len(cond_spec) == 1, (
            f"Expected a single experiment for {subject} {cond}, "
            f"got {list(cond_spec)}"
        )
        exp = next(iter(cond_spec))

        # Run / gate / trigger / probe, parsed from every file stem.
        stems = cond_spec[exp]
        if not stems:
            raise ValueError(f"No file stems listed for {subject} {cond} ({exp})")
        runs, gates, trigs, probes = zip(*(
            parse_sglx_stem(stem) for stem in stems
        ))
        assert len(set(runs)) == 1, f"Multiple runs for {subject} {cond}"  #TODO
        assert len(set(probes)) == 1, f"Multiple probes for {subject} {cond}"  #TODO
        assert len(set(gates)) == 1, f"Multiple gates for {subject} {cond}"  #TODO

        # Trigger labels are split on 't' and sorted numerically
        # (presumably of the form 't<N>' -- confirm against parse_sglx_stem).
        sorted_trigs = sorted(int(t.split('t')[1]) for t in trigs)
        trg_str = ','.join(str(t) for t in sorted_trigs)  # "0,1,2,8" or "0"

        # Run, gate and probe are unique across stems (asserted above), so the
        # first parsed value of each is representative.
        run, gate, probe = runs[0], gates[0], probes[0]
        all_run_specs.append(
            (subject, exp, run, {
                'g': gate.split('g')[1],
                'prb': probe.split('imec')[1],
                't': trg_str,
            })
        )
    print("...")
    return all_run_specs

In [None]:
# ----------
# USER
# Everything between "USER" and "END USER" is meant to be edited by hand
# before running the notebook.

# NOTE(review): not referenced by any other code visible in this notebook.
analysis_id = 'tom/catgt'

# CatGT options shared by every run. In run_catgt this dict is merged AFTER
# the per-run cfg, so these values win if a key appears in both.
# NOTE(review): option meanings are defined by CatGT itself -- see its docs.
catgt_base_cfg = {
    'aphipass': 300,
    'aplopass': 9000,
    'gbldmx': True,
    'gfix': '0.40,0.10,0.02',
}  # Applied to all

# Used as the second field of the `SY` argument built in run_catgt.
sync_channel = 384  # Applied to all

# ---- Option 1: manual list of runs ----
# Each entry is (subject, experiment, run, catgt_cfg).
# catgt_cfg must contain the following keys: t, g, prb
# all_run_specs = [
#     ('CNPIX3-Valentino', '2-19-2020', '2-19-2020',    {'g': '1', 'prb': '0', 't': '4,4',}),
# ]

# ---- Option 2: automatic generation of the runs to process ----
# [(<subject>, <condition_group>), ]
condition_groups = [
    ('Doppio', 'SD_catgt'),
    ('Allan', 'SD_catgt'),
    ('Luigi', 'SD_catgt'),
]

# When False, get_run_specs skips conditions whose output ap.meta file exists.
rerun_existing=False  # Pass if output meta file exists

# Expand every (subject, condition_group) pair into run specs and print a
# short summary so the user can sanity-check them before launching CatGT.
all_run_specs = []
for subject, condition_group in condition_groups:
    all_run_specs += get_run_specs(subject, condition_group, rerun_existing=rerun_existing)
print(f"N runs (rerun_existing={rerun_existing}) = {len(all_run_specs)}")
print(f"run specs (first 5). ```subject, experiment, run, catgt_cfg``` Looks good?: {all_run_specs[0:5]}")


# Passed through to get_catGT_command as its `ap` / `lf` arguments.
ap = True
lf = False

# 1 -> runs are processed one after another in a plain loop;
# any other value -> runs are dispatched through joblib.Parallel.
n_jobs = 1

# When True, run_catgt only prints the command and does not execute it.
dry_run = True

# END USER
# ----------

# The skip-if-done check in get_run_specs only looks at ap.meta, so `lf` may
# only be enabled together with rerun_existing=True.
assert lf == False or rerun_existing  # Rerun logic only checks ap.meta

In [None]:
# Keys in roots.yml.
# run_catgt passes these to paths.get_subject_root() to resolve the
# per-subject source (raw data) and target (CatGT output) root directories.
SRC_ROOT_KEY = 'raw_chronic'
TGT_ROOT_KEY = 'catgt'

In [None]:
# Path handed to get_catGT_command as `catGT_path` (the CatGT launcher script).
CATGT_PATH = "/Volumes/scratch/neuropixels/bin/CatGT-linux/runit.sh"

In [None]:
def run_catgt(run_specs, dry_run=dry_run):
    """Build and (optionally) execute the CatGT command for one run.

    Args:
        run_specs: ``(subject, experiment, run, catgt_cfg)`` tuple, as
            produced by ``get_run_specs``. ``catgt_cfg`` must provide the
            keys ``'g'``, ``'t'`` and ``'prb'``.
        dry_run: When True, only print the command; nothing is created or
            executed. Defaults to the notebook-level ``dry_run`` value at the
            time this function is defined.

    Returns:
        None. Progress, timing and any CatGT failure output are printed.

    Raises:
        AssertionError: If a required key is missing from the merged config.
    """
    # Imported locally (as in get_run_specs): `paths` is not imported at
    # notebook level, so relying on a global would raise NameError.
    from ecephys_analyses.data import paths

    subj_id, exp_id, run_id, run_catgt_cfg = run_specs

    # Base options are merged last, so they override per-run values when a
    # key appears in both dicts.
    catgt_cfg = {
        **run_catgt_cfg,
        **catgt_base_cfg,
    }

    for key in ['g', 't', 'prb']:
        assert key in catgt_cfg, f"Missing required catGT cfg key '{key}'"

    # Resolve roots from the subject of THIS run. The previous code used the
    # notebook-global `subject`, i.e. whichever subject the configuration
    # loop happened to finish on.
    src_dir = paths.get_subject_root(subj_id, SRC_ROOT_KEY)/exp_id
    dest_dir = paths.get_subject_root(subj_id, TGT_ROOT_KEY)/exp_id

    cmd = get_catGT_command(
        catGT_path=CATGT_PATH,
        wine_path=None,
        dir=str(src_dir),
        dest=str(dest_dir),
        run=run_id,
        ap=ap,
        lf=lf,
        SY=f"{catgt_cfg['prb']},{sync_channel},6,500",
        prb_fld=True,
        out_prb_fld=True,
        **catgt_cfg,
    )

    start = datetime.now()
    print(f"Running {cmd}")
    if dry_run:
        print("Dry run: doing nothing")
    else:
        dest_dir.mkdir(parents=True, exist_ok=True)
        process = subprocess.run(cmd, shell=True, capture_output=True, text=True)
        # Output is captured, so surface it on failure instead of silently
        # discarding it. Reported rather than raised so that one failing run
        # does not abort the remaining ones.
        if process.returncode != 0:
            print(
                f"CatGT FAILED for {subj_id}, {run_id} "
                f"(exit code {process.returncode})"
            )
            if process.stdout:
                print(f"stdout:\n{process.stdout}")
            if process.stderr:
                print(f"stderr:\n{process.stderr}")
    end = datetime.now()

    print(f"{end.strftime('%H:%M:%S')}: Finished {subj_id}, {run_id}. Run time = {str(end - start)}")

In [None]:
# Launch CatGT for every run spec: through a joblib worker pool when more
# than one job is requested, otherwise one run at a time in this process.
if n_jobs != 1:
    parallel = Parallel(n_jobs=n_jobs, backend='multiprocessing')(
        delayed(run_catgt)(run_specs, dry_run=dry_run)
        for run_specs in all_run_specs
    )
else:
    for run_specs in all_run_specs:
        # Echo the spec first so the log shows which run is being processed.
        print(run_specs)
        run_catgt(run_specs, dry_run=dry_run)