In [1]:
%load_ext autoreload
%autoreload 2

In [223]:
import ecephys_analyses as ea
import yaml
from pathlib import Path
import ecephys.data_mgmt.paths as mgmt
from itertools import chain
import re
import os

In [39]:
# Resolve the path to 'example_org_spec.yaml' through the ecephys_analyses
# `package_datapath` helper.
yaml_path = ea.package_datapath('example_org_spec.yaml')

In [40]:
# Parse the YAML spec. `safe_load` only builds plain Python containers and
# scalars, so it is the right choice for a data-only file.
with open(yaml_path) as fp:
    yaml_data = yaml.safe_load(fp)
# `doc` is the name the cells below use for the parsed spec.
doc = yaml_data

In [107]:
# Pick one concrete example for the cells below: the first recording session
# in the spec, the third SpikeGLX run listed for it, gate "g1", probe "imec0".
session = doc['recording-sessions'][0]
run = session['SpikeGLX-runs'][2]
gate = 'g1'
probe = 'imec0'

PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g1/3-1-2021_A_g1_imec0')

In [216]:
def parse_sglx_fname(fname):
    """Parse recording identifiers from a SpikeGLX-style filename.

    Parameters
    ----------
    fname: str
        The filename to parse, e.g. "my-run-name_g0_t1.imec2.lf.bin"

    Returns
    -------
    run: str
        The run name, e.g. "my-run-name".
    gate: str
        The gate identifier, e.g. "g0".
    trigger: str
        The trigger identifier, e.g. "t1".
    probe: str
        The probe identifier, e.g. "imec2"
    dtype: str
        The data type identifier, "lf" or "ap"
    ftype: str
        The file type identifier, "bin" or "meta"

    Raises
    ------
    ValueError
        If `fname` does not end with a
        "_g<N>_t<N>.imec<N>.<ap|lf>.<bin|meta>" suffix.
    """
    # Every "." is escaped so only a literal dot separates the fields, and \Z
    # forces the suffix to sit at the very end of the string.
    match = re.search(
        r"_(g\d+)_(t\d+)\.(imec\d+)\.(ap|lf)\.(bin|meta)\Z", fname
    )
    if match is None:
        raise ValueError(f"Not a SpikeGLX-style filename: {fname!r}")

    run = fname[: match.start()]  # The run name is everything before the match
    gate, trigger, probe, dtype, ftype = match.groups()

    return (run, gate, trigger, probe, dtype, ftype)

In [217]:
# Sanity check: parse one example filename into its six identifiers.
fname = '3-1-2021_A_g1_t0.imec0.lf.meta'
parse_sglx_fname(fname)

('3-1-2021_A', 'g1', 't0', 'imec0', 'lf', 'meta')

In [235]:
# Probe directory for the example run/gate/probe:
# <raw-data-root>/<session directory>/<run>_<gate>/<run>_<gate>_<probe>
probe_dir = Path(doc['raw-data-root'], session['directory'], f"{run}_{gate}", f"{run}_{gate}_{probe}")

In [267]:
def get_trigger_files(probe_dir):
    """List the SpikeGLX trigger files found directly inside a probe directory.

    Parameters
    ----------
    probe_dir: pathlib.Path
        The directory to search (not searched recursively).

    Returns
    -------
    list of pathlib.Path
        Sorted paths of the regular files whose names end with
        "_g<N>_t<N>.imec<N>.<ap|lf>.<bin|meta>".
    """
    # The glob is only a coarse pre-filter; it uses "imec*" rather than
    # "imec[0-9]" so probes numbered 10 and above are not silently dropped.
    # The regex, with literal dots, then decides what really counts.
    fname_pattern = re.compile(r"_g\d+_t\d+\.imec\d+\.(ap|lf)\.(bin|meta)\Z")
    matches = [
        p
        for p in probe_dir.glob("*_g*_t*.imec*.*.*")
        if p.is_file() and fname_pattern.search(p.name)
    ]
    return sorted(matches)

# Show the trigger files found in the example probe directory.
get_trigger_files(probe_dir)

[PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g1/3-1-2021_A_g1_imec0/3-1-2021_A_g1_t0.imec0.ap.bin'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g1/3-1-2021_A_g1_imec0/3-1-2021_A_g1_t0.imec0.ap.meta'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g1/3-1-2021_A_g1_imec0/3-1-2021_A_g1_t0.imec0.lf.bin'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g1/3-1-2021_A_g1_imec0/3-1-2021_A_g1_t0.imec0.lf.meta'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g1/3-1-2021_A_g1_imec0/3-1-2021_A_g1_t1.imec0.ap.bin'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g1/3-1-2021_A_g1_imec0/3-1-2021_A_g1_t1.imec0.ap.meta'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g1/3-1-2021_A_g1_imec0/3-1-2021_A_g1_t1.imec0.lf.bin'),
 Po

In [350]:
def get_unique_trigger_stems(probe_dir, with_probe=False):
    """Return the sorted, de-duplicated trigger stems found in a probe directory.

    Parameters
    ----------
    probe_dir: pathlib.Path
        Directory whose trigger files are inspected.
    with_probe: bool
        If True, stems look like "<run>_<gate>_<trigger>.<probe>";
        otherwise "<run>_<gate>_<trigger>".

    Returns
    -------
    list of str
        Each distinct stem once, in sorted order.
    """
    unique_stems = set()
    for path in get_trigger_files(probe_dir):
        # dtype and ftype are parsed but play no part in the stem.
        run, gate, trigger, probe, _dtype, _ftype = parse_sglx_fname(path.name)
        stem = f"{run}_{gate}_{trigger}"
        if with_probe:
            stem = f"{stem}.{probe}"
        unique_stems.add(stem)
    return sorted(unique_stems)


# Print the unique stems first without, then with, the probe identifier.
print(get_unique_trigger_stems(probe_dir))
print(get_unique_trigger_stems(probe_dir, with_probe=True))

['3-1-2021_A_g1_t0', '3-1-2021_A_g1_t1']
['3-1-2021_A_g1_t0.imec0', '3-1-2021_A_g1_t1.imec0']


In [263]:
# Gate directory for the example run/gate:
# <raw-data-root>/<session directory>/<run>_<gate>
# Built straight from the spec so this cell does not rely on a variable that
# is only defined further down the notebook.
gate_dir = Path(doc['raw-data-root'], session['directory'], f"{run}_{gate}")

In [293]:
#def get_probe_directories(session_dir, run, probe):
    #return [gate_dir / f"{gate_dir.name}_{probe}" for gate_dir in get_gate_directories(session_dir, run)]    
    
def get_probe_directories(gate_dir, probe_regex=r"imec\d+"):
    """List the probe directories found directly inside a gate directory.

    A probe directory is named "<gate directory name>_<probe>", where the gate
    directory name ends in "_g<N>" and <probe> matches `probe_regex`.

    Parameters
    ----------
    gate_dir: pathlib.Path
        The gate directory to search (not searched recursively).
    probe_regex: str
        Regular expression the probe part of the name must match in full,
        e.g. r"imec\d+" for every probe or "imec0" for a single one.

    Returns
    -------
    list of pathlib.Path
        Sorted paths of the matching sub-directories.
    """
    # probe_regex is wrapped in a non-capturing group so an alternation such
    # as "imec0|imec1" stays confined to the probe part of the pattern.
    probe_group = r"(?:" + probe_regex + r")"
    # The whole name must be "<gate_dir.name>_<probe>" ...
    name_pattern = re.compile(re.escape(gate_dir.name) + "_" + probe_group)
    # ... and the probe part must directly follow a "_g<N>" gate suffix.
    search_string = r"_g\d+_" + probe_group + r"\Z"
    # Filtering is done by regex rather than an "imec[0-9]" glob, so probes
    # numbered 10 and above are found and probe_regex is what decides.
    matches = [
        p
        for p in gate_dir.glob("*")
        if name_pattern.fullmatch(p.name)
        and re.search(search_string, p.name)
        and p.is_dir()
    ]
    return sorted(matches)

# Print every probe directory for the example gate, then only probe imec0.
print(get_probe_directories(gate_dir))
print(get_probe_directories(gate_dir, probe_regex="imec0"))

[PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g1/3-1-2021_A_g1_imec0'), PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g1/3-1-2021_A_g1_imec1')]
[PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g1/3-1-2021_A_g1_imec0')]


In [324]:
def get_gate_files(gate_dir):
    """Collect the trigger files of every probe directory under a gate directory.

    Parameters
    ----------
    gate_dir: pathlib.Path
        The gate directory whose probe directories are searched.

    Returns
    -------
    list of pathlib.Path
        Trigger files, grouped by probe directory in the order the probe
        directories are returned by `get_probe_directories`.
    """
    gate_files = []
    for probe_dir in get_probe_directories(gate_dir):
        gate_files.extend(get_trigger_files(probe_dir))
    return gate_files
    
# Show every trigger file under the example gate directory.
get_gate_files(gate_dir)

[PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g1/3-1-2021_A_g1_imec0/3-1-2021_A_g1_t0.imec0.ap.bin'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g1/3-1-2021_A_g1_imec0/3-1-2021_A_g1_t0.imec0.ap.meta'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g1/3-1-2021_A_g1_imec0/3-1-2021_A_g1_t0.imec0.lf.bin'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g1/3-1-2021_A_g1_imec0/3-1-2021_A_g1_t0.imec0.lf.meta'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g1/3-1-2021_A_g1_imec0/3-1-2021_A_g1_t1.imec0.ap.bin'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g1/3-1-2021_A_g1_imec0/3-1-2021_A_g1_t1.imec0.ap.meta'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g1/3-1-2021_A_g1_imec0/3-1-2021_A_g1_t1.imec0.lf.bin'),
 Po

In [294]:
# Directory of the example session: <raw-data-root>/<session directory>
session_dir = Path(doc['raw-data-root'], session['directory'])

In [295]:
def get_gate_directories(session_dir, run):
    """List the gate directories of one run inside a session directory.

    Parameters
    ----------
    session_dir: pathlib.Path
        The session directory to search (not searched recursively).
    run: str
        The run name. It is taken literally, not as a glob or regex pattern.

    Returns
    -------
    list of pathlib.Path
        Sorted paths of the sub-directories named exactly "<run>_g<N>".
    """
    # The whole directory name is matched against the escaped run name, so
    # directories of a different run that merely start with "<run>_g"
    # (e.g. "<run>_g1_extra_g0") are not picked up.
    gate_name = re.compile(re.escape(run) + r"_g\d+")
    matches = [
        p
        for p in session_dir.glob("*")
        if gate_name.fullmatch(p.name) and p.is_dir()
    ]
    return sorted(matches)

# Show the gate directories of the example run.
get_gate_directories(session_dir, run)

[PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g0'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g1')]

In [325]:
def get_run_files(session_dir, run):
    """Collect the trigger files of every gate directory belonging to a run.

    Parameters
    ----------
    session_dir: pathlib.Path
        The session directory containing the run's gate directories.
    run: str
        The run name.

    Returns
    -------
    list of pathlib.Path
        Trigger files, grouped by gate directory in the order the gate
        directories are returned by `get_gate_directories`.
    """
    run_files = []
    for gate_dir in get_gate_directories(session_dir, run):
        run_files.extend(get_gate_files(gate_dir))
    return run_files

# Show every trigger file of the example run, across all of its gates.
get_run_files(session_dir, run)

[PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g0/3-1-2021_A_g0_imec0/3-1-2021_A_g0_t0.imec0.ap.bin'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g0/3-1-2021_A_g0_imec0/3-1-2021_A_g0_t0.imec0.ap.meta'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g0/3-1-2021_A_g0_imec0/3-1-2021_A_g0_t0.imec0.lf.bin'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g0/3-1-2021_A_g0_imec0/3-1-2021_A_g0_t0.imec0.lf.meta'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g0/3-1-2021_A_g0_imec1/3-1-2021_A_g0_t0.imec1.ap.bin'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g0/3-1-2021_A_g0_imec1/3-1-2021_A_g0_t0.imec1.ap.meta'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g0/3-1-2021_A_g0_imec1/3-1-2021_A_g0_t0.imec1.lf.bin'),
 Po

In [326]:
def get_session_files(session_dir, runs):
    """Collect the trigger files of several runs recorded in one session.

    Parameters
    ----------
    session_dir: pathlib.Path
        The session directory containing the runs.
    runs: iterable of str
        The run names, processed in the order given.

    Returns
    -------
    list of pathlib.Path
        The files of each run in turn, as returned by `get_run_files`.
    """
    session_files = []
    for run in runs:
        session_files.extend(get_run_files(session_dir, run))
    return session_files

# Show every trigger file for all runs listed for the example session.
get_session_files(session_dir, session["SpikeGLX-runs"])

[PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_g0/3-1-2021_g0_imec0/3-1-2021_g0_t0.imec0.ap.bin'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_g0/3-1-2021_g0_imec0/3-1-2021_g0_t0.imec0.ap.meta'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_g0/3-1-2021_g0_imec0/3-1-2021_g0_t0.imec0.lf.bin'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_g0/3-1-2021_g0_imec0/3-1-2021_g0_t0.imec0.lf.meta'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_g0/3-1-2021_g0_imec1/3-1-2021_g0_t0.imec1.ap.bin'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_g0/3-1-2021_g0_imec1/3-1-2021_g0_t0.imec1.ap.meta'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_g0/3-1-2021_g0_imec1/3-1-2021_g0_t0.imec1.lf.bin'),
 PosixPath('/Volumes/neuropixel_archive/Data/

In [329]:
def get_all_files(doc, **kwargs):
    """Collect the trigger files of every recording session described by a spec.

    Parameters
    ----------
    doc: dict
        Parsed spec. Reads "raw-data-root", and for each entry of
        "recording-sessions" its "directory" and "SpikeGLX-runs".
    **kwargs
        Accepted but currently unused.

    Returns
    -------
    list of pathlib.Path
        The files of each session in turn, as returned by `get_session_files`.
    """
    all_files = []
    for session in doc["recording-sessions"]:
        session_dir = Path(doc["raw-data-root"]) / session["directory"]
        all_files.extend(get_session_files(session_dir, session["SpikeGLX-runs"]))
    return all_files


# Show every trigger file across all sessions in the spec.
get_all_files(doc)

[PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_g0/3-1-2021_g0_imec0/3-1-2021_g0_t0.imec0.ap.bin'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_g0/3-1-2021_g0_imec0/3-1-2021_g0_t0.imec0.ap.meta'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_g0/3-1-2021_g0_imec0/3-1-2021_g0_t0.imec0.lf.bin'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_g0/3-1-2021_g0_imec0/3-1-2021_g0_t0.imec0.lf.meta'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_g0/3-1-2021_g0_imec1/3-1-2021_g0_t0.imec1.ap.bin'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_g0/3-1-2021_g0_imec1/3-1-2021_g0_t0.imec1.ap.meta'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_g0/3-1-2021_g0_imec1/3-1-2021_g0_t0.imec1.lf.bin'),
 PosixPath('/Volumes/neuropixel_archive/Data/

In [332]:
def filter_files(files, run=None, gate=None, trigger=None, probe=None, dtype=None, ftype=None):
    """Keep only the files whose parsed identifiers match the requested ones.

    Parameters
    ----------
    files: iterable of pathlib.Path
        Files with SpikeGLX-style names, as parsed by `parse_sglx_fname`.
    run, gate, trigger, probe, dtype, ftype: str, optional
        Required value for the corresponding identifier, e.g. probe="imec0".
        A falsy value (the default, None) places no constraint on that
        identifier.

    Returns
    -------
    list
        The entries of `files` that satisfy every constraint, in their
        original order.
    """
    # Same order as the tuple returned by parse_sglx_fname. `trigger` is
    # included so the trigger constraint is actually applied.
    wanted = (run, gate, trigger, probe, dtype, ftype)

    def keep_file(fname):
        parsed = parse_sglx_fname(fname)
        return all(not want or want == got for want, got in zip(wanted, parsed))

    return [f for f in files if keep_file(f.name)]

In [344]:
# Keep only files whose probe is "imec0", data type is "lf" and file type is "bin".
filter_files(get_all_files(doc), probe='imec0', dtype='lf', ftype='bin')

[PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_g0/3-1-2021_g0_imec0/3-1-2021_g0_t0.imec0.lf.bin'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021-a_g0/3-1-2021-a_g0_imec0/3-1-2021-a_g0_t0.imec0.lf.bin'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g0/3-1-2021_A_g0_imec0/3-1-2021_A_g0_t0.imec0.lf.bin'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g1/3-1-2021_A_g1_imec0/3-1-2021_A_g1_t0.imec0.lf.bin'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g1/3-1-2021_A_g1_imec0/3-1-2021_A_g1_t1.imec0.lf.bin'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_B_g0/3-1-2021_B_g0_imec0/3-1-2021_B_g0_t0.imec0.lf.bin'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_C_g0/3-1-2021_C_g0_imec0/3-1-2021_C_g0_t0.imec0.lf.bin'),
 PosixPath('

In [347]:
def remove_suffixes(files, regex=r"\.imec\d+\.(lf|ap)\.(bin|meta)"):
    """Strip the part of each file name that matches `regex`.

    Parameters
    ----------
    files: iterable of pathlib.Path
        The paths to shorten.
    regex: str
        Pattern removed from each file name (every occurrence). The default
        removes ".imec<N>.<lf|ap>.<bin|meta>".

    Returns
    -------
    list of pathlib.Path
        One path per input, with the same parent and the shortened name.
    """
    suffix_pattern = re.compile(regex)
    stripped = []
    for path in files:
        short_name = suffix_pattern.sub('', path.name)
        stripped.append(path.parent / short_name)
    return stripped

# Show every file path with its ".imec<N>.<lf|ap>.<bin|meta>" suffix removed.
remove_suffixes(get_all_files(doc))

[PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_g0/3-1-2021_g0_imec0/3-1-2021_g0_t0'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_g0/3-1-2021_g0_imec0/3-1-2021_g0_t0'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_g0/3-1-2021_g0_imec0/3-1-2021_g0_t0'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_g0/3-1-2021_g0_imec0/3-1-2021_g0_t0'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_g0/3-1-2021_g0_imec1/3-1-2021_g0_t0'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_g0/3-1-2021_g0_imec1/3-1-2021_g0_t0'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_g0/3-1-2021_g0_imec1/3-1-2021_g0_t0'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_g0/3-1-2021_g0_imec1/3-1-2021_g0_t0'),
 PosixPath('/Vol

In [354]:
def remove_duplicates(files):
    """Drop repeated entries, keeping the first occurrence of each in order.

    Parameters
    ----------
    files: iterable
        Hashable items, e.g. pathlib.Path objects.

    Returns
    -------
    list
        The distinct items, in order of first appearance.
    """
    seen = set()
    unique = []
    for item in files:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    return unique

# Full pipeline: filter to imec0 "lf" "bin" files, strip the suffix, then de-duplicate.
remove_duplicates(remove_suffixes(filter_files(get_all_files(doc), probe='imec0', dtype='lf', ftype='bin')))

[PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_g0/3-1-2021_g0_imec0/3-1-2021_g0_t0'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021-a_g0/3-1-2021-a_g0_imec0/3-1-2021-a_g0_t0'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g0/3-1-2021_A_g0_imec0/3-1-2021_A_g0_t0'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g1/3-1-2021_A_g1_imec0/3-1-2021_A_g1_t0'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g1/3-1-2021_A_g1_imec0/3-1-2021_A_g1_t1'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_B_g0/3-1-2021_B_g0_imec0/3-1-2021_B_g0_t0'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_C_g0/3-1-2021_C_g0_imec0/3-1-2021_C_g0_t0'),
 PosixPath('/Volumes/neuropixel_archive/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_C_g0/3-1-2021_C_g0_

In [None]:
# TODO: Add warnings if an expected directory or file does not exist.
# TODO: Comment the code heavily.