In [1]:
# TODOS:
# Add warnings if expected directory or files do not exist
# Comment heavily

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import ecephys_analyses as ea

yaml_path = ea.package_datapath('example_org_spec.yaml')

In [4]:
import yaml

with open(yaml_path) as fp:
    yaml_data = list(yaml.safe_load_all(fp))
docs = yaml_data

In [5]:
from pathlib import Path

doc = docs[0]
session = doc['recording-sessions'][0]
run = '3-1-2021_A'
gate = 'g1'
probe = 'imec0'
fname = '3-1-2021_A_g1_t0.imec0.lf.meta'
root_dir = Path(doc['raw-data-root'])
session_dir = root_dir / session['directory']
gate_dir = session_dir / f"{run}_{gate}"
probe_dir = Path(doc['raw-data-root'], session['directory'], f"{run}_{gate}", f"{run}_{gate}_{probe}")

# Test ecephys.sglx.paths

In [6]:
import ecephys.sglx.file_mgmt as fmgmt

In [16]:
fmgmt.parse_sglx_fname(fname);

In [17]:
fmgmt.get_trigger_files(probe_dir);

In [18]:
fmgmt.get_unique_trigger_stems(probe_dir);

In [19]:
fmgmt.get_probe_directories(gate_dir);

In [20]:
fmgmt.get_probe_directories(gate_dir, probe_regex="imec0");

In [21]:
fmgmt.get_gate_files(gate_dir);

In [22]:
fmgmt.filter_files(fmgmt.get_gate_files(gate_dir), probe='imec0', stream='lf', ftype='bin');

In [23]:
fmgmt.remove_suffixes(fmgmt.get_gate_files(gate_dir));

In [24]:
fmgmt.separate_files_by_probe(fmgmt.get_gate_files(gate_dir));

In [62]:
df1 = fmgmt.filelist_to_frame(fmgmt.get_gate_files(gate_dir));

In [190]:
fmgmt.read_metadata(df1.path.values);

In [194]:
fmgmt.filelist_to_frame(df1.path.values);

# Test ecephys.sglx.file_selection

In [25]:
import ecephys.sglx.session_org_utils as sou

In [26]:
sou._get_gate_directories(session_dir, run);

In [27]:
sou._get_run_files(session_dir, run);

In [28]:
sou._get_session_files(root_dir, session);

In [29]:
sou._get_document_files(doc);

In [30]:
sou.get_run_files(session_dir, run);

In [31]:
sou.get_session_files(root_dir, session);

In [32]:
sou.get_document_files(doc);

In [33]:
sou._get_experiment_files(doc, 'sleep-homeostasis');

In [34]:
sou.get_experiment_files(doc, 'sleep-homeostasis');

In [35]:
sou.get_alias_files(doc, 'sleep-homeostasis', 'light-period-circadian-match');

In [36]:
sou._get_yamlstream_files(docs);

In [49]:
df2 = sou.get_yamlstream_files(docs);

In [50]:
sou.get_files(docs, 'Allan', 'sleep-homeostasis', probe='imec0', stream='lf', ftype='bin');

In [227]:
df3 = sou.get_files(docs, 'Allan', 'sleep-homeostasis', alias='light-period-circadian-match', probe='imec0', stream='lf', ftype='bin');

# Under development

In [313]:
# Now write functions to get analysis paths, replacing existing path parts and inserting at various points along the hierarchy. 
# Could remove run ordering from yamlspec file entirely. Then ordering is lost for analysis files, and you must have a manifest with file order in the analysis drive. No big deal. 
# Functions for analysis file management and for raw data management should be completely separate, even if it results in redundancy. 
# Analysis directory should not be specified in raw data yamlspec file. 
# Perhaps analysis directory hierarchy should not mirror SGLX hierarchy. Perhaps most files should just be saved in session folders. Have a session column in the analysis drive manifest. 
# One group of analysis file management functions should get a single subject, experiment, or alias file with an arbitrary name. Another should get a group of SGLX-style named analysis files from a session folder. 
# Later, we can improve on what "SGLX style nameing" means. 
# Analysis folders (e.g. "spw-analysis") should be above subject folders in the hierarchy. 
# Add time+datetime support to aliases
# Just get a drop-in replacement working for now. Cut your losses. 
# Change files to paths where appropriate
# Or maybe, sessions should just be explicit lists of absolute file paths, and aliases as well. Easy to generate this kind of a file, given the code you've already written. 
# There should be a config.py file for setting analysis/raw data mount points. 

In [309]:
df = sou.get_files(docs, 'Allan', 'sleep-homeostasis', alias='light-period-circadian-match', probe='imec0', stream='lf', ftype='bin');

In [310]:
def validate_sglx_path(path):
    (run, gate, trigger, probe, stream, ftype) = fmgmt.parse_sglx_fname(path.name) 
    probe_dir = path.parent
    gate_dir = probe_dir.parent
    assert probe_dir.name == f"{run}_{gate}_{probe}"
    assert gate_dir.name == f"{run}_{gate}"
    return gate_dir, probe_dir.name, path.name
    
def _get_path_parts(path):
    gate_dir, probe_dirname, fname = validate_sglx_path(path)
    session_dir = gate_dir.parent
    subject_dir = session_dir.parent    
    return subject_dir, session_dir.name, gate_dir.name, probe_dir.name, fname

def get_analysis_counterpart(path, extension, analysis_subject_dir):
    subject_dir, session_dirname, gate_dirname, probe_dirname, fname = _get_path_parts(path)
    (run, gate, trigger, probe, stream, ftype) = fmgmt.parse_sglx_fname(fname)
    new_fname = f"{run}_{gate}.{probe}.{extension}"
    return analysis_subject_dir / session_dirname / gate_dirname / probe_dirname / new_fname

def get_analysis_counterparts(paths, extension, analysis_subject_dir):
    counterparts = [get_analysis_counterpart(p, extension, analysis_subject_dir) for p in paths]
    return list(dict.fromkeys(counterparts))

def get_path_parts(df):
    return pd.DataFrame.from_records(map(_get_path_parts, df.path.values), columns=['subject_dir', 'session_dirname', 'gate_dirname', 'probe_dirname', 'fname'])
    
def add_path_parts(df):
    return pd.concat([df, get_path_parts(df)], axis=1)

In [311]:
analysis_subject_dir = Path('/Volumes/neuropixel_analysis/Data/chronic/CNPIX8-Allan/')

In [312]:
get_analysis_counterparts(df.path.values, 'spg.nc', analysis_subject_dir)

[PosixPath('/Volumes/neuropixel_analysis/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_g0/3-1-2021_A_g1_imec0/3-1-2021_g0.imec0.spg.nc'),
 PosixPath('/Volumes/neuropixel_analysis/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021-a_g0/3-1-2021_A_g1_imec0/3-1-2021-a_g0.imec0.spg.nc'),
 PosixPath('/Volumes/neuropixel_analysis/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g0/3-1-2021_A_g1_imec0/3-1-2021_A_g0.imec0.spg.nc'),
 PosixPath('/Volumes/neuropixel_analysis/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_A_g1/3-1-2021_A_g1_imec0/3-1-2021_A_g1.imec0.spg.nc'),
 PosixPath('/Volumes/neuropixel_analysis/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_B_g0/3-1-2021_A_g1_imec0/3-1-2021_B_g0.imec0.spg.nc'),
 PosixPath('/Volumes/neuropixel_analysis/Data/chronic/CNPIX8-Allan/3-1-2021/3-1-2021_C_g0/3-1-2021_A_g1_imec0/3-1-2021_C_g0.imec0.spg.nc')]