In [116]:
# Enable IPython's autoreload extension so that edits to imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [117]:
from pathlib import Path
from datetime import datetime
import subprocess

from ecephys_spike_sorting.scripts.create_input_json import createInputJson
from ecephys_analyses.data import channel_groups, paths, parameters

In [149]:
## USER

# Names of the parameter sets to use: the sorting condition whose output is
# post-processed, and the post-processing condition applied to it.
ks_sorting_condition = 'ks2_5_catgt_df'
postprocessing_condition = 'postprocess_df'

# Set to True to copy the original ks directory (except *.dat) under
# <ks_dirname>_orig before post-processing.
ks_make_copy = False

# Datasets to process, as (subject, condition, catgt_data) tuples.
data_conditions = [
    ('Alessandro', condition_name, True)
    for condition_name in ('rem-stim', 'rem-stim2', 'sleep-stim', 'West')
]

# Don't parallelize (possibly messed up tmp directories?)
n_jobs = 1
assert n_jobs == 1

## end USER

In [150]:
def run_postprocessing(
    subject, condition,
    sorting_condition, postprocessing_condition,
    rerun_existing=True,
    catgt_data=True,
):
    """Run the configured ecephys_spike_sorting post-processing modules on one dataset.

    The list of modules and their parameters is looked up with
    ``parameters.get_analysis_params('ks_postprocessing', postprocessing_condition)``.
    An input JSON is written into the Kilosort directory with
    ``createInputJson`` and every module is then executed in order as
    ``python -m ecephys_spike_sorting.modules.<module>`` in a subprocess.

    Args:
        subject: Subject name, forwarded to ``paths.get_sglx_style_datapaths``.
        condition: Data condition name, forwarded to
            ``paths.get_sglx_style_datapaths``.
        sorting_condition: Name of the sorting condition. It is both the name
            of the Kilosort directory under the data directory and the key
            used to look up the Kilosort version.
        postprocessing_condition: Name of the post-processing parameter set.
        rerun_existing: Currently ignored; the modules are always run.
            NOTE(review): the skip-if-done logic was never finished (it
            referred to an undefined output directory) -- confirm which file
            should mark a dataset as done before implementing it.
        catgt_data: Forwarded to ``paths.get_sglx_style_datapaths`` when
            locating the directory that contains the Kilosort results.
            This used to be read from a global of the same name that only
            existed because the calling loop happened to define it.

    Returns:
        1 once every module has completed.

    Raises:
        AssertionError: If there is not exactly one 'ap.bin' path, or if the
            Kilosort directory does not exist.
        subprocess.CalledProcessError: If a module exits with a non-zero
            status. Remaining modules are not run.
    """
    # Modules and associated parameters: each entry maps module name -> kwargs.
    module_name_params = parameters.get_analysis_params(
        'ks_postprocessing',
        postprocessing_condition
    )
    modules = []
    params = []
    for d in module_name_params:
        modules += d.keys()
        params += d.values()

    # Data: the binary is always looked up among the CatGT data.
    binpaths = paths.get_sglx_style_datapaths(
        subject,
        condition,
        'ap.bin',
        catgt_data=True,
    )
    assert len(binpaths) == 1, f"Expected exactly one 'ap.bin' path, got {len(binpaths)}"
    binpath = binpaths[0]
    metapath = binpath.parent/(binpath.stem + '.meta')
    print(f"catGT preprocessed data: {binpath}\n")

    # Paths: only the parent directory of the 'dummy' path is used.
    base_data_dir = paths.get_sglx_style_datapaths(
        subject,
        condition,
        'dummy',
        catgt_data=catgt_data
    )[0].parent
    # KS input dir
    ks_dir = base_data_dir/sorting_condition
    assert ks_dir.exists(), f"Kilosort directory does not exist: {ks_dir}"
    print(f"Kilosort input results dir: {ks_dir}\n")

    # `ks_make_copy` is the notebook-level switch from the USER cell.
    if ks_make_copy:
        copy_ks_dir(ks_dir)

    # Save config in ks dir
    cfg_path = ks_dir/'postprocessing-input.json'
    json_dir = ks_dir

    # KS version
    KS2ver = get_ks_version(sorting_condition)

    # Params: merge the per-module kwargs into one dict (later modules win on
    # duplicate keys).
    kwargs_dict = {k: v for d in params for k, v in d.items()}
    createInputJson(
        str(cfg_path),
        # Directories and data
        input_meta_path=str(metapath),
        continuous_file=str(binpath),  # CatGT (unused)
        npx_directory=str(binpath.parents[3]),  # CatGT (unused)
        extracted_data_directory=str(binpath.parents[3]),  # CatGT (unused)
        spikeGLX_data=True,
        kilosort_output_directory=str(ks_dir),
        ks_make_copy=ks_make_copy,
        KS2ver=KS2ver,
        **kwargs_dict
    )

    print('running')
    start = datetime.now()

    for module in modules:
        output_json = json_dir/(f"{module}-output.json")
        # Pass the arguments as a list (no shell) so that paths containing
        # spaces or shell metacharacters are handled correctly.
        args = [
            "python", "-W", "ignore",
            "-m", f"ecephys_spike_sorting.modules.{module}",
            "--input_json", str(cfg_path),
            "--output_json", str(output_json),
        ]
        command = ' '.join(args)
        print(f'Running command `{command}`')
        # Relay the child's output line by line so it shows up in the
        # notebook cell, then fail loudly instead of silently moving on to
        # the next module when a module exits with an error.
        with subprocess.Popen(
            args,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
        ) as proc:
            for line in proc.stdout:
                print(line, end='')
        if proc.returncode != 0:
            raise subprocess.CalledProcessError(proc.returncode, args)

    end = datetime.now()
    print(f"{end.strftime('%H:%M:%S')}: Finished {subject}, {condition}, {sorting_condition}.")
    print(f"Run time = {str(end - start)}\n")

    return 1

def copy_ks_dir(ks_dir):
    """Back up a Kilosort directory to a sibling ``<ks_dirname>_orig`` directory.

    Any existing ``<ks_dirname>_orig`` directory is removed first. Entries
    whose suffix is ``.dat`` are not copied: a relative symlink
    ``../<ks_dirname>/<file>`` is created instead. Every other entry is
    copied with ``shutil.copyfile``.

    Args:
        ks_dir: ``pathlib.Path`` of the directory to back up (absolute or
            relative to the current working directory).

    Raises:
        OSError: If the copy directory cannot be created, or an entry cannot
            be copied or linked. Subdirectories of ``ks_dir`` are not
            supported, since ``shutil.copyfile`` only copies regular files.
    """
    import os, shutil
    copy_dir = ks_dir.parent/(ks_dir.name + '_orig')
    if copy_dir.exists():
        print(f"Removing ks dir at {copy_dir}..")
        shutil.rmtree(copy_dir)
    print(f"Copying original ks dir to {copy_dir}\n")
    os.mkdir(copy_dir)
    for file in list(ks_dir.iterdir()):
        dest = copy_dir/file.name
        if file.suffix == '.dat':
            # Symlink all data files. The target is interpreted relative to
            # the directory holding the link (copy_dir), so it must go up one
            # level and into the sibling ks directory by *name*. Joining
            # '..' with the full `file.parent` produced a broken link for a
            # relative `ks_dir` and silently dropped the '..' for an absolute
            # one.
            os.symlink(Path('..')/ks_dir.name/file.name, dest)
        else:
            # Don't use copytree because chmod raises PermissionError on smb share
            shutil.copyfile(file, dest)

def get_ks_version(sorting_condition):
    """Return the Kilosort version string for a sorting condition.

    The sorter name is the first element of the pair returned by
    ``parameters.get_analysis_params('sorting', sorting_condition)``.

    Args:
        sorting_condition: Name of the sorting parameter set.

    Returns:
        '2.0', '2.5' or '3.0' for the sorters 'kilosort2', 'kilosort2_5' and
        'kilosort3' respectively.

    Raises:
        AssertionError: If the sorter name is not one of the above.
    """
    sorter_name, _ = parameters.get_analysis_params(
        'sorting',
        sorting_condition
    )
    known_versions = (
        ('kilosort2_5', '2.5'),
        ('kilosort2', '2.0'),
        ('kilosort3', '3.0'),
    )
    for name, version in known_versions:
        if sorter_name == name:
            return version
    # Raise explicitly rather than `assert False`: a bare assert is stripped
    # under `python -O`, which would make this function return None. The
    # exception type is kept for backward compatibility.
    raise AssertionError(
        f"Unrecognized sorter {sorter_name!r} for sorting condition "
        f"{sorting_condition!r}; expected one of "
        f"{[name for name, _ in known_versions]}"
    )

In [151]:
# Post-process every dataset listed in `data_conditions`, one after another.
# The loop variables are bound at notebook (module) level, so they stay
# visible to code called from the loop body; keep their names unchanged.
for subject, condition, catgt_data in data_conditions:
    print(f"{subject} {condition} {catgt_data} {ks_sorting_condition} {postprocessing_condition}")
    # Every listed dataset is required to be flagged as CatGT data.
    assert catgt_data
    run_postprocessing(
        subject,
        condition,
        sorting_condition=ks_sorting_condition,
        postprocessing_condition=postprocessing_condition,
        rerun_existing=False,
    )

Alessandro rem-stim True ks2_5_catgt_df postprocess_df
catGT preprocessed data: /Volumes/neuropixel/Data/tom/catgt/CNPIX5-Alessandro/8-31-2020/catgt_8-31-2020_REMStim_g0/8-31-2020_REMStim_g0_imec0/8-31-2020_REMStim_g0_tcat.imec0.ap.bin

Kilosort input results dir: /Volumes/neuropixel/Data/tom/catgt/CNPIX5-Alessandro/8-31-2020/catgt_8-31-2020_REMStim_g0/8-31-2020_REMStim_g0_imec0/ks2_5_catgt_df

SpikeGLX params read from meta
probe type: NP1, sample_rate: 30000.16964, num_channels: 385, uVPerBit: 2.3438
kilosort output directory: /Volumes/neuropixel/Data/tom/catgt/CNPIX5-Alessandro/8-31-2020/catgt_8-31-2020_REMStim_g0/8-31-2020_REMStim_g0_imec0/ks2_5_catgt_df
running
Running command `python -W ignore -m ecephys_spike_sorting.modules.noise_templates --input_json /Volumes/neuropixel/Data/tom/catgt/CNPIX5-Alessandro/8-31-2020/catgt_8-31-2020_REMStim_g0/8-31-2020_REMStim_g0_imec0/ks2_5_catgt_df/postprocessing-input.json --output_json /Volumes/neuropixel/Data/tom/catgt/CNPIX5-Alessandro/8-31