# Imports

In [19]:
import os
import sys
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Add bombcell to the Python path if it was not installed with pip.
# The package root is taken to be the parent of the current working directory.
demo_dir = Path(os.getcwd())
pyBombCell_dir = demo_dir.parent
# Only append when missing, so re-running this cell does not pile up
# duplicate entries in sys.path.
if str(pyBombCell_dir) not in sys.path:
    sys.path.append(str(pyBombCell_dir))

In [3]:
%load_ext autoreload
%autoreload 2

import bombcell as bc

# Define data paths

By default, the paths below point to BombCell's toy dataset.

In [4]:
# Kilosort output directory -- replace with your own kilosort directory.
ks_dir = demo_dir.joinpath('toy_data')

# Raw data directory: keep None when there is no raw data, otherwise set it
# to the path of your raw recording.
raw_dir = None

# Output location; "~" stands for the home directory and "/" also works on Windows.
save_path = "~/Downloads/bombcell_plots"

# When no raw data directory (with its meta folder) is provided, the gain has
# to be entered manually here.
gain_to_uV = np.nan

In [5]:
# Load the spike-sorting outputs found in the kilosort directory.
(spike_times_samples,
 spike_templates,
 template_waveforms,
 template_amplitudes,
 pc_features,
 pc_features_idx,
 channel_positions,
 good_channels) = bc.load_ephys_data(ks_dir)

# Raw data is optional. When a raw directory is given, the meta path and the
# gain are obtained from it; otherwise no meta path is available and
# gain_to_uV keeps the value set manually in the configuration cell.
if raw_dir is not None:  # identity test is the correct way to compare with None
    ephys_raw_data, meta_path = bc.manage_data_compression(raw_dir, decompressed_data_local = raw_dir)
    gain_to_uV = bc.get_gain_spikeglx(meta_path)
else:
    meta_path = None

In [6]:
# Build the parameter dictionary for this dataset from bombcell's defaults.
param = bc.default_parameters(
    ks_dir,
    raw_dir,
    ephys_meta_dir=meta_path,
)

In [12]:
# Extract or load in raw waveforms.
# This step needs raw data; without it every raw-waveform output is None so
# that later cells can test for its absence.
if raw_dir is not None:  # identity test is the correct way to compare with None
    raw_waveforms_full, raw_waveforms_peak_channel, SNR = bc.extract_raw_waveforms(
        param,
        spike_templates.squeeze(),
        spike_times_samples.squeeze(),
        param['re_extract_raw'],
        save_path,
    )
else:
    raw_waveforms_full = None
    raw_waveforms_peak_channel = None
    SNR = None

In [14]:
# Pre-load the peak channels of the template waveforms
max_channels = bc.get_waveform_max_channel(template_waveforms)

# Remove duplicate spikes. The arrays returned here overwrite the ones that
# were loaded earlier, so later cells work on the de-duplicated data.
dedup_outputs = bc.remove_duplicate_spikes(
    spike_times_samples,
    spike_templates,
    template_amplitudes,
    max_channels,
    save_path,
    param,
    pc_features=pc_features,
    raw_waveforms_full=raw_waveforms_full,
    raw_waveforms_peak_channel=raw_waveforms_peak_channel,
    signal_to_noise_ratio=SNR,
)
(non_empty_units, duplicate_spike_idx, spike_times_samples, spike_templates,
 template_amplitudes, pc_features, raw_waveforms_full,
 raw_waveforms_peak_channel, signal_to_noise_ratio, max_channels) = dedup_outputs

# Divide the recording into time chunks: either regularly spaced chunk starts
# or a single chunk spanning the first to the last spike.
spike_times_seconds = spike_times_samples / param['ephys_sample_rate']
first_spike_s = np.min(spike_times_seconds)
last_spike_s = np.max(spike_times_seconds)
if param['compute_time_chunks']:
    time_chunks = np.arange(first_spike_s, last_spike_s, param['delta_time_chunk'])
else:
    time_chunks = np.array((first_spike_s, last_spike_s))

# TODO: this should ideally be obtained as part of removing duplicate spikes
unique_templates = np.unique(spike_templates)

In [23]:
# Initialize the quality metrics dictionary.
# The SNR passed here is the one returned by remove_duplicate_spikes
# (`signal_to_noise_ratio`), not the pre-deduplication `SNR`: all other arrays
# used below are the post-deduplication versions, so the SNR has to match them.
n_units = unique_templates.size
quality_metrics = bc.create_quality_metrics_dict(n_units, snr = signal_to_noise_ratio)
quality_metrics['max_channels'] = max_channels

# Parameter overrides for this run
param['use_hill_method'] = True # use the old method for RPVs
# NOTE(review): `time_chunks` was already built in the previous cell from the
# value of param['compute_time_chunks'] at that time; if that value was True,
# rebuild `time_chunks` after changing it here -- confirm intended behaviour.
param['compute_time_chunks'] = False

# Complete with remaining quality metrics
quality_metrics, times = bc.get_all_quality_metrics(
    unique_templates,
    spike_times_seconds,
    spike_templates,
    template_amplitudes,
    time_chunks,
    pc_features,
    pc_features_idx,
    quality_metrics,
    raw_waveforms_full,
    channel_positions,
    template_waveforms,
    param,
)

Computing BombCell quality metrics: 100%|██████████| 15/15 units


In [24]:
# Build one boolean mask per quality criterion (one entry per unit). A True
# entry means the unit FAILS that criterion.

# --- Noise criteria (waveform shape) ---
nan_result = np.isnan(quality_metrics['n_peaks'])
too_many_peaks = quality_metrics['n_peaks'] > param['max_n_peaks']
too_many_troughs = quality_metrics['n_troughs'] > param['max_n_troughs']
too_short_waveform = quality_metrics['waveform_duration_peak_trough'] < param['min_wave_duration']
too_long_waveform = quality_metrics['waveform_duration_peak_trough'] > param['max_wave_duration']
too_noisy_baseline = quality_metrics['waveform_baseline'] > param['max_wave_baseline_fraction']

# --- Spatial decay ---
too_shallow_decay = quality_metrics['exp_decay'] > param['min_spatial_decay_slope']
too_steep_decay = quality_metrics['exp_decay'] < param['max_spatial_decay_slope']
# The original (misspelled) name is kept as an alias in case other cells use it
to_steap_decay = too_steep_decay

# --- Multi-unit activity (MUA) criteria ---
# TODO: decide whether a unit must fail ALL or ANY of these to be labelled MUA
too_few_total_spikes = quality_metrics['n_spikes'] < param['min_num_spikes_total']
too_many_spikes_missing = quality_metrics['percent_missing_gaussian'] > param['max_perc_spikes_missing']
too_low_presence_ratio = quality_metrics['presence_ratio'] < param['min_presence_ratio']
too_many_RPVs = quality_metrics['fraction_RPVs'] > param['max_RPV']

# Raw-waveform criteria exist only when raw waveforms were extracted;
# otherwise these names stay undefined.
if param['extract_raw_waveforms'] and raw_waveforms_full is not None:
    too_small_amplitude = quality_metrics['raw_amplitude'] < param['min_amplitude']
    too_small_SNR = quality_metrics['signal_to_noise_ratio'] < param['min_SNR']

# The drift criterion exists only when drift was computed
if param['compute_drift']:
    too_large_drift = quality_metrics['max_drift_estimate'] > param['max_drift']

# --- Somatic vs non-somatic ---
# Thresholds for the non-somatic tests, set here for this notebook
param['non_somatic_trough_peak_ratio'] = 1.25
param['non_somatic_peak_before_to_after_ratio'] = 1.2

# Encoding: 1 == somatic, 0 == non-somatic. Every unit starts as somatic and
# is flipped to 0 as soon as it meets one of the non-somatic criteria below.
is_somatic = np.ones(unique_templates.size)

# Criterion 1: the trough is small relative to the larger of the two main peaks
largest_main_peak = np.max((quality_metrics['main_peak_before'], quality_metrics['main_peak_after']), axis = 0)
trough_to_peak_ratio = quality_metrics['trough'] / largest_main_peak
is_somatic[trough_to_peak_ratio < param['non_somatic_trough_peak_ratio']] = 0

# Criterion 2: the peak before the trough is large relative to the peak after it
peak_before_to_after_ratio = quality_metrics['main_peak_before'] / quality_metrics['main_peak_after']
is_somatic[peak_before_to_after_ratio > param['non_somatic_peak_before_to_after_ratio']] = 0

# Criterion 3: all four conditions on first-peak size/width and trough size/width hold
first_peak_dominates = quality_metrics['main_peak_before'] * param['first_peak_ratio'] > quality_metrics['main_peak_after']
first_peak_is_narrow = quality_metrics['width_before'] < param['min_width_first_peak']
first_peak_rivals_trough = quality_metrics['main_peak_before'] * param['min_main_peak_to_trough_ratio'] > quality_metrics['trough']
trough_is_narrow = quality_metrics['trough_width'] < param['min_width_main_trough']
is_somatic[first_peak_dominates & first_peak_is_narrow & first_peak_rivals_trough & trough_is_narrow] = 0

# Disabled in the original notebook: treat units with a NaN trough as non-somatic
#is_somatic[np.isnan(quality_metrics['trough'])] = 0
quality_metrics['is_somatic_new'] = is_somatic

# Non-somatic units are the ones flipped to 0 above. The previous
# `is_somatic == 1` selected the somatic units, i.e. the mask was inverted.
not_somatic = is_somatic == 0

In [None]:
# Debugging aid: inspect the peak-to-trough threshold together with the trough
# values it is compared against in the somatic classification.
# Only the last expression of a cell is displayed, so both values are shown as
# one tuple instead of two bare expressions (the first of which was discarded).
param['min_main_peak_to_trough_ratio'], quality_metrics['trough']

['show_detail_plots',
 'show_summary_plots',
 'verbose',
 're_extract_raw',
 'save_as_tsv',
 'unit_type_for_phy',
 'ephys_kilosort_path',
 'save_mat_file',
 'remove_duplicate_spike',
 'duplicate_spikes_window_s',
 'save_spike_without_duplicates',
 'recompute_duplicate_spike',
 'detrend_waveform',
 'n_raw_spikes_to_extract',
 'save_multiple_raw',
 'decompress_data',
 'extract_raw_waveforms',
 'probe_type',
 'tauR_values_min',
 'tauR_values_max',
 'tauR_values_steps',
 'tauC',
 'compute_time_chunks',
 'delta_time_chunk',
 'presence_ratio_bin_size',
 'drift_bin_size',
 'compute_drift',
 'min_thresh_detect_peaks_troughs',
 'first_peak_ratio',
 'normalize_spatial_decay',
 'min_width_first_peak',
 'min_main_peak_to_trough_ratio',
 'min_width_main_trough',
 'ephys_sample_rate',
 'n_channels',
 'n_sync_channels',
 'compute_distance_metrics',
 'n_channels_iso_dist',
 'split_good_and_mua_non_somatic',
 'max_n_peaks',
 'max_n_troughs',
 'keep_only_somatic',
 'min_wave_duration',
 'max_wave_durati

In [37]:
# Obtain the unit type (and its string form) from bombcell, given the
# parameters and the quality metrics computed above.
# NOTE(review): the recorded output of this cell is `KeyError: 'main_trough_size'`.
# Presumably the installed bombcell expects that key, whereas the cells above
# read the trough amplitude from quality_metrics['trough'] -- confirm the key
# names against the bombcell version in use. Later cells need `unit_type` and
# `unit_type_string`, so they cannot run until this cell succeeds.
unit_type, unit_type_string = bc.get_quality_unit_type(param, quality_metrics)

KeyError: 'main_trough_size'

In [26]:
# Summary table: one row per unit, one column per criterion.
# Requires `unit_type` from the get_quality_unit_type cell.
criteria_flags = np.array([
    nan_result,
    too_many_peaks,
    too_many_troughs,
    too_short_waveform,
    too_long_waveform,
    too_noisy_baseline,
    too_shallow_decay,
    too_few_total_spikes,
    too_many_spikes_missing,
    too_many_RPVs,
    too_low_presence_ratio,
    not_somatic,
])

# Stack as rows: original template IDs first, then the flags, then the unit type
qm_table_array = np.vstack((unique_templates, criteria_flags, unit_type))

# TODO: do this for the optional params as well
row_labels = [
    'Original ID',
    'NaN result',
    'Peaks',
    'Troughs',
    'Waveform Min Length',
    'Waveform Max Length',
    'Baseline',
    'Spatial Decay',
    'Min Spikes',
    'Missing Spikes',
    'RPVs',
    'Presence Ratio',
    'Not Somatic',
    'Good Unit',
]
# Label the rows, then transpose so that each criterion becomes a column
qm_table = pd.DataFrame(qm_table_array, index=row_labels).T


NameError: name 'unit_type' is not defined

In [None]:
# Hand the quality metrics, unit type labels and raw waveforms to bombcell's
# save routine, together with the output location `save_path`.
bc.save_results(
    quality_metrics,
    unit_type_string,
    unique_templates,
    param,
    raw_waveforms_full,
    raw_waveforms_peak_channel,
    save_path,
)