In [1]:
import os
import numpy as np
import sys
import struct
import json
import datetime
import scipy
import scipy.signal
import matplotlib.pyplot as plt
%matplotlib inline

# Work out which folders to add to the import path.
# `__file__` is not defined inside a notebook kernel, so the notebook's folder is taken to
# be the kernel's current working directory. (The previous os.path.abspath("__file__")
# used a quoted string and therefore resolved to this same working directory.)
notebook_path = os.getcwd()
# one folder up from the notebook folder
core_path = os.path.dirname(notebook_path)
# two folders up from the notebook folder
basepath = os.path.dirname(os.path.dirname(notebook_path))

# Add each folder only once so that re-running this cell does not keep growing sys.path.
# NOTE(review): presumably one of these folders contains the `core` package - confirm.
for _path in (core_path, basepath):
    if _path not in sys.path:
        sys.path.append(_path)

from core.readMDA import readMDA, get_Fs
from core.readBin import get_bin_data, get_raw_pos, get_active_tetrode, get_channel_from_tetrode, get_active_eeg
from core.Tint_Matlab import int16toint8, get_setfile_parameter
from core.tetrode_conversion import convert_tetrode, batch_basename_tetrodes
from core.convert_position import convert_position
from core.eeg_conversion import convert_eeg
from core.utils import find_sub, find_bin_basenames
from core.bin2mda import convert_bin2mda
from core.mdaSort import sort_bin
from core.set_conversion import convert_setfile
from core.intan_mountainsort import validate_session, convert_bin_mountainsort

# Analysis

In [6]:
# Directory that you want to analyze. Keep exactly one assignment active and leave the
# alternatives commented out; with several active assignments the last one silently wins.
# directory = 'E:\\Apollo_D_Drive\\ApolloKlusta\\J20-sleep-1'
# directory = 'E:\\Apollo_D_Drive\\ApolloKlusta\\test_mountainsort'
# directory = 'E:\\Apollo_D_Drive\\data\\B6-August-18-1'
directory = r'D:\5MinRecording'

# finds the basenames within this directory
basenames = find_bin_basenames(directory)

# prints how many basenames were found, followed by one basename per line
print('Found %d basenames within this directory: %s' % (len(basenames), directory))
print('------------------------')
for name in basenames:
    print(name)

Found 1 basenames within this directory: D:\5MinRecording
------------------------
20171020-RAW-5MIN


## Analysis Parameters

In [7]:
# NOTE: `whiten` is a string flag ('true' / 'false'), not a Python bool; the rest of this
# cell compares it against the exact string 'true'.
whiten = 'true'  # do you want to whiten the data?
# whiten = 'false'

# detect_interval = 50
detect_interval = 20  # roughly the number of samples to check for a spike
# the algorithm will take the detect_interval value and bin the data in bin sizes of that many
# samples. Then it will find the peak (or trough, or both) of each bin and evaluate that event
# if it exceeds the threshold value.

# recommend only doing positive peaks so we don't get any weird issues with a cell that is
# aligned with the peak, and seemingly the same cell aligned with the trough (in this case
# both peak and trough would have to exceed the threshold).

# detect_sign = 0  # positive or negative peaks
detect_sign = 1  # only positive peaks
# detect_sign = -1  # only negative peaks

# threshold values, split into a whitened and a non-whitened threshold.
# This is because if you whiten the data you normalize it by the variance, thus
# a threshold of 3 is essentially saying 3 standard deviations. However, if you do not whiten,
# the data is not normalized and you would be using a bit value; maybe this should take whatever
# value is in the threshold from the .set file.

if whiten == 'true':
    # detect_threshold = 3  # units: ~sd's
    detect_threshold = 4  # units: ~sd's
    # ---------------
    automate_threshold = False  # Don't change this

else:
    # this means the data was not whitened

    detect_threshold = 13000  # units: bits

    # if you want to find the threshold from the .set file and use that,
    # set automate_threshold to True, otherwise False. This threshold would override any
    # value set above. I'd recommend setting this to True as this seems to vary from .set file
    # to .set file.
    # automate_threshold = True
    automate_threshold = False

# bandpass filtering parameters
freq_min = 300  # this doesn't really matter because data is already filtered so it won't do the filtering
freq_max = 7000  # this doesn't really matter because data is already filtered so it won't do the filtering

pre_spike = 15
post_spike = 35

# artifact masking parameters
# here we bin the data into masked_chunk_size bins, and it will take the sqrt of the sum of
# the squares (RSS) for each bin. It will then find the SD for all the bins, and if the bin is
# above mask_threshold SD's from the average bin RSS, it will consider it as high amplitude noise
# and remove this chunk (and neighboring chunks).
# Keep exactly one `mask` assignment active; the other stays commented out.
# mask = True
mask = False
mask_threshold = 6  # units: SD's
masked_chunk_size = None  # if None it will default to Fs/10
mask_num_write_chunks = 100  #

# feature parameters
num_features = 10
max_num_clips_for_pca = 3000

# random parameters, probably don't need to change

clip_size = 50  # this needs to be left at 50 for Tint, Tint only likes 50 samples
notch_filter = False  # the data is already notch filtered likely
self = None  # don't worry about this, this is for object-oriented programming (my GUIs)

## Run the Analysis on Each Basename

In [None]:
# Run convert_bin_mountainsort once for every basename found in `directory`.
for i, current_basename in enumerate(basenames):
    print('Analyzing set_file %d/%d: ' % (i+1, len(basenames)))

    # Start from the configured threshold on every iteration and keep the per-session value
    # in its own variable, so a value read from one .set file never overwrites the
    # notebook-level `detect_threshold` parameter.
    current_threshold = detect_threshold
    if whiten != 'true' and automate_threshold:
        # then you decided you want to automatically get the threshold from the .set file
        set_filename = '%s.set' % os.path.join(directory, current_basename)
        current_threshold = int(get_setfile_parameter('threshold', set_filename))

    print('Using the following detect_threshold: %.2f' % (float(current_threshold)))

    convert_bin_mountainsort(directory, current_basename, whiten=whiten,
                             detect_interval=detect_interval,
                             detect_sign=detect_sign,
                             detect_threshold=current_threshold,
                             freq_min=freq_min,
                             freq_max=freq_max, mask_threshold=mask_threshold,
                             masked_chunk_size=masked_chunk_size,
                             mask_num_write_chunks=mask_num_write_chunks,
                             clip_size=clip_size,
                             mask=mask,
                             num_features=num_features,
                             max_num_clips_for_pca=max_num_clips_for_pca,
                             pre_spike=pre_spike, post_spike=post_spike,
                             notch_filter=notch_filter, self=self)

    print('-------------------')

print('Finished Analysis')

Analyzing set_file 1/1: 
Using the following detect_threshold: 4.00
[2019-05-15 10:47:11]: Converting the following tetrode: 1!
[2019-05-15 10:47:20]: Converting the following tetrode: 2!
[2019-05-15 10:47:28]: Converting the following tetrode: 3!
[2019-05-15 10:47:37]: Converting the following tetrode: 4!
ml-run-process ms4_geoff.sort --inputs filt_fname:/mnt/d/5MinRecording/20171020-RAW-5MIN_T1_filt.mda --outputs firings_out:/mnt/d/5MinRecording/20171020-RAW-5MIN_T1_firings.mda pre_out_fname:/mnt/d/5MinRecording/20171020-RAW-5MIN_T1_pre.mda metrics_out_fname:/mnt/d/5MinRecording/20171020-RAW-5MIN_T1_metrics.json --parameters freq_min:300 freq_max:7000 samplerate:48000 detect_sign:1 adjacency_radius:-1 detect_threshold:4 detect_interval:20 clip_size:50 firing_rate_thresh:0.05 isolation_thresh:0.95 noise_overlap_thresh:0.03 peak_snr_thresh:1.5 mask_artifacts:false mask_chunk_size:4800 mask_threshold:6 mask_num_write_chunks:100 num_workers:12 whiten:true num_features:10 max_num_clips_fo