# Import the required Python Modules (and modules created for BinMSGUI)

In [1]:
import os
import numpy as np
import sys
import struct
import json
import datetime
import scipy
import scipy.signal
import matplotlib.pyplot as plt
%matplotlib inline

# Resolve the directory this notebook is assumed to run from. `__file__` is not
# defined inside a Jupyter kernel, so the kernel's current working directory is
# used; this is the same directory that os.path.dirname(os.path.abspath("__file__"))
# (with a quoted "__file__") resolves to.
# If you have moved the notebook, or started the kernel from another folder, set
# notebook_path by hand to the .../BinMSGUI/BinMSGUI/jupyter directory.
notebook_path = os.path.abspath(os.getcwd())

# core_path is one folder above the notebook directory, basepath is two folders above
core_path = os.path.dirname(notebook_path)
basepath = os.path.dirname(core_path)

# add core_path and basepath so we can import core.module_name; paths that are
# already registered are skipped so re-running this cell does not pile up
# duplicate sys.path entries
for search_path in (core_path, basepath):
    if search_path not in sys.path:
        sys.path.append(search_path)

# import the binMSGUI modules
from core.readMDA import readMDA, get_Fs
from core.readBin import get_bin_data, get_raw_pos, get_active_tetrode, get_channel_from_tetrode, get_active_eeg
from core.Tint_Matlab import int16toint8, get_setfile_parameter
from core.tetrode_conversion import convert_tetrode, batch_basename_tetrodes
from core.convert_position import convert_position
from core.eeg_conversion import convert_eeg
from core.utils import find_sub, find_bin_basenames
from core.bin2mda import convert_bin2mda
from core.mdaSort import sort_bin
from core.set_conversion import convert_setfile
from core.intan_mountainsort import validate_session, convert_bin_mountainsort

# Choose Directory Containing .Bin Files to Analyze

In [2]:
# folder holding the recording session(s) to process
directory = r'C:\Users\geba\Desktop\5MinRecording'

# collect every basename that find_bin_basenames reports for this folder
basenames = find_bin_basenames(directory)

# report how many basenames were found, then list them one per line
summary = 'Found %d basename(s) within this directory: %s' % (len(basenames), directory)
print(summary)
print('------------------------')
for name in basenames:
    print(name)

Found 1 basename(s) within this directory: C:\Users\geba\Desktop\5MinRecording
------------------------
20171020-RAW-5MIN


## Analysis Parameters
This cell contains all the parameters required for the analysis. There are quite a few, so check that each one is correct before proceeding.

In [3]:
########### filtering parameters ###########
# Per the original author's note: the .bin data is already filtered according to
# the settings chosen in dacqUSB, so these values are informational only and no
# additional filtering step is applied. Ask the author if an actual filtering
# step is needed.

freq_min = 300  # -units Hz- default 300
freq_max = 7000  # -units Hz- default 7000
notch_filter = False  # the data is likely already notch filtered

########## Clip Parameters ##############
# presumably the number of samples kept before / after each detected event -- confirm
pre_spike = 15
post_spike = 35
clip_size = 50  # this needs to be left at 50 for Tint, Tint only likes 50 samples

# Fail fast on an inconsistent clip definition. ValueError is a subclass of
# Exception, so any existing `except Exception` handling keeps working.
if (pre_spike + post_spike != clip_size) or (clip_size != 50):
    raise ValueError('The pre_spike and the post_spike must add up to be 50, and the clip size must remain at 50 for Tint!')

############ whiten ####################
# Spatial whitening removes correlations among channels; the MountainSort paper
# suggests it is crucial for separating nearby clusters. Whitening also
# normalizes the data, so the detection threshold is then expressed in numbers
# of standard deviations rather than as a bit/uV value.
# Use the string 'true' to whiten and the string 'false' to skip whitening.
# whiten = 'false'
whiten = 'true'

############# detect_sign ################
# Positive-only detection is recommended: it avoids the case where one cell is
# aligned on its peak and what is seemingly the same cell is aligned on its
# trough (which can happen when both peak and trough exceed the threshold).

# detect_sign = -1  # only negative peaks
# detect_sign = 0  # positive or negative peaks
detect_sign = 1  # only positive peaks

########## detect_interval ##############
# The algorithm bins the data into bins of detect_interval samples, finds the
# peak (or trough, or both) in each bin, and evaluates that event if it exceeds
# the threshold. detect_interval is therefore roughly the number of samples
# between events (peaks/troughs depending on detect_sign).

# default detect_interval is 50

# detect_interval = 50
detect_interval = 20

############ detect_threshold ###############

# The threshold is split into a whitened and a non-whitened value. Whitened data
# is normalized by the variance, so a threshold of 3 essentially means 3 standard
# deviations. Non-whitened data is not normalized, so the threshold is a bit
# value; it may make sense to take whatever threshold is in the .set file.

automate_threshold = False  # Don't change this, it only initializes automate_threshold to False by default

if whiten != 'true':
    # this means the data was not whitened

    detect_threshold = 13000  # units: bits

    # To read the threshold from the .set file instead, set automate_threshold
    # to True, otherwise False. That threshold overrides the value set above.
    # Setting it to True is recommended, since the value seems to vary from
    # .set file to .set file.
    # automate_threshold = True
    automate_threshold = False

else:
    # whitened data: the threshold is in units of ~sd's
    # detect_threshold = 3  # units: ~sd's
    detect_threshold = 4  # units: ~sd's

# ########### artifact masking parameters ###########
# The data is binned into chunks of masked_chunk_size samples, and the square root
# of the sum of the squares (RSS) is taken for each chunk. The SD across all the
# chunks is then found; a chunk that is more than mask_threshold SDs above the
# average chunk RSS is considered high-amplitude noise and is removed, together
# with its neighboring chunks.

# mask = True  # set the value to True if you want to include the artifact masking step
mask = False  # set the value to False if you want to skip the artifact masking step
mask_threshold = 6  #  units: SDs; once a bin/chunk exceeds this threshold it is zeroed (and so are its neighbors)

# the size of the bins/chunks to use when binning the data. If the value is set to None,
# masked_chunk_size will default to Fs/20
masked_chunk_size = None  

mask_num_write_chunks = 100  # how many chunks will be simultaneously written to the masked output file

########## Feature/PCA Parameters ##########
num_features = 10
max_num_clips_for_pca = 1000

# miscellaneous parameters, probably don't need to change
self = None  # don't worry about this, this is for object-oriented programming (my GUIs)

## Runs Analysis on each Basename

In [5]:
# Run the conversion / sorting call once for every basename found above.
for i, current_basename in enumerate(basenames):
    print('Analyzing set_file %d/%d: ' % (i+1, len(basenames)))

    # Only for non-whitened data with automate_threshold enabled: read the
    # threshold from this session's .set file. Note that this rebinds the
    # notebook-level detect_threshold.
    if automate_threshold and whiten != 'true':
        set_filename = '%s.set' % os.path.join(directory, current_basename)
        detect_threshold = int(get_setfile_parameter('threshold', set_filename))

    print('Using the following detect_threshold: %.2f' % (float(detect_threshold)))

    # gather the keyword arguments for this session, then hand them over in one call
    sort_settings = {
        'whiten': whiten,
        'detect_interval': detect_interval,
        'detect_sign': detect_sign,
        'detect_threshold': detect_threshold,
        'freq_min': freq_min,
        'freq_max': freq_max,
        'mask_threshold': mask_threshold,
        'masked_chunk_size': masked_chunk_size,
        'mask_num_write_chunks': mask_num_write_chunks,
        'clip_size': clip_size,
        'mask': mask,
        'num_features': num_features,
        'max_num_clips_for_pca': max_num_clips_for_pca,
        'pre_spike': pre_spike,
        'post_spike': post_spike,
        'notch_filter': notch_filter,
        'self': self,
    }
    convert_bin_mountainsort(directory, current_basename, **sort_settings)

    print('-------------------')

print('Finished Analysis')

Analyzing set_file 1/1: 
Using the following detect_threshold: 4.00
[2019-08-22 17:52:17]: The following set file has already been created: C:\Users\geba\Desktop\5MinRecording\20171020-RAW-5MIN_ms.set, skipping creation!#Red
[2019-08-22 17:52:17]: Converting the following tetrode: 1!
[2019-08-22 17:52:24]: The following filename already exists: C:\Users\geba\Desktop\5MinRecording\20171020-RAW-5MIN_T1_filt.mda, skipping conversion!#Red
[2019-08-22 17:52:24]: Converting the following tetrode: 2!
[2019-08-22 17:52:29]: The following filename already exists: C:\Users\geba\Desktop\5MinRecording\20171020-RAW-5MIN_T2_filt.mda, skipping conversion!#Red
[2019-08-22 17:52:29]: Converting the following tetrode: 3!
[2019-08-22 17:52:34]: The following filename already exists: C:\Users\geba\Desktop\5MinRecording\20171020-RAW-5MIN_T3_filt.mda, skipping conversion!#Red
[2019-08-22 17:52:34]: Converting the following tetrode: 4!
[2019-08-22 17:52:38]: The following filename already exists: C:\Users\ge