# This notebook aims to convert Open Ephys output files (.raw.kwd) and folders to proper names and formats.
# This notebook contains the following sections:

## Rename Directories and files altogether
## Convert .raw.kwd files and save .dat files
## Save .prm files
## Save .eeg files

### Run all the cells once, then set your preferred values in the last cell and run it. The last cell calls all the functions defined in the previous cells.

-Importing modules

In [1]:
# Widen the notebook container to the full browser width.
# display/HTML are imported from the public IPython.display module; importing display from
# IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import os
import tables as tb
import glob
from copy import deepcopy
from datetime import datetime,timedelta
import numpy as np
import xmltodict

def find_file(path, extension=['.raw.kwd']):
    """
    Recursively collect the files under 'path' whose names end with one of the given extensions.

    path: directory to search; all its sub-directories are walked with os.walk.
    extension: a list of desired file extensions, ex: ['.dat','.prm'], or a single string,
               which is split on whitespace ('.dat .prm' is read as two extensions).
    Returns a list of joined paths (directory + file name), in os.walk order.
    Each file is listed once, even when its name ends with several of the extensions
    (ex: '.kwd' and '.raw.kwd').
    """
    if isinstance(extension, str):
        extension = extension.split()   #turning a string into a list of extensions
    #str.endswith accepts a tuple: one test per file covers every extension, so the same
    #path cannot be emitted twice
    suffixes = tuple(extension)
    return [os.path.join(dirpath, name)
            for dirpath, _dirnames, filenames in os.walk(path)
            for name in filenames if name.endswith(suffixes)]

## Renaming

-Functions and classes required for renaming all the OpenEphys files and folders

In [3]:
def rename_folder(folderToRemove,newFolder):
    '''
    Move every entry of "folderToRemove" into "newFolder", then delete "folderToRemove".

    folderToRemove (=old), newFolder: full paths to folders
    "newFolder" is created if it doesn't exist; if it already exists the folders are merged.
    Entries are renamed on the way: the old folder name is replaced by the new folder name
    (example: folderToRemove_1.txt becomes newFolder_1.txt).
    When an entry with the target name already exists in "newFolder", that one is kept and
    the entry coming from "folderToRemove" is deleted.
    Nothing is done when both paths designate the same folder.
    '''
    #same folder: every target would "already exist", so each file would be deleted and the
    #folder itself removed; leave it untouched instead
    if os.path.realpath(folderToRemove)==os.path.realpath(newFolder):
        return
    #if the new folder doesn't exist, create it
    if not os.path.exists(newFolder):
        os.mkdir(newFolder)
    #folder names do not change during the loop
    oldFolderName=os.path.basename(folderToRemove.rstrip(os.sep))
    newFolderName=os.path.basename(newFolder.rstrip(os.sep))
    #move and rename the files
    for f in os.listdir(folderToRemove):
        oldpath=os.path.join(folderToRemove,f)
        newpath=os.path.join(newFolder,f.replace(oldFolderName,newFolderName))
        if os.path.exists(newpath):
            #merge: the entry already present in the new folder wins
            os.remove(oldpath)
        else:
            os.rename(oldpath,newpath)
    os.rmdir(folderToRemove)
    
def get_folders_matching_format(rootFolder,globFormat):
    '''
    List the names (not the full paths) of the entries of rootFolder that match a glob pattern.

    rootFolder: full path where to look for session folders (ex: "/data/Rat034/Experiments")
    globFormat: glob pattern, applied inside rootFolder
    '''
    pattern=os.path.join(rootFolder,globFormat)
    names=[]
    for match in glob.glob(pattern):
        names.append(os.path.basename(match))
    return names

def get_regular_name(wrongName,wrongTimeFormat,regularTimeFormat):
    '''
    Convert a session name from one datetime format to another.

    wrongName is parsed with wrongTimeFormat (datetime.strptime) and the resulting date is
    written back with regularTimeFormat (strftime).
    '''
    parsed=datetime.strptime(wrongName,wrongTimeFormat)
    regularName=parsed.strftime(regularTimeFormat)
    return regularName

def get_date(name,timeFormat):
    '''
    Parse a session name with the given datetime format and return the datetime object.
    '''
    return datetime.strptime(name,timeFormat)

def get_regular_name_from_date(date,regularTimeFormat):
    '''
    Build a session name by formatting "date" with the strftime pattern "regularTimeFormat".
    '''
    regularName=date.strftime(regularTimeFormat)
    return regularName

def rename_all_in_rootFolder(rootFolder,wrongGlobFormat,wrongTimeFormat,regularGlobFormat,
                             regularTimeFormat,minuteDelay=2,verbose=False):
    '''
    Rename (or merge) every session folder of rootFolder whose name follows a "wrong" format.

    rootFolder: full path where to look for session folders (ex: "/data/Rat034/Experiments")
    wrongGlobFormat: glob pattern to search for the wrong session names
    wrongTimeFormat: datetime pattern to read date from wrong session names
    regularGlobFormat, regularTimeFormat: glob and datetime patterns for the right session names
    minuteDelay: if a wrong folder and a regular folder are less than minuteDelay minutes
                 apart, the wrong folder is merged into the regular one
    verbose: print one line per folder, telling whether it was renamed or merged
    '''
    delay=timedelta(minutes=minuteDelay)

    #get names and date of the regular folders
    regularFolders=get_folders_matching_format(rootFolder,regularGlobFormat)
    allRegularDates=[get_date(name,regularTimeFormat) for name in regularFolders]

    #get names of wrong folders
    for f in get_folders_matching_format(rootFolder,wrongGlobFormat):
        merge=False
        #check if there is a regular folder around the same date
        date=get_date(f,wrongTimeFormat)
        for otherDate in allRegularDates:
            if abs(date-otherDate)<delay:
                date=otherDate
                merge=True
                break
        #new name
        newFolder=get_regular_name_from_date(date,regularTimeFormat)
        newPath=os.path.join(rootFolder,newFolder)
        oldPath=os.path.join(rootFolder,f)
        #rename_folder merges whenever the destination exists, even if no date matched above
        #(the strict "<" never matches with minuteDelay=0); report those cases as merges too
        merge=merge or os.path.exists(newPath)
        #rename/merge
        rename_folder(oldPath,newPath)
        if verbose:
            if merge:
                print("Merged %s into %s"%(f,newFolder))
            else:
                print("Renamed %s in %s"%(f,newFolder))
                
def rename_files_to_match_folder(folderPath,extensionList):
    '''
    Give the files of a session folder the name of the folder, keeping their extension.

    folderPath= "/data/Rat034/Experiments/Rat034_2015_etc"
    extensionList= [".dat", ".prm", ".kwik"]  (the leading dot is added when missing)
    --> Renames "someFile.dat" into "Rat034_2015_etc.dat"
    For each file, the first matching extension of extensionList is used.
    A file is left untouched (and reported) when the target name is already taken, so that
    two files sharing an extension cannot overwrite each other.
    '''
    folderName=os.path.basename(folderPath.rstrip(os.sep))
    extensionList=[ext if ext.startswith(".") else "."+ext for ext in extensionList]
    for f in os.listdir(folderPath):
        for ext in extensionList:
            if f.endswith(ext):
                newName=folderName+ext
                #already correctly named: nothing to do
                if f!=newName:
                    newPath=os.path.join(folderPath,newName)
                    if os.path.exists(newPath):
                        #os.rename would silently replace the existing file on POSIX systems
                        print("Skipped %s: %s already exists"%(f,newName))
                    else:
                        os.rename(os.path.join(folderPath,f),newPath)
                break


-Function to rename the files and folders of OpenEphys as a Batch

In [4]:
def rename_batch(root,animalList,verbose,minuteDelay,extensionList):
    '''
    Rename the session folders and files of several animals in one go.

    root: folder containing one sub-folder per animal
    animalList: names of the animals to process (ex: ["Rat001"])
    verbose, minuteDelay: passed to rename_all_in_rootFolder
    extensionList: extensions of the files to look for and to rename after their folder

    For each animal, the files with one of the extensions are searched under root/animal; the
    folder two levels above each file is taken as a folder containing session folders. In each
    of these folders:
      - session folders named in one of the WRONG_FORMATS are renamed to the REGULAR_FORMAT
      - in every session folder starting with the animal name, files are renamed after the
        folder and "settings.xml" becomes "settingsOpenEphys.xml"
    '''
    #glob pattern / datetime pattern pairs; "animal" is replaced by the animal name.
    #In the glob patterns "[00-99]" is a character class equivalent to "[0-9]" and "?" matches
    #any single character.
    WRONG_FORMATS={
    "openEphys":(
        "animal_20{0}-{0}-{0}_{0}-{0}-{0}".format("[00-99]?"),    #ex: 'Rat001_2022-02-21_11-28-17'
        "animal_%Y-%m-%d_%H-%M-%S"
    ),
    "seconds":(
        "animal_20{0}_{0}_{0}_{0}_{0}_{0}".format("[00-99]?"),    #ex: 'Rat001_2013_12_26_20_47_54'
        "animal_%Y_%m_%d_%H_%M_%S"
    ),
    "tiret":(
        "animal_20{0}_{0}_{0}-{0}_{0}".format("[00-99]?"),        #ex: 'Rat001_2039_10_15-06_27'
        "animal_%Y_%m_%d-%H_%M"
    ),
    "tiretSeconds":(
        "animal_20{0}_{0}_{0}-{0}_{0}_{0}".format("[00-99]?"),    #ex: 'Rat001_2033_06_23-21_24_46'
        "animal_%Y_%m_%d-%H_%M_%S"
    ),
    "openEphys2":(
        "20{0}-{0}-{0}_{0}-{0}-{0}".format("[00-99]?"),           #ex: '2015-02-15_11-59-48'
        "%Y-%m-%d_%H-%M-%S"
    ),
    }
    # The right format
    REGULAR_FORMAT=("animal_20{0}_{0}_{0}_{0}_{0}".format("?[00-99]"),  #ex: 'Rat001_2039_10_15_06_27'
                "animal_%Y_%m_%d_%H_%M")

    for animal in animalList:
        rawFileList=find_file(os.path.join(root,animal),extensionList)
        rootFolderList=[os.path.split(os.path.split(rawFile)[0])[0] for rawFile in rawFileList]
        #rootFolderList keeps paths to the directories within 'root' which contain session folders
        rootFolderList=list(set(rootFolderList))#to keep unique members

        for rootFolder in rootFolderList:
            print("animal %s: %s"%(animal,rootFolder))

            #RENAME FOLDERS
            regularGlob=REGULAR_FORMAT[0].replace("animal",animal)
            regularTime=REGULAR_FORMAT[1].replace("animal",animal)
            for name in WRONG_FORMATS:
                globFormat=WRONG_FORMATS[name][0].replace("animal",animal)
                timeFormat=WRONG_FORMATS[name][1].replace("animal",animal)
                rename_all_in_rootFolder(rootFolder,globFormat,timeFormat,regularGlob,regularTime,
                                         minuteDelay=minuteDelay,verbose=verbose)

            #RENAME FILES
            #for every session folder of the animal
            for folder in os.listdir(rootFolder):
                if not folder.startswith(animal):
                    continue
                path=os.path.join(rootFolder,folder)
                #a plain file can also start with the animal name: only folders are sessions
                if not os.path.isdir(path):
                    continue
                print("*"+folder)
                #rename files
                rename_files_to_match_folder(path,extensionList)

                #keep the OpenEphys settings under an explicit name
                settingsPath=os.path.join(path,"settings.xml")
                if os.path.lexists(settingsPath):
                    os.rename(settingsPath,os.path.join(path,"settingsOpenEphys.xml"))

            print("\nRenaming done")
            print("--------")


#-----------------------------------------------------------------------------------------------
#Example run of rename_batch with hard-coded values.
#The guard is true only when no "__file__" name is defined in the current namespace
#(presumably an interactive/notebook session rather than a script file - TODO confirm).
if "__file__" not in dir():
    #Folder holding one sub-folder per animal (rename_batch searches root/<animal>)
    root="/data/SWI002/22/"
    #Animal where to run the script
    animalList=["SWI002"]

    #Whether to print "X renamed in Y"
    verbose=True  

    #If a wrong folder and a regular folder are less than "minuteDelay" apart, they are merged
    # example: if minuteDelay=2, 'Rat001_2034_04_22-04_26_12' would be merged with 'Rat001_2034_04_22_04_27'
    # with 0, the strict "<" comparison in rename_all_in_rootFolder never matches a date
    minuteDelay=0

    #File to rename (ex: "someFile.dat" -> "Rat024_2015_etc.dat")
    #extensionList=[".dat","+6.raw.kwd",".nrs",".kwx",".kwik",".prm",".prb"]
    extensionList=[".dat",".raw.kwd",".nrs",".kwx",".kwe",".eeg"]
    
    #--------------------------------------------------------------------------------
    rename_batch(root,animalList,verbose,minuteDelay,extensionList)

## Converting
-Required functions and classes to open .raw.kwd file

In [5]:
# -------------------------------------------------------------- Chunk class from Klusta
# reads data chunk by chunk (instead of all at once, as the files are big)
class Chunk(object):
    """
    One window over the samples of a recording.

    data: 2D array-like (samples x channels); it is only sliced when one of the data_chunk_*
          properties is read
    nsamples, nchannels: dimensions of data; both are taken from data.shape when neither is given
    bounds: (s_start, s_end, keep_start, keep_end) sample indices, as yielded by chunk_bounds
    dtype: type the data is converted to (through convert_dtype) when read
    recording, nrecordings: index of the recording the chunk belongs to, and total count
    """
    def __init__(self, data=None, nsamples=None, nchannels=None,
                 bounds=None, dtype=None, recording=0, nrecordings=1):
        # neither dimension supplied: read both from the data itself
        if nsamples is None and nchannels is None:
            nsamples, nchannels = data.shape
        full_start, full_stop, kept_start, kept_stop = bounds
        self._data = data
        self.nsamples = nsamples
        self.nchannels = nchannels
        self.dtype = dtype
        self.recording = recording
        self.nrecordings = nrecordings
        self.s_start = full_start
        self.s_end = full_stop
        self.keep_start = kept_start
        self.keep_end = kept_stop
        self.window_full = (full_start, full_stop)
        self.window_keep = (kept_start, kept_stop)

    @property
    def data_chunk_full(self):
        """Samples s_start..s_end (all channels), converted to self.dtype."""
        return convert_dtype(self._data[self.s_start:self.s_end, :], self.dtype)

    @property
    def data_chunk_keep(self):
        """Samples keep_start..keep_end (all channels), converted to self.dtype."""
        return convert_dtype(self._data[self.keep_start:self.keep_end, :], self.dtype)

def chunk_bounds(nsamples, chunk_size, overlap=0):
    """
    Yield (s_start, s_end, keep_start, keep_end) tuples that split nsamples samples in chunks.

    Consecutive chunks are chunk_size long and share `overlap` samples. The "keep" interval
    of a chunk starts where the previous one stopped and stops overlap // 2 samples before
    the end of the chunk. The last chunk stops (and is kept) up to nsamples.
    The first chunk is always (0, chunk_size, ...), whatever nsamples is.
    """
    first, last = 0, chunk_size
    kept_from, kept_to = first, last - overlap // 2
    yield first, last, kept_from, kept_to

    # full-size chunks, as long as one more still ends before nsamples
    while True:
        following = last - overlap
        if not (following + chunk_size < nsamples):
            break
        first, last = following, following + chunk_size
        kept_from, kept_to = kept_to, last - overlap // 2
        if first < last:
            yield first, last, kept_from, kept_to

    # final chunk: whatever remains up to nsamples
    first, last = last - overlap, nsamples
    kept_from, kept_to = kept_to, last
    if first < last:
        yield first, last, kept_from, kept_to

def convert_dtype(data, dtype=None, factor=None):
    """
    Convert an array to another dtype, rescaling the values when needed.

    data: numpy array
    dtype: target dtype; when not given the data is returned untouched
    factor: multiplier applied before the cast; when not given it is looked up in
            _dtype_factors with the (old dtype, new dtype) key, defaulting to 1
    Float data (float32/float64) is first normalized by its largest absolute value, so the
    factor becomes the new peak amplitude. All-zero data is not normalized (that would be
    a division by zero).
    Returns the same object when no conversion is needed, a converted copy otherwise.
    """
    if not dtype:
        return data
    if data.shape[0] == 0:
        return data.astype(dtype)
    dtype_old = data.dtype
    if dtype_old == dtype:
        return data
    # NOTE(review): _get_dtype and _dtype_factors are not defined in the visible part of
    # the file - they are expected at module level; confirm they exist.
    key = (_get_dtype(dtype_old), _get_dtype(dtype))
    factor = factor or _dtype_factors.get(key, 1)
    if dtype_old in (np.float32, np.float64):
        peak = np.abs(data).max()
        # silent signal: dividing by the peak would give inf/nan before the cast
        if peak != 0:
            factor = factor/peak
    if factor != 1:
        return (data * factor).astype(dtype)
    else:
        return data.astype(dtype)
#------------------------------------------------------------------------------ open a file, from kwiklib 
def open_file(path,mode="r"):
    """
    Open an HDF5 file with PyTables and return the file object.

    When the file cannot be opened (IOError), a warning carrying the error message is
    issued and None is returned instead of raising.
    """
    import warnings   #local import: only this function needs it
    try:
        return tb.open_file(path, mode)
    except IOError as e:
        #exceptions have no ".message" attribute in Python 3: use str(e)
        warnings.warn("IOError: " + str(e))
        return None
# ------------------------------------------------------------------- KWD and .dat data reader from Klusta
class BaseRawDataReader(object):
    """
    Base class of the raw data readers.

    Subclasses provide the `nrecordings` attribute and implement get_recording_data.
    dtype_to: dtype handed to every Chunk produced by chunks()
    """
    def __init__(self, dtype_to=np.int16):
        self.dtype_to = dtype_to

    def next_recording(self):
        """Yield (recording index, recording data) pairs; self.recording tracks the index."""
        for index in range(self.nrecordings):
            self.recording = index
            yield index, self.get_recording_data(index)

    def get_recording_data(self, recording):
        """Return the data of one recording - to be implemented by subclasses."""
        raise NotImplementedError()

    def chunks(self, chunk_size=None, chunk_overlap=0):
        """Yield the successive Chunk objects of every recording."""
        for recording, data in self.next_recording():
            assert chunk_size is not None, "You need to specify a chunk size."
            nsamples = data.shape[0]
            for bounds in chunk_bounds(nsamples, chunk_size=chunk_size, overlap=chunk_overlap):
                yield Chunk(data, bounds=bounds, dtype=self.dtype_to,
                            recording=recording, nrecordings=self.nrecordings)
class KwdRawDataReader(BaseRawDataReader):
    """
    Reader of the recordings stored in a .raw.kwd (HDF5) file.

    The file is opened read-only and stays open in self.kwd; closing it (self.kwd.close())
    is left to the caller.
    Raises IOError when the file cannot be opened.
    """
    def __init__(self, filename, dtype_to=np.int16):
        self.kwd = open_file(filename, 'r')
        #open_file signals a failure by returning None: fail here with the file name rather
        #than with an AttributeError on the next line
        if self.kwd is None:
            raise IOError("Could not open %s"%(filename))
        #one child of /recordings per recording
        self.nrecordings = self.kwd.root.recordings._v_nchildren
        super(KwdRawDataReader, self).__init__(dtype_to=dtype_to)
    def get_recording_data(self, recording):
        """Return the 'data' node of /recordings/<recording> (not loaded in memory)."""
        data = self.kwd.root.recordings._f_get_child(str(recording)).data
        return data

-Function to open the .raw.kwd file and save the .dat file

In [6]:
def convert_kwd_to_dat(filename,outputname=None,overwrite=False):
    """
    Write the samples of a .raw.kwd file to a flat binary .dat file.

    filename: path to the .raw.kwd file
    outputname: path of the .dat file; defaults to filename with "raw.kwd" replaced by "dat"
    overwrite: whether an existing .dat file is replaced; when False and the file exists,
               nothing is written
    Returns [number of channels, sampling rate] (both read from recording 0) when the .dat
    file was written or already existed, False when filename is not a .raw.kwd file or when
    the writing failed.
    Raises ValueError when recording 0 holds less than one second of data.
    """
    #output file name
    if outputname is None:
        outputname=filename[:-7]+"dat"
    out=os.path.basename(outputname)

    if not filename.endswith('.raw.kwd'):
        print("Error: the raw data file doesn't end with '.raw.kwd'")
        return False

    #read number of channels in kwd file
    with tb.open_file(filename,"r") as kwd:
        print("\n\nopenning %s"%(filename))
        #the shape of the node is known without loading the samples in memory
        dataLen,nchannels=kwd.get_node("/recordings/0/data").shape
        sampling_rate=kwd.get_node("/recordings/0/")._v_attrs.__getitem__('sample_rate')

        #samples are expected along the longest dimension
        if dataLen<nchannels:
            dataLen,nchannels=nchannels,dataLen
        if dataLen<sampling_rate:    #not an important condition! Could be ignored!
            raise ValueError('Data Length too short')

        print("number of channels: %s"%(nchannels))
        print("sampling rate: %sHz"%(sampling_rate))
        print("Converting...")

    if os.path.exists(outputname):
        if overwrite:
            print("\n%s already exist, it will be overwritten"%(out))
        else:
            print("\n%s already exists, no conversion to do"%(out))
            return [nchannels, sampling_rate]

    #instantiate reader
    kwd_data=KwdRawDataReader(filename)
    #one second of data per chunk; the stored rate may be a float, slicing needs an int
    chunk_size=int(sampling_rate)
    #create dat file, open write only in binary
    try:
        with open(outputname,'wb') as output:
            for chunk in kwd_data.chunks(chunk_size):
                #C-order bytes of the whole chunk: same bytes as writing it row by row
                output.write(chunk.data_chunk_full.tobytes())
    except Exception as err:
        print("Dat file writing failed!")
        print(repr(err))
        return False
    finally:
        #release the HDF5 file whether the writing succeeded or not
        kwd_data.kwd.close()
    print("wrote %s"%(outputname))
    print("done")
    return [nchannels, sampling_rate]

#--------------------------------------------------------------------------------------------------
#Example run of convert_kwd_to_dat with hard-coded values.
#The guard is true only when no "__file__" name is defined in the current namespace
#(presumably an interactive/notebook session rather than a script file - TODO confirm).
if "__file__" not in dir():
    #.raw.kwd file to convert; the .dat file is written next to it
    inputFile="/home/david/Downloads/2016-11-22_09-52-54/test8/test8_2016_11_22_09_52/test8_2016_11_22_09_52.raw.kwd"
    #Whether the .raw.kwd file should be deleted after the conversion (see the note below)
    deleteKWD=False
    #Whether an existing .dat file is replaced
    overwrite=False

    convert_kwd_to_dat(inputFile,overwrite=overwrite)

    #the trailing "and False" disables this branch: the file is never deleted, whatever deleteKWD is
    if deleteKWD and os.path.exists(inputFile) and False:
        print("Delete %s"%os.path.basename(inputFile))
        os.remove(inputFile)
        

OSError: ``/home/david/Downloads/2016-11-22_09-52-54/test8/test8_2016_11_22_09_52/test8_2016_11_22_09_52.raw.kwd`` does not exist

-Save the .prm file with correct experiment name, sampling rate and channel number

In [7]:
def save_prm_file(filepath,experiment,sampling_rate,n_channels,overwrite=True):
    """
    Write the parameter file "<experiment>.prm" in the folder "filepath".

    filepath: folder where the file is written (it must already exist)
    experiment: experiment name, used for the file name and inside the file
    sampling_rate, n_channels: values written in the "traces" section of the file
    overwrite: whether an existing .prm file is replaced
    Returns True when the file was written, False when it already exists (with
    overwrite=False) or when the writing failed; in that case the error is printed.
    """
    output_name=os.path.join(filepath,experiment+".prm")
    if os.path.exists(output_name):
        if overwrite:
            print("%s already exists, it will be overwritten"%(output_name))
        else:
            print("%s already exists, no conversion to do"%(output_name))
            return False
    
    #values substituted in the template below (the only "%" signs of the template)
    PlaceHolders={
        'exp_name':experiment,
        'Fs':str(sampling_rate),
        'Nch':str(n_channels),
    }
    prm_content="""experiment_name = '%(exp_name)s'
prb_file = experiment_name + '.prb'

traces=dict(
    raw_data_files  = [experiment_name + '.dat'],
    voltage_gain    = 10,
    nbits           = 16,
    dtype           = 'int16',
    sample_rate     = %(Fs)s,
    n_channels      = %(Nch)s,
    )

nbits          = 16
voltage_gain   = traces['voltage_gain']
sample_rate    = traces['sample_rate']
nchannels      = traces['n_channels']

spikedetekt=dict(
    #######################################################################
    # SpikeDetekt parameters
    #######################################################################

    # ---------------------------------------------------------------------
    # Raw data filtering and saving
    # ---------------------------------------------------------------------
    # Whether to save the .raw.kwd file if a non-HDF5 raw data format is used.
    # This is needed to visualise the data in TraceView etc, and speeds up
    # future runs of SpikeDetekt. If a .raw.kwd file is used as the input,
    # it will never be overwritten.
    save_raw = True,
    # Whether to save the .high.kwd file with HPF data used for spike
    # detection. This is processed using a Butterworth band-pass filter.
    save_high = False,
    # Bandpass filter low corner frequency
    filter_low = 500.,
    # Bandpass filter high corner frequency
    filter_high = 0.95 * .5 * sample_rate,
    # Order of Butterworth filter.
    filter_butter_order = 3,
    # Whether to save a .low.kwd file; this is processed using a Hamming
    # window FIR filter, then subsampled 16x to save space when storing.
    save_low = False,
    
    # ---------------------------------------------------------------------
    # Chunks
    # ---------------------------------------------------------------------
    # SpikeDetekt processes the raw data in chunks with small overlaps to
    # catch spikes which would otherwise span two chunks. These options
    # will change the default chunk size and overlap.
    chunk_size = int(1. * sample_rate), # 1 second
    chunk_overlap = int(.015 * sample_rate), # 15 ms


    # ---------------------------------------------------------------------
    # Threshold setting for spike detection
    # ---------------------------------------------------------------------
    # Change this to 'positive' to detect positive spikes.
    detect_spikes = 'negative',
    # SpikeDetekt takes a set of uniformly distributed chunks throughout
    # the high-pass filtered data to estimate its standard deviation. These
    # parameters select how many excerpts are used and how long each of them are.
    nexcerpts = 50,
    excerpt_size = int(1. * sample_rate), # 1 second
    # This is then used to calculate a base threshold which is multiplied
    # by the two parameters below for the two-threshold detection process.
    threshold_strong_std_factor = 4.5,
    threshold_weak_std_factor = 2.,

    # ---------------------------------------------------------------------
    # Spike extraction
    # ---------------------------------------------------------------------
    # The number of samples to extract before and after the centre of the
    # spike for waveforms. Then, waveforms_nsamples is calculated using the
    # formula: waveforms_nsamples = extract_s_before + extract_s_after
    extract_s_before = int(0.0008* sample_rate),
    extract_s_after  = int(0.0008* sample_rate),

    #---------------------------------------------------------------------
    # Features
    # ---------------------------------------------------------------------
    # Number of features (PCs) per channel.
    nfeatures_per_channel = 3,
    # The number of spikes used to determine the PCs
    pca_nwaveforms_max = 10000,
    # ---------------------------------------------------------------------
    # Advanced
    # ---------------------------------------------------------------------
    # Number of samples to use in floodfill algorithm for spike detection
    #connected_component_join_size = 1, # 1 sample
    connected_component_join_size = int(.00005 * sample_rate), # 0.05ms
    # Waveform alignment
    weight_power = 2,
    # Whether to make the features array contiguous
    features_contiguous = True,
    )

#Mostafa: Don't know if the following section is working and what does it do?!
##############################################################
# KlustaKwik parameters (must be prefixed by KK_). Uncomment to override
# the defaults, which can be shown by running 'klustakwik' with no options
###############################################################

# This causes KlustaKwik to perform clustering on a subset of spikes and
# estimate the assignment of the other spikes. This causes a speedup in
# computational time (by a rough factor of KK_Subset), though will not
# significantly decrease RAM usage. For long runs where you are unsure of
# the data quality, you can first use KK_Subset = 50 to check the
# clustering quality before performing a Subset 1 (all spikes) run.
KK_Subset = 1

# The largest permitted number of clusters, so cluster splitting can produce
# no more than n clusters. Note: This must be set higher than MaskStarts.
KK_MaxPossibleClusters = 1000

# Maximum number of iterations. ie. it won't try more than n iterations
# from any starting point.
KK_MaxIter = 10000

# You can start with a chosen fixed number of clusters derived from the
# mask vectors, set by KK_MaskStarts.
KK_MaskStarts = 500

# The number of iterations after which KlustaKwik first attempts to split
# existing clusters. KlustaKwik then splits every SplitEvery iterations.
KK_SplitFirst = 20

# The number of iterations after which KlustaKwik attempts to split existing
# clusters. When using masked initializations, to save time due to excessive
# splitting, set SplitEvery to a large number, close to the number of distinct
# masks or the number of chosen starting masks.
KK_SplitEvery = 40

# KlustaKwik uses penalties to reduce the number of clusters fit. The parameters PenaltyK and PenaltyKLogN are 
# given positive values. The higher the values, the fewer clusters you obtain. Higher penalties
# discourage cluster splitting. PenaltyKLogN also increases penalty when there are more points. 
#-PenaltyK 0 -PenaltyKLogN 1 is the default, corresponding to the "Bayesian Information Criterion".
# -PenaltyK 1 -PenaltyKLogN 0 corresponds to "Akaike's Information Criterion". This produces a larger number 
# of clusters, and is recommended if you are find that clusters corresponding to different neurons are incorrectly merged.
KK_PenaltyK = 0.
KK_PenaltyKLogN = 1.

# Specifies a seed for the random number generator.
KK_RandomSeed = 1

# The number of unmasked spikes on a certain channel needed to unmask that
# channel in the cluster. This prevents a single noisy spike, or coincident
# noise on adjacent channels from slowing down computation time.
KK_PointsForClusterMask = 10

# Setting this saves a .temp.clu file every iteration. This slows the runtime
# down reasonably significantly for small runs with many iterations, but allows
# to recover where KlustaKwik left off; useful in case of large runs where you
# are not confident that the run will be uninterrupted.
KK_SaveTempCluEveryIter = 0

# This is an integer N when, used in combination with the empty string
# for UseFeatures above, omits the last N features. This should always
# be used with KK_UseFeatures = ""
KK_DropLastNFeatures = 0

# ---------------------------------------------------------------------
# Classic 'all channels unmasked always' mode | DO NOT uncomment
# ---------------------------------------------------------------------
# To use KlustaKwik in "unmasked" mode, set this to 0.
# This disables the use of the new `masked Expectation-Maximization'
# algorithm, and sets all the channels to be unmasked on all spikes.
#KK_UseDistributional = 1

# In classic mode, KlustaKwik starts from random cluster assignments,
# running a new random start for every integer between MinClusters and
# MaxClusters. For these values to take effect, MaskStarts must be set to 0.
#KK_MinClusters = 100
#KK_MaxClusters = 110

# By default, this is an empty string, which means 'use all features'.
# Or, you can you can specify a string with 1's for features you want to
# use, and 0's for features you don't want to use. In classic mode,
# you use this option to take out bad channels. In masked mode,
# you should instead take bad channels out from the .PRB file.
#KK_UseFeatures = ""

# ---------------------------------------------------------------------
# Advanced
# ---------------------------------------------------------------------
# The algorithm will be started n times for each initial cluster count
# between MinClusters and MaxClusters.
KK_nStarts = 1

# Saves means and covariance matrices. Stops computation at each iteration.
# Manual input required for continuation.
KK_SaveCovarianceMeans = 0

# Saves a .clu file with masks sorted lexicographically.
KK_SaveSorted = 0

# Initialises using distinct derived binary masks. Use together with
# AssignToFirstClosestMask below.
KK_UseMaskedInitialConditions = 0

# If starting with a number of clusters fewer than the number of distinct
# derived binary masks, it will assign the rest of the points to the cluster
# with the nearest mask.
KK_AssignToFirstClosestMask = 0

# All log-likelihoods are recalculated every KK_FullStepEvery steps
# (see DistThresh).
KK_FullStepEvery = 20
KK_MinMaskOverlap = 0.
KK_AlwaysSplitBimodal = 0

# ---------------------------------------------------------------------
# Debugging
# ---------------------------------------------------------------------
# Turns miscellaneous debugging information on.
KK_Debug = 0
# Increasing this to 2 increases the amount of information logged to
# the console and the log.
KK_Verbose = 1
# Outputs more debugging information.
KK_DistDump = 0
# Time-saving parameter. If a point has log likelihood more than
# DistThresh worse for a given class than for the best class, the log
# likelihood for that class is not recalculated. This saves an awful lot
# of time.
KK_DistThresh = 6.907755
# All log-likelihoods are recalculated if the fraction of instances
# changing class exceeds ChangedThresh (see DistThresh).
KK_ChangedThresh = 0.05
# Produces .klg log file (default is yes, to switch off do -Log 0).
KK_Log = 1
# Produces parameters and progress information on the console. Set to
# 0 to suppress output in batches.
KK_Screen = 1
# Helps normalize covariance matrices.
KK_PriorPoint = 1
# Outputs number of initial clusters.
KK_SplitInfo = 1

#No Ram Limit
KK_RamLimitGB = -1
    """%PlaceHolders
    try:
        with open(output_name,'w') as f:
            f.write(prm_content)
            print("PRM file created!")
            return True
    except Exception as err:
        #report why the file could not be written (ex: the folder doesn't exist)
        print("PRM file failed!")
        print(repr(err))
        return False

#---------------------------------------------------------------------------------------------

#Example run of save_prm_file with hard-coded values.
#The guard is true only when no "__file__" name is defined in the current namespace
#(presumably an interactive/notebook session rather than a script file - TODO confirm).
if "__file__" not in dir():
    #Folder where the .prm file is written
    #NOTE(review): "SWI0022" differs from the "SWI002" animal folder used in the renaming example - confirm the path
    FilePath="/data/SWI0022/3/2016-09-27_15-47-38/"
    #Experiment name: the file is named <Exp>.prm
    Exp='experiment1_100'
    #Sampling rate and number of channels written in the file
    Fs=30000
    nCh=27
    
    save_prm_file(FilePath,Exp,Fs,nCh,overwrite=True)

PRM file failed!


Functions to read and convert .continuous files

In [8]:
def loadFolderToArray(folderpath, channels = 'all', dtype = float, source = '100'):
    '''Load the continuous files of the specified folder to a single numpy array.

    folderpath: folder holding the "<source>_CH<n>.continuous" files
    channels: 'all' (default) loads every CH file in numerical order, followed by every AUX
              file in numerical order; a list of channel numbers loads these CH files only,
              in the given order
    dtype: float or np.int16, passed to loadContinuous
    source: prefix of the file names
    Returns an array of shape (samples, files); the number of samples is the one of the first
    file.'''

    #no AUX file is added when the channels are given explicitly
    aux = []
    if channels == 'all': 
        channels = _get_sorted_channels(folderpath,'_CH')
        aux      = _get_sorted_channels(folderpath,'_AUX')

    filelist = [source + '_CH' + x + '.continuous' for x in map(str,channels)]
    filelist.extend([source + '_AUX' + x + '.continuous' for x in map(str,aux)])

    print("Loading continuous files...")
    #the first file fixes the number of samples of the array
    channel_1_data = loadContinuous(os.path.join(folderpath, filelist[0]), dtype)['data']

    n_samples  = len(channel_1_data)
    n_channels = len(filelist)

    data_array = np.zeros([n_samples, n_channels], dtype)
    data_array[:,0] = channel_1_data

    for column, f in enumerate(filelist[1:], start=1):
        data_array[:, column] = loadContinuous(os.path.join(folderpath, f), dtype)['data']

    return data_array

def loadContinuous(filepath, dtype = float):
    '''Read one .continuous file.

    filepath: path to the file
    dtype: float (samples are multiplied by the 'bitVolts' value of the header) or np.int16
           (samples are kept as stored)
    Returns a dict with the keys 'header' (dict returned by readHeader), 'timestamps',
    'data' and 'recordingNumber' (numpy arrays).
    Raises Exception when a record doesn't announce SAMPLES_PER_RECORD samples.
    '''

    assert dtype in (float, np.int16), \
      'Invalid data type specified for loadContinous, valid types are float and np.int16'

    ch = { }
    recordNumber = np.intp(-1)   # index of the last record read so far

    # arrays are pre-allocated at their maximum size and cut down at the end
    samples = np.zeros(MAX_NUMBER_OF_CONTINUOUS_SAMPLES, dtype)
    timestamps = np.zeros(MAX_NUMBER_OF_RECORDS)
    recordingNumbers = np.zeros(MAX_NUMBER_OF_RECORDS)
    # position in `samples` of the first sample of each record
    indices = np.arange(0,MAX_NUMBER_OF_RECORDS*SAMPLES_PER_RECORD, SAMPLES_PER_RECORD, np.dtype(np.int64))

    #read in the data; the file is closed even when a corrupted record raises
    with open(filepath,'rb') as f:

        header = readHeader(f)

        fileLength = os.fstat(f.fileno()).st_size

        while f.tell() < fileLength:

            recordNumber += 1

            # np.fromfile returns 1-element arrays: take the scalar explicitly
            timestamps[recordNumber] = np.fromfile(f,np.dtype('<i8'),1)[0] # little-endian 64-bit signed integer
            N = np.fromfile(f,np.dtype('<u2'),1)[0] # little-endian 16-bit unsigned integer

            if N != SAMPLES_PER_RECORD:
                raise Exception('Found corrupted record in block ' + str(recordNumber))

            recordingNumbers[recordNumber] = np.fromfile(f,np.dtype('>u2'),1)[0] # big-endian 16-bit unsigned integer

            data = np.fromfile(f,np.dtype('>i2'),N)  # big-endian 16-bit signed integer
            if dtype == float: # Convert data to float array and convert bits to voltage.
                data = data * float(header['bitVolts'])
            try:
                samples[indices[recordNumber]:indices[recordNumber+1]] = data
            except Exception as e:
                # ex: incomplete record; the pre-allocated zeros are left in place
                print("error reading ",filepath)
                print(repr(e))
                print("replacing missing values with zeros.")

            f.read(10) # skip the 10-byte record marker

    # NOTE(review): recordNumber is the index of the last record read, so these slices leave
    # the last record out (and misbehave for a file without any record). Kept as is because
    # the length of the output depends on it - confirm whether this is intended.
    ch['header'] = header 
    ch['timestamps'] = timestamps[0:recordNumber]
    ch['data'] = samples[0:indices[recordNumber]]  # OR use downsample(samples,1), to save space
    ch['recordingNumber'] = recordingNumbers[0:recordNumber]
    return ch
    
def readHeader(f):
    '''Read the 1024-byte header of an open (binary) file and return it as a dict of strings.

    The header is a list of "header.<key> = <value>;" items. Line breaks and the "header."
    prefixes are removed, then each item is split at its first " = "; items written without
    spaces ("key=value") are split at the "=" and stripped. Items without "=" are ignored.
    '''
    header = { }
    h = f.read(1024).decode().replace('\n','').replace('header.','')
    for item in h.split(';'):
        if ' = ' in item:
            #split once only: the value itself may contain " = "
            key, value = item.split(' = ', 1)
        elif '=' in item:
            key, _, value = item.partition('=')
            key, value = key.strip(), value.strip()
        else:
            continue
        header[key] = value
    return header
    
def _get_sorted_channels(folderpath,sep='_CH'):
    return sorted([int(f.split(sep)[1].split('.')[0]) for f in os.listdir(folderpath) 
                    if '.continuous' in f and sep in f]) 



# constants describing the .continuous files
NUM_HEADER_BYTES = 1024      # size of the header read at the start of a file
SAMPLES_PER_RECORD = 1024    # number of samples every record must announce
# size of each continuous record in bytes
# NOTE(review): loadContinuous reads 8 + 2 + 2 bytes before the samples of a record, so the
# "16" below may be a bit count rather than a byte count - confirm before using this value.
RECORD_SIZE = 8 + 16 + 2*SAMPLES_PER_RECORD + 10
RECORD_MARKER = np.array(list(range(9)) + [255])

# constants for pre-allocating matrices:
MAX_NUMBER_OF_SPIKES = 10**6
MAX_NUMBER_OF_RECORDS = 10**6
MAX_NUMBER_OF_CONTINUOUS_SAMPLES = 10**8
MAX_NUMBER_OF_EVENTS = 10**6

def pack_continuous_files(folderpath, filename = 'openephys.dat', source='100', channels = 'all', dref = None):

    '''Pack the .continuous files of a folder into one binary file written with numpy's tofile.

    The data are requested from loadFolderToArray as np.int16, i.e. they are not converted
    to float voltages and rounded back to integers before being written.
    folderpath: folder holding the .continuous files; the output file is written there too.
    filename:  name of the output file (created inside 'folderpath').
    source: string name of the source that openephys uses as the prefix. It is usually 100,
            if the headstage is the first source added, but can specify something different.
    channels:  List of channel numbers specifying order in which channels are packed. By default
               all CH continuous files are packed in numerical order.
    dref:  Digital referencing - either supply a channel number or 'ave' to reference to the
           average of packed channels. Any falsy value (None, 0, '') disables referencing.
    '''

    packed = loadFolderToArray(folderpath, channels, np.int16, source)

    if dref:
        if dref != 'ave':
            print('Digital referencing to channel ' + str(dref))
            if channels == 'all':
                channels = _get_sorted_channels(folderpath)
            ref_column = channels.index(dref)
            # copy the reference first: its own column is overwritten in the loop below
            reference = deepcopy(packed[:,ref_column])
        else:
            print('Digital referencing to average of all channels.')
            reference = np.mean(packed,1)
        # NOTE(review): assuming loadFolderToArray returns an int16 array as requested,
        # the subtraction result is cast back to int16 on assignment - confirm this is intended.
        for col in range(packed.shape[1]):
            packed[:,col] = packed[:,col] - reference

    print('Packing data to file: ' + filename)
    target = os.path.join(folderpath,filename)
    packed.tofile(target)
    print(".dat file created!")

-Function to create .eeg files

In [9]:
def create_eeg(inputName,nChannels=None,downsample=24,inputSamplingRate=30000):
    '''
    Downsample a raw recording (.raw.kwd or .dat) and save the result as a .eeg file
    next to the input (same name, extension replaced by ".eeg").

    eegData = rawData[::downsample,:]  (every 'downsample'-th sample of each channel is kept;
    the samples are simply picked, nothing else is applied to the data here)

    inputName: path of the input file, must end with ".raw.kwd" or ".dat"
    nChannels: number of channels of the input, required to read a ".dat" file
    downsample: decimation factor, integer >= 1 (default 24)
    inputSamplingRate: sampling rate of the input; only used to size the chunks (about 1 second)

    Returns True once the .eeg file is written, False (after printing the reason)
    when the arguments cannot be used.
    '''
    downsample=int(downsample)
    if downsample<1:
        # a step of 0 (or less) cannot be used to pick samples
        print("Error in create_eeg: downsample must be an integer >= 1")
        return False

    if inputName.endswith(".raw.kwd"):
        raw_data=KwdRawDataReader(inputName)
        outputName=inputName[:-7]+"eeg"
    elif inputName.endswith(".dat"):
        # check the arguments before the reader is built
        if nChannels is None:
            print("Error in create_eeg: nChannels not defined, can't read .dat")
            return False
        raw_data=DatRawDataReader(inputName,nChannels=nChannels)
        outputName=inputName[:-3]+"eeg"
    else:
        print("Error in create_eeg: input file doesn't end with '.raw.kwd' or '.dat'")
        return False

    # about 1 second per chunk, rounded down to a multiple of 'downsample' so that the
    # kept samples stay evenly spaced from one chunk to the next
    # (assumes chunks() yields consecutive, non-overlapping blocks of chunk_size samples - TODO confirm)
    chunk_size=max(downsample,(int(inputSamplingRate)//downsample)*downsample)
    with open(outputName,'wb') as output:
        for chunk in raw_data.chunks(chunk_size=chunk_size):
            chunk_raw = chunk.data_chunk_full # shape: (nsamples, nchannels)
            decimated = chunk_raw[::downsample,:]
            # one write per chunk; row-major bytes, i.e. the same bytes as writing row by row
            output.write(np.asarray(decimated).tobytes())
    print('\n.eeg file created at: %s'%(outputName))
    return True

-Function to save .dat, .prm, and .eeg files as a Batch

In [10]:
def _read_continuous_settings(settingsPath):
    '''
    Read the settings stored in a "Continuous_Data.openephys" file.

    Returns (sampling_rate, source, nchannels):
      sampling_rate: '@samplerate' of the (first) RECORDING element, as float
      source:        '@id' of its PROCESSOR element, as found in the file (str)
      nchannels:     number of CHANNEL elements of that PROCESSOR
    Raises FileNotFoundError when 'settingsPath' does not exist.
    '''
    if not os.path.exists(settingsPath):
        raise FileNotFoundError(
            "There must be a \"Continuous_Data.openephys\" file in the same folder"
            " as .continuous files to load the settings: " + settingsPath)

    with open(settingsPath,'r') as f:
        recording=xmltodict.parse(f.read())['EXPERIMENT']['RECORDING']
    # a repeated element is parsed as a list, a single one as a dict
    if isinstance(recording, list):
        recording=recording[0]
    channelEntries=recording['PROCESSOR']['CHANNEL']
    if not isinstance(channelEntries, list):
        # a single CHANNEL is a dict: len() would count its attributes, not channels
        channelEntries=[channelEntries]
    # attribute values are parsed as text; the sampling rate is used in arithmetic by the caller
    sampling_rate=float(recording['@samplerate'])
    source=recording['PROCESSOR']['@id']
    return sampling_rate, source, len(channelEntries)


def dat_prm_eeg_file_save_batch(path,animalList,overwrite=False,saveEEG=False):
    '''
    Convert the recordings of several animals and save the .dat, .prm and (optionally) .eeg files.

    For every animal folder (path/animal):
      - each .raw.kwd file is converted with convert_kwd_to_dat, then a .prm file is saved
        next to it and, if 'saveEEG', a .eeg file is created from the .raw.kwd
      - each folder containing .continuous files is packed into "<folder name>.dat" with
        pack_continuous_files, then a .prm file is saved and, if 'saveEEG', a .eeg file
        is created from that .dat

    path: directory containing one folder per animal
    animalList: names of the animal folders to process
    overwrite: passed to convert_kwd_to_dat (.prm files are always overwritten)
    saveEEG: whether to create the .eeg files

    Returns False as soon as the processing of a .raw.kwd file raises; otherwise returns None.
    Raises FileNotFoundError if a .continuous folder has no "Continuous_Data.openephys" file.
    '''
    for animal in animalList:
        animalFolder=os.path.join(path,animal)

        #.raw.kwd recordings
        for rawfile in find_file(animalFolder,['.raw.kwd']):
            try:
                conversion_result= convert_kwd_to_dat(rawfile,overwrite=overwrite)
                if isinstance(conversion_result,bool):
                    print(".raw.kwd to .dat conversion failed:%s"%(rawfile))
                else:
                    nchannels, sampling_rate=conversion_result
                    experiment=os.path.splitext(os.path.splitext(os.path.split(rawfile)[1])[0])[0]
                    save_prm_file(os.path.split(rawfile)[0],experiment,sampling_rate,nchannels,overwrite=True)
                    if saveEEG:
                        create_eeg(rawfile,downsample=int(sampling_rate/1250),
                                   inputSamplingRate=sampling_rate)
            except Exception as e:
                print('Converting .dat/ saving .prm/.eeg file failed at:')
                print(rawfile)
                print(repr(e))
                print("\nConverting/saving failed")
                return False

        #.continuous recordings: one .dat per folder holding .continuous files
        continuousFiles = find_file(animalFolder,['.continuous'])
        for continuousFolder in sorted(set(os.path.dirname(f) for f in continuousFiles)):
            experiment=os.path.basename(continuousFolder)
            filename= experiment + ".dat"
            settingsPath=os.path.join(continuousFolder,"Continuous_Data.openephys")
            # NOTE(review): nchannels counts every CHANNEL entry of the settings file, while the
            # .dat is packed with channels='all' - confirm both describe the same set of channels.
            sampling_rate, source, nchannels = _read_continuous_settings(settingsPath)

            #convert to and save as .DAT file
            pack_continuous_files(continuousFolder, filename = filename, source=source, channels = 'all', dref = None)
            #save PRM file
            save_prm_file(continuousFolder,experiment,sampling_rate,nchannels,overwrite=True)
            if saveEEG:
                # downsample the .dat that was just packed (a .dat needs its channel count to be read)
                create_eeg(os.path.join(continuousFolder,filename),nChannels=nchannels,
                           downsample=int(sampling_rate/1250),inputSamplingRate=sampling_rate)
    print("\nConverting/saving done")
    
        
#------------------------------------------------------------------------------------------
# Guard: the call below only runs where "__file__" is not defined
# (e.g. when this code is executed as a notebook cell).
if "__file__" not in globals():
    Path="/data/SWI002/22/"
    animalList=[]   # no animal selected: the batch call has nothing to process

    dat_prm_eeg_file_save_batch(path=Path,animalList=animalList)
    

    


Converting/saving done


# ==========================================================
# Script to Rename and Convert as a Batch
##### 'root' must be a directory containing separate folders for each animal (ex: /data/ containing /data/Rat001, /data/Rat002, ...)
##### 'animalList' determines on which folders within the 'root' this notebook will operate (ex: Rat001, Rat002, ...).
##### renaming MUST precede converting/saving

In [14]:
if "__file__" not in globals():
    # ---- settings: edit the values below, then run this cell ----

    #The directory containing separate folders for each animal
    #(in Windows paths must be like this: root="C:\\Data\\Recordings\\", double backslash instead of single)
    root="/data/"

    #Animals (folders inside 'root') on which to run the script
    animalList=["Rat105"]

    #Whether to save .eeg files for each .raw.kwd file
    saveEEG=True

    #Whether to overwrite the existing .dat files
    overwrite=False

    #Whether to print "X renamed in Y"
    verbose=True

    #If a wrong folder and a regular folder are less than "minuteDelay" apart, they are merged
    # example: if minuteDelay=2, 'Rat001_2034_04_22-04_26_12' would be merged with 'Rat001_2034_04_22_04_27'
    minuteDelay=0

    #Files to rename (ex: "someFile.dat" -> "Rat024_2015_etc.dat")
    # alternative list: [".dat","+6.raw.kwd",".nrs",".kwx",".kwik",".prm",".prb"]
    extensionList=[".dat",".raw.kwd",".nrs",".kwe",".eeg"]

    # ---- run: renaming first, then converting/saving ----
    rename_batch(root,animalList,verbose,minuteDelay,extensionList)
    dat_prm_eeg_file_save_batch(root,animalList,overwrite=overwrite,saveEEG=saveEEG)

animal Rat105: /data/Rat105/Experiments
*Rat105_2016_12_07_17_37
*Rat105_2016_12_07_15_09
*Rat105_2016_12_05_17_57
*Rat105_2016_12_06_11_13
*Rat105_2016_12_07_17_04

Renaming done
--------


openning /data/Rat105/Experiments/Rat105_2016_12_07_17_37/Rat105_2016_12_07_17_37.raw.kwd
number of channels: 37
sampling rate: 20000.0Hz
Converting...
wrote /data/Rat105/Experiments/Rat105_2016_12_07_17_37/Rat105_2016_12_07_17_37.dat
done
PRM file created!

.eeg file created at: /data/Rat105/Experiments/Rat105_2016_12_07_17_37/Rat105_2016_12_07_17_37.eeg


openning /data/Rat105/Experiments/Rat105_2016_12_07_15_09/Rat105_2016_12_07_15_09.raw.kwd
number of channels: 37
sampling rate: 30000.0Hz
Converting...
wrote /data/Rat105/Experiments/Rat105_2016_12_07_15_09/Rat105_2016_12_07_15_09.dat
done
PRM file created!

.eeg file created at: /data/Rat105/Experiments/Rat105_2016_12_07_15_09/Rat105_2016_12_07_15_09.eeg


openning /data/Rat105/Experiments/Rat105_2016_12_05_17_57/Rat105_2016_12_05_17_57.raw.kwd