In [2]:
import os
import sys
import shutil
import tables as tb
import glob
from datetime import datetime,timedelta
from random import randint

import numpy as np
from scipy import signal
#-----------------------------------------------------------------------------
# NAME FORMAT for sessions
#-----------------------------------------------------------------------------

# All the possible wrong formats
# "name of format": (glob pattern, datetime pattern)

#  put "animal" for animal name

# for info on datetime pattern: http://strftime.org/ 
# for info on glob pattern:  https://pymotw.com/2/glob/

# Each two-digit date/time field is matched with "[0-9][0-9]".
# NB: in glob syntax "[00-99]" is NOT the numeric range 00..99, it is a character
# class equivalent to "[0-9]" (one digit); followed by "?" it accepted any character
# as second "digit", so names that strptime cannot parse were matched too.
WRONG_FORMATS={
    "openEphys":(
        "animal_20{0}-{0}-{0}_{0}-{0}-{0}".format("[0-9][0-9]"),    #ex: 'Rat001_2022-02-21_11-28-17'
        "animal_%Y-%m-%d_%H-%M-%S"
    ),
    "seconds":(
        "animal_20{0}_{0}_{0}_{0}_{0}_{0}".format("[0-9][0-9]"),    #ex: 'Rat001_2013_12_26_20_47_54'
        "animal_%Y_%m_%d_%H_%M_%S"
    ),
    "tiret":(
        "animal_20{0}_{0}_{0}-{0}_{0}".format("[0-9][0-9]"),        #ex: 'Rat001_2039_10_15-06_27'
        "animal_%Y_%m_%d-%H_%M"
    ),
    "tiretSeconds":(
        "animal_20{0}_{0}_{0}-{0}_{0}_{0}".format("[0-9][0-9]"),    #ex: 'Rat001_2033_06_23-21_24_46'
        "animal_%Y_%m_%d-%H_%M_%S"
    ),
    "openEphys2":(
        "20{0}-{0}-{0}_{0}-{0}-{0}".format("[0-9][0-9]"),           #ex: '2015-02-15_11-59-48'
        "%Y-%m-%d_%H-%M-%S"
    ),
    }


# The right format 
# "[0-9][0-9]" matches one two-digit field; the former "?[00-99]" meant
# "any character followed by one digit" (glob has no numeric ranges)
REGULAR_FORMAT=("animal_20{0}_{0}_{0}_{0}_{0}".format("[0-9][0-9]"),  #ex: 'Rat001_2039_10_15_06_27'
                "animal_%Y_%m_%d_%H_%M")


### Functions to find session matching format, extract the date, rename, merge...

In [3]:
def rename_folder(folderToRemove,newFolder):
    '''
    Move the content of "folderToRemove" into "newFolder", then delete "folderToRemove".

    folderToRemove (=old), newFolder: full paths to folders
    - "newFolder" is created if it doesn't exist, otherwise the two folders are merged
    - the old folder name is replaced by the new one in every file name
      (example: folderToRemove_1.txt becomes newFolder_1.txt)
    - when merging, if a file with the final name already exists in "newFolder",
      the file coming from "folderToRemove" is deleted (the existing one is kept)
    - if both paths point to the same folder, nothing is done
    '''
    #same folder given twice: without this guard every file would be seen as
    #"already existing in the new folder" and deleted, then the folder removed
    if os.path.realpath(folderToRemove)==os.path.realpath(newFolder):
        return
    #if the new folder doesn't exist, create it
    if not os.path.exists(newFolder):
        os.mkdir(newFolder)
    #folder names don't change during the loop, compute them once
    oldFolderName=os.path.basename(folderToRemove.rstrip(os.sep))
    newFolderName=os.path.basename(newFolder.rstrip(os.sep))
    #move and rename the files
    for f in os.listdir(folderToRemove):
        oldpath=os.path.join(folderToRemove,f)
        newpath=os.path.join(newFolder,f.replace(oldFolderName,newFolderName))
        if os.path.exists(newpath):
            os.remove(oldpath)
        else:
            os.rename(oldpath,newpath)
    #only succeeds if the old folder is now empty
    os.rmdir(folderToRemove)
    
def get_folders_matching_format(rootFolder,globFormat):
    '''
    Return the names (not the full paths) of the folders of "rootFolder" matching a glob pattern.

    rootFolder: full path where to look for session folders (ex: "/data/Rat034/Experiments")
    globFormat: glob pattern for the folder name
    Plain files matching the pattern are ignored, and the names are sorted
    so that the result doesn't depend on the file system order.
    '''
    fullFormat=os.path.join(rootFolder,globFormat)
    return sorted(os.path.basename(path) for path in glob.glob(fullFormat) if os.path.isdir(path))

def get_regular_name(wrongName,wrongTimeFormat,regularTimeFormat):
    '''
    Convert a session name written in a wrong format into the regular format:
    the date is parsed with "wrongTimeFormat" and written back with "regularTimeFormat"
    '''
    return datetime.strptime(wrongName,wrongTimeFormat).strftime(regularTimeFormat)

def get_date(name,timeFormat):
    '''
    Parse a session name with the datetime pattern "timeFormat"
    and return the corresponding datetime object
    '''
    return datetime.strptime(name,timeFormat)

def get_regular_name_from_date(date,regularTimeFormat):
    '''
    Build a session name by formatting "date" with the datetime pattern "regularTimeFormat"
    '''
    name=date.strftime(regularTimeFormat)
    return name

def rename_all_in_rootFolder(rootFolder,wrongGlobFormat,wrongTimeFormat,regularGlobFormat,
                             regularTimeFormat,minuteDelay=2,verbose=False):
    '''
    Rename every session folder of "rootFolder" written in a wrong format into the regular format.

    rootFolder: full path where to look for session folders (ex: "/data/Rat034/Experiments")
    wrongGlobFormat: glob pattern to search for the wrong session names
    wrongTimeFormat: datetime pattern to read date from wrong session names
    regularGlobFormat, regularTimeFormat: glob and datetime patterns for the right session names
    minuteDelay: if a wrong folder and a regular folder have the same date
                 (give or take minuteDelay), the wrong one is merged into the regular one
    verbose: print one line per folder, telling whether it was merged or renamed
    '''
    delay=timedelta(minutes=minuteDelay)

    #get names and date of the regular folders
    regularFolders=get_folders_matching_format(rootFolder,regularGlobFormat)
    allRegularDates=[get_date(name,regularTimeFormat) for name in regularFolders]

    #get names of wrong folders
    for f in get_folders_matching_format(rootFolder,wrongGlobFormat):
        #check if there is a regular folder around the same date
        #(the first one found within the delay is used)
        date=get_date(f,wrongTimeFormat)
        for otherDate in allRegularDates:
            if abs(date-otherDate)<delay:
                date=otherDate
                break
        #new name
        newFolder=get_regular_name_from_date(date,regularTimeFormat)
        newPath=os.path.join(rootFolder,newFolder)
        oldPath=os.path.join(rootFolder,f)
        #it is a merge whenever the destination already exists: either a regular folder
        #close in time, or a folder created earlier in this loop (two wrong names that
        #give the same regular name, e.g. same minute but different seconds)
        merge=os.path.exists(newPath)
        #rename/merge
        rename_folder(oldPath,newPath)
        if verbose:
            if merge:
                print("Merged %s into %s"%(f,newFolder))
            else:
                print("Renamed %s in %s"%(f,newFolder))
                
def rename_files_to_match_folder(folderPath,extensionList):
    '''
    Give the name of the folder to the files ending with one of the extensions.

    folderPath= "/data/Rat034/Experiments/Rat034_2015_etc"
    extensionList= [".dat", ".prm", ".kwik"]   (the leading "." is optional)
    --> Renames "someFile.dat" into "Rat034_2015_etc.dat"

    For each file, the first matching extension of the list is used.
    A file is never renamed over an existing one: if the target name is already
    taken (ex: two ".dat" in the folder), the file is left as it is and a warning is printed.
    '''
    folderName=os.path.basename(folderPath.rstrip(os.sep))
    extensionList=[ext if ext.startswith(".") else "."+ext for ext in extensionList]
    for f in os.listdir(folderPath):
        for ext in extensionList:
            if not f.endswith(ext):
                continue
            newName=folderName+ext
            if f!=newName:
                newPath=os.path.join(folderPath,newName)
                if os.path.exists(newPath):
                    #os.rename would silently replace the existing file on POSIX
                    print("Warning: %s not renamed, %s already exists"%(f,newName))
                else:
                    os.rename(os.path.join(folderPath,f),newPath)
            break
                break

### Test on fake data
Test renaming folders and files

In [None]:
if "__file__" not in dir():
    animal="Zebre001"

    #Create a test root folder
    testFolder="testFormatConversion"    #in reality "/data/animalXXX/Experiments"
    if not os.path.exists(testFolder):
        os.mkdir(testFolder)
    else:
        #if it already exists, empty it (it can contain folders or plain files)
        for f in os.listdir(testFolder):
            leftover=os.path.join(testFolder,f)
            if os.path.isdir(leftover):
                shutil.rmtree(leftover)
            else:
                os.remove(leftover)
    root=testFolder   

    #function to create a fake folder in a time formats
    def create_fake_folder(timeFormat,withRegular=False,regularTime=None,date=None):
        '''
        Create a folder named after "date" (random if None) written with "timeFormat",
        containing a few empty files with the same name.
        If withRegular is true and regularTime is given, also create a folder
        in the "regularTime" format one minute later.
        Does nothing if the folder already exists.
        '''
        if date is None:
            date=datetime(randint(2010,2040),randint(1,12),randint(1,28),randint(0,23),randint(0,59),randint(0,59))
        fakeFolder=os.path.join(testFolder,date.strftime(timeFormat))
        if os.path.exists(fakeFolder):
            return
        os.mkdir(fakeFolder)
        #put some files
        for f in [".txt",".data2",".entrance",".param"]:
            path=os.path.join(fakeFolder,date.strftime(timeFormat)+f)
            #create an empty file in a portable way
            #(os.mknod doesn't exist on Windows and may need special privileges elsewhere)
            open(path,"a").close()
        #optional: create a regular folder one minute apart
        if withRegular and (regularTime is not None):
            date= date + timedelta(minutes = 1)
            create_fake_folder(regularTime,date=date)

    #Create regular fake folder
    regularGlob=REGULAR_FORMAT[0].replace("animal",animal)
    regularTime=REGULAR_FORMAT[1].replace("animal",animal)
    create_fake_folder(regularTime)

    #Create wrong fake folders
    #With a regular one minute apart for some formats
    for index,name in enumerate(WRONG_FORMATS):
        timeFormat=WRONG_FORMATS[name][1].replace("animal",animal)
        create_fake_folder(timeFormat,withRegular=index%2,regularTime=regularTime) 

    #Detect wrong formats
    print("DETECTION OF WRONG FORMATS")
    print("##########################")
    for name in WRONG_FORMATS:
        globFormat=WRONG_FORMATS[name][0].replace("animal",animal)
        timeFormat=WRONG_FORMATS[name][1].replace("animal",animal)
        print("  Format %s"%(name))
        print("  -----")
        fList=get_folders_matching_format(testFolder,globFormat)
        for f in fList:
            print("'%s' to rename in '%s'"%(f,get_regular_name(f,timeFormat,regularTime)))
        print()

    #Rename everything
    print("RENAME FOR EACH WRONG FORMAT")
    print("##########################")
    for name in WRONG_FORMATS:
        globFormat=WRONG_FORMATS[name][0].replace("animal",animal)
        timeFormat=WRONG_FORMATS[name][1].replace("animal",animal)
        rename_all_in_rootFolder(testFolder,globFormat,timeFormat,regularGlob,regularTime,minuteDelay=2,verbose=True)

    #delete test folder
    shutil.rmtree(testFolder)

### Code to read data by chunk, taken from Klusta source code

In [18]:
#------------------------------------- from six.py, for python2/3 compatibility
# Type aliases that differ between Python 2 and Python 3 (same names as in six.py)
_major_version = sys.version_info[0]
PY2 = _major_version == 2
PY3 = _major_version == 3
if not PY3:
    # Python 2 built-ins
    string_types = (basestring,)
    integer_types = (int, long)
    text_type = unicode
    binary_type = str
else:
    string_types = (str,)
    integer_types = (int,)
    text_type = str
    binary_type = bytes
    MAXSIZE = sys.maxsize
# -------------------------------------------------------------- Chunk class from Klusta
# reads data chunk by chunk (instead of all at once, as the files are big)
class Chunk(object):
    '''
    One window over a data array.

    bounds = (s_start, s_end, keep_start, keep_end): row indices of the full
    window and of the part to keep. The rows are only read when one of the
    data_chunk_* properties is accessed, and are passed through convert_dtype
    with the requested dtype.
    '''
    def __init__(self, data=None, nsamples=None, nchannels=None,
                 bounds=None, dtype=None, recording=0, nrecordings=1):
        self._data = data
        # the shape is taken from the data only when neither dimension is given
        if (nsamples, nchannels) == (None, None):
            nsamples, nchannels = data.shape
        self.nsamples, self.nchannels = nsamples, nchannels
        self.dtype = dtype
        self.recording, self.nrecordings = recording, nrecordings
        s_start, s_end, keep_start, keep_end = bounds
        self.s_start, self.s_end = s_start, s_end
        self.keep_start, self.keep_end = keep_start, keep_end
        self.window_full = (s_start, s_end)
        self.window_keep = (keep_start, keep_end)

    @property
    def data_chunk_full(self):
        '''Rows s_start:s_end of the data, converted to self.dtype.'''
        return convert_dtype(self._data[self.s_start:self.s_end, :], self.dtype)

    @property
    def data_chunk_keep(self):
        '''Rows keep_start:keep_end of the data, converted to self.dtype.'''
        return convert_dtype(self._data[self.keep_start:self.keep_end, :], self.dtype)

def chunk_bounds(nsamples, chunk_size, overlap=0):
    '''
    Generate (s_start, s_end, keep_start, keep_end) tuples covering nsamples rows.

    Consecutive windows [s_start, s_end) are chunk_size long and share "overlap"
    rows; the "keep" intervals follow each other without gap, each one ending
    overlap // 2 rows before the end of its window. The last window ends at nsamples.
    '''
    half_overlap = overlap // 2
    # first window (always yielded, even when chunk_size is larger than nsamples)
    start, stop = 0, chunk_size
    keep_from, keep_to = start, stop - half_overlap
    yield start, stop, keep_from, keep_to
    # full-size windows, as long as another one fits after them
    while stop - overlap + chunk_size < nsamples:
        start = stop - overlap
        stop = start + chunk_size
        keep_from, keep_to = keep_to, stop - half_overlap
        if start < stop:
            yield start, stop, keep_from, keep_to
    # last window: whatever remains up to nsamples
    start, stop = stop - overlap, nsamples
    keep_from, keep_to = keep_to, stop
    if start < stop:
        yield start, stop, keep_from, keep_to

# NOTE(review): convert_dtype relies on `_get_dtype` and `_dtype_factors`, which were not
# defined anywhere in this notebook (NameError as soon as a conversion was really needed).
# The definitions below assume that int16 samples use the full scale +/-32767 and that
# float samples are in [-1, 1] -- TODO confirm against the Klusta source.
_MAXINT16 = 2.**15 - 1
_dtype_factors = {
    ('int16', 'float32'): 1. / _MAXINT16,
    ('int16', 'float64'): 1. / _MAXINT16,
    ('float32', 'int16'): _MAXINT16,
    ('float64', 'int16'): _MAXINT16,
}

def _get_dtype(dtype):
    '''Return the canonical numpy name ('int16', 'float32', ...) of a dtype-like object.'''
    return np.dtype(dtype).name

def convert_dtype(data, dtype=None, factor=None):
    '''
    Convert an array to another dtype, rescaling the values when needed.

    data: numpy array
    dtype: target dtype (None: the data are returned unchanged)
    factor: multiplicative factor applied before the cast; by default it is
            looked up in _dtype_factors for the (old dtype, new dtype) pair, 1 if unknown
    Floating point data are in addition divided by their maximum absolute value
    (unless it is 0), so that they fill the whole range of the target dtype.
    Returns "data" itself when there is nothing to convert, a new array otherwise.
    '''
    # explicit None test: `not dtype` may also be true for numpy dtype objects
    # (their len() is their number of fields), which would skip the conversion
    if dtype is None:
        return data
    if data.shape[0] == 0:
        return data.astype(dtype)
    dtype_old = data.dtype
    if dtype_old == dtype:
        return data
    key = (_get_dtype(dtype_old), _get_dtype(dtype))
    factor = factor or _dtype_factors.get(key, 1)
    if dtype_old in (np.float32, np.float64):
        peak = np.abs(data).max()
        # all-zero data: nothing to normalize, and dividing would produce inf/nan
        if peak != 0:
            factor = factor/peak
    if factor != 1:
        return (data * factor).astype(dtype)
    return data.astype(dtype)
#------------------------------------------------------------------------------ open a file, from kwiklib 
def open_file(path,mode="r"):
    '''
    Open a HDF5 file with PyTables.

    Returns the file handle, or None (after emitting a warning) if an IOError occurred.
    '''
    import warnings  # only needed here; "warn" was used without ever being imported
    try:
        return tb.open_file(path, mode)
    except IOError as e:
        # str(e) instead of e.message, which doesn't exist in Python 3
        warnings.warn("IOError: " + str(e))
        return None
# ----------------------------------------------------------------------------- KWD and .dat data reader from Klusta
class BaseRawDataReader(object):
    '''
    Base class of the raw data readers: iterates over the recordings and cuts
    each of them in chunks. Subclasses implement get_recording_data.
    '''
    def __init__(self, dtype_to=np.int16):
        self.nrecordings = 1
        self.dtype_to = dtype_to

    def next_recording(self):
        '''Yield (recording index, recording data); the index is also stored in self.recording.'''
        for index in range(self.nrecordings):
            self.recording = index
            yield self.recording, self.get_recording_data(self.recording)

    def get_recording_data(self, recording):
        '''Return the data of one recording (to be implemented by subclasses).'''
        raise NotImplementedError()

    def chunks(self, chunk_size=None, chunk_overlap=0):
        '''Yield the successive Chunk objects of every recording.'''
        for recording, data in self.next_recording():
            assert chunk_size is not None, "You need to specify a chunk size."
            nsamples = data.shape[0]
            for bounds in chunk_bounds(nsamples, chunk_size=chunk_size, overlap=chunk_overlap):
                yield Chunk(data, bounds=bounds, dtype=self.dtype_to,
                            recording=recording, nrecordings=self.nrecordings)
class KwdRawDataReader(BaseRawDataReader):
    '''
    Reader for .raw.kwd (HDF5) files.

    The file stays open (self.kwd) as long as the data are being read:
    call close() once done.
    '''
    def __init__(self, filename, dtype_to=np.int16):
        self.kwd = open_file(filename, 'r')
        # open_file can return None instead of a file handle: fail here with a clear
        # message rather than later with an AttributeError on None
        if self.kwd is None:
            raise IOError("Could not open %s"%(filename))
        #self.nrecordings = self.kwd.root.recordings._v_nchildren
        # nrecordings is set to 1 by the base class
        super(KwdRawDataReader, self).__init__(dtype_to=dtype_to)

    def get_recording_data(self, recording):
        '''Return the "data" node of /recordings/<recording> (the samples are not loaded here).'''
        return self.kwd.root.recordings._f_get_child(str(recording)).data

    def close(self):
        '''Close the underlying file; can be called several times.'''
        if self.kwd is not None:
            self.kwd.close()
            self.kwd = None
    
class DatRawDataReader(BaseRawDataReader):
    '''
    Reader for flat binary .dat files: samples of type "dtype",
    nChannels values per time point. Only one recording per file.
    '''
    def __init__(self, filename, nChannels,dtype=np.int16, dtype_to=np.int16):
        self.filename = filename
        self.nrecordings = 1
        self.nchannels = nChannels
        self.dtype = np.dtype(dtype)
        self.dtype_to = np.dtype(dtype_to)

    def get_recording_data(self, recording):
        '''
        Return the whole file as a read-only memory map of shape (nsamples, nchannels).
        Raises ValueError if the file size is not a multiple of the size of one time point.
        '''
        path = self.filename
        # the number of samples is deduced from the file size
        n_bytes = os.path.getsize(path)
        bytes_per_sample = self.nchannels * self.dtype.itemsize
        if n_bytes % bytes_per_sample:
            raise ValueError(("Shape error: the file {f} has S={s} bytes, "
                "but there are C={c} channels. C should be a divisor of S."
                "").format(f=path, s=n_bytes, c=self.nchannels))
        self.nsamples = n_bytes // bytes_per_sample
        return np.memmap(path, dtype=self.dtype, mode='r', offset=0,
                         shape=(self.nsamples, self.nchannels))
#-------------------------------------------------------------------------------
def create_eeg(inputName,nChannels=None,downsample=16,inputSamplingRate=20000):
    '''
    Downsample the input file (raw.kwd or dat) and save it in .eeg, next to the input.
    eegData= rawData[::downsample,:] (keep one point out of "downsample" for each channel,
    no filtering is applied)

    inputName: path to a ".raw.kwd" or ".dat" file
    nChannels: number of channels, required for ".dat" files
    downsample: decimation factor (integer >= 1)
    inputSamplingRate: sampling rate of the input, only used to read about 1 s at a time
    Returns True if the .eeg was written, False if the arguments are not valid.
    '''
    downsample=int(downsample)
    if downsample<1:
        print("Error in create_eeg: downsample must be at least 1")
        return False
    if inputName.endswith(".raw.kwd"):
        outputName=inputName[:-7]+"eeg"
        raw_data=KwdRawDataReader(inputName)
    elif inputName.endswith(".dat"):
        #check the arguments before creating the reader
        if nChannels is None:
            print("Error in create_eeg: nChannel snot defined, can't read .dat")
            return False
        outputName=inputName[:-3]+"eeg"
        raw_data=DatRawDataReader(inputName,nChannels=nChannels)
    else:
        print("Error in create_eeg: input file doesn't end with '.raw.kwd' or '.dat'")
        return False

    #about 1 second per chunk, rounded down to a multiple of "downsample": every chunk
    #then starts on a kept sample, otherwise the spacing would be irregular at chunk boundaries
    chunk_size=max(int(inputSamplingRate)//downsample,1)*downsample
    try:
        with open(outputName,'wb') as output:
            for chunk in raw_data.chunks(chunk_size=chunk_size):
                chunk_raw = chunk.data_chunk_full # shape: (nsamples, nchannels)
                #tobytes() gives the rows one after the other (C order),
                #same bytes as writing each row separately
                output.write(chunk_raw[::downsample,:].tobytes())
    finally:
        #the kwd reader keeps its HDF5 file open, close it
        kwd=getattr(raw_data,"kwd",None)
        if kwd is not None:
            kwd.close()
    return True

### Convert one .raw.kwd file to .dat file

In [19]:
def convert_kwd_to_dat(filename,outputname=None,overwrite=False):
    '''
    Convert recording 0 of a ".raw.kwd" file into a flat binary ".dat" file.

    filename: path to the ".raw.kwd" file
    outputname: path of the .dat (default: same as filename, with the ".dat" extension)
    overwrite: whether to redo the conversion if the .dat already exists
    Returns True if the .dat exists at the end (written or already there),
    False if "filename" is not a ".raw.kwd".
    '''
    #check the input first: the default output name is only meaningful for a .raw.kwd
    if not filename.endswith('.raw.kwd'):
        print("Error: the raw data file doesn't end with '.raw.kwd'")
        return False

    #output file name
    if outputname is None:
        outputname=filename[:-7]+"dat"
    out=os.path.basename(outputname)
    if os.path.exists(outputname):
        if overwrite:
            print("%s already exist, it will be overwritten"%(out))
        else:
            print("%s already exist, no conversion to do"%(out))
            return True

    #instantiate reader
    kwd_data=KwdRawDataReader(filename)
    try:
        print("openning %s"%(filename))
        #number of channels: taken from the shape of the data node,
        #without loading the whole recording in memory
        nchannels=kwd_data.get_recording_data(0).shape[1]
        print("number of channels: %s, converting..."%(nchannels))
        #create dat file, open write only in binary
        with open(outputname,'wb') as output:
            chunk_size=20000
            for chunk in kwd_data.chunks(chunk_size):
                #rows are written one after the other (C order)
                output.write(chunk.data_chunk_full.tobytes())
    finally:
        #close the kwd file even if the conversion failed
        kwd_data.kwd.close()
    print("wrote %s"%(outputname))
    print("done")
    return True

#--------------------------------------------------------------------------------------------------
if "__file__" not in dir():
    #file to convert, and what to do with existing/old files
    inputFile="/home/david/TEMP/TestData/OpenEphys/SWItest/SWItest_2016_09_13_16_14/SWItest_2016_09_13_16_14.raw.kwd"
    deleteKWD=False
    overwrite=True

    success=convert_kwd_to_dat(inputFile,overwrite=overwrite)

    #only delete the .raw.kwd if the conversion didn't report an error
    if success and deleteKWD and os.path.exists(inputFile):
        print("Delete %s"%os.path.basename(inputFile))
        os.remove(inputFile)

openning /home/david/TEMP/TestData/OpenEphys/SWItest/SWItest_2016_09_13_16_14/SWItest_2016_09_13_16_14.raw.kwd
number of channels: 9, converting...
wrote /home/david/TEMP/TestData/OpenEphys/SWItest/SWItest_2016_09_13_16_14/SWItest_2016_09_13_16_14.dat
done


### Batch: Rename folders and convert all raw.kwd into dat
Mainly to convert openEphys folder to our regular folder

Potential issues:

  - "settings.xml" from openEphys is not compatible with .xml from neuroscope
  - once a .raw.kwd is converted to .dat, the number of channels (nChannels) is no longer stored with the data;  
     it may still be recoverable from SIGNALCHAIN/PROCESSOR../EDITOR/LFPDISPLAY/ChannelDisplayState (settings.xml)
  

In [None]:
if "__file__" not in dir():
    root="/data"
    #Animal where to run the script
    animalList=["RatM001"]

    #Whether to print "X renamed in Y"
    verbose=True  

    #If a wrong folder and a regular folder are less than "minuteDelay" apart, they are merged
    # example: if minuteDelay=2, 'Rat001_2034_04_22-04_26_12' would be merged with 'Rat001_2034_04_22_04_27'
    minuteDelay=2

    #Whether to delete the .raw.kwd once its converted
    deleteKWD=False

    #Whether to overwrite the .dat if it exists already
    overwrite=False

    #File to rename (ex: "someFile.dat" -> "Rat024_2015_etc.dat")
    #extensionList=[".dat","+6.raw.kwd",".nrs",".kwx",".kwik",".prm",".prb"]
    extensionList=[".dat",".raw.kwd",".nrs",".kwx",".kwik",".eeg"]

    #--------------------------------------------------------------------------------
    for animal in animalList:
        rootFolder=os.path.join(root,animal,"Experiments")
        print("animal %s: %s"%(animal,rootFolder))

        #RENAME
        regularGlob=REGULAR_FORMAT[0].replace("animal",animal)
        regularTime=REGULAR_FORMAT[1].replace("animal",animal)
        for name in WRONG_FORMATS:
            globFormat=WRONG_FORMATS[name][0].replace("animal",animal)
            timeFormat=WRONG_FORMATS[name][1].replace("animal",animal)       
            #print(get_folders_matching_format(rootFolder,globFormat))
            rename_all_in_rootFolder(rootFolder,globFormat,timeFormat,regularGlob,regularTime,
                                     minuteDelay=minuteDelay,verbose=verbose)

        #CONVERT
        #for every session folder of the animal
        for folder in os.listdir(rootFolder):
            if not folder.startswith(animal):
                continue
            path=os.path.join(rootFolder,folder)
            #a plain file starting with the animal name is not a session: skip it
            #(listing its content would raise an error)
            if not os.path.isdir(path):
                continue
            print("*"+folder)
            #rename files
            rename_files_to_match_folder(path,extensionList)

            for f in os.listdir(path):
                fpath=os.path.join(path,f)
                #convert raw.kwd
                if f.endswith(".raw.kwd"):
                    sucess=convert_kwd_to_dat(fpath,overwrite=overwrite)
                    #the .raw.kwd is only deleted if the conversion didn't report an error
                    if sucess and deleteKWD and os.path.exists(fpath):
                        os.remove(fpath) 
                        if verbose:
                            print("Delete %s"%os.path.basename(fpath))
                elif f=="settings.xml":
                    #openEphys settings are kept under another name
                    newPath=os.path.join(path,"settingsOpenEphys.xml")
                    os.rename(fpath,newPath)

        print("done")
        print("--------")

### Create an eeg from one dat or raw.kwd

In [None]:
if "__file__" not in dir():
    #raw data of the session, and its number of channels (needed to read a .dat)
    inputName="/data/MOU025/Experiments/MOU025_2014_08_22_15_48/MOU025_2014_08_22_15_48.dat"
    nChannels=36
    #keep one sample out of 16, the input being sampled at 20000
    eegOptions=dict(nChannels=nChannels,downsample=16,inputSamplingRate=20000)
    create_eeg(inputName,**eegOptions)