In [1]:
import sys
import itertools
import os

import numpy as np
import pandas as pd
import scipy
from scipy.signal import savgol_filter

import neuroseries as nts

from progressbar import ProgressBar

import time
import matplotlib.pyplot as plt


import seaborn as sns 
import bk.load
import bk.signal
import bk.plot
import importlib; 
importlib.reload(bk.signal)
importlib.reload(bk.plot)
%matplotlib qt
import basefunction.anass_load as at

In [42]:
# Select the recording session. The session name is stored both on bk.load and
# in the notebook namespace (loadSpikeData reads the notebook-level `session`).
session = 'Rat08-20130713'
bk.load.session = session
bk.load.path = '/media/billel/GGLab-01-BK/DATA/Rat08-20130713/'

# NOTE: loadSpikeData is defined in a later cell of this notebook; on a fresh
# kernel that cell has to be executed before this one.
neurons, shanks = loadSpikeData(bk.load.path)

Data already saved in Numpy format, loading them from here:
LoadingRat08-20130713-neurons.npy
LoadingRat08-20130713-NeuronsShanks.npy


In [39]:
# Quick sanity check: display the first loaded unit with its time index in seconds.
neurons[0].as_units('s')

Time (s)
24.42490      NaN
82.58885      NaN
101.86240     NaN
138.35750     NaN
138.65065     NaN
               ..
22663.57715   NaN
22672.92575   NaN
22675.11920   NaN
22675.61620   NaN
22676.91705   NaN
Length: 15784, dtype: float64

In [43]:
def loadSpikeData(path, index=None, fs=20000, session=None):
    ### Adapted from Viejo github https://github.com/PeyracheLab/StarterPack/blob/master/python/wrappers.py
    ### Modified by BK 06/08/20
    """
    Load the spike times of every sorted unit of a session.

    If ``<session>-neurons.npy`` and ``<session>-neuronsShanks.npy`` already
    exist in ``path`` they are loaded and returned directly. Otherwise the
    ``<basename>.clu.<shank>`` / ``<basename>.res.<shank>`` files found in
    ``path`` are parsed, and the result is saved to those two ``.npy`` files
    so that the next call is faster.

    Notes:
        - Only files named ``<basename>.clu.<integer>`` are used; backup copies
          such as ``Rat10-20140627.clu.10.07.07.2014.15.41`` are ignored.
        - The first entry of each clu file is skipped (presumably the header
          holding the cluster count) and only cluster ids > 1 are kept
          (ids 0 and 1 are presumably noise / multi-unit clusters).
        - The consistency check between the clu and res file lists that existed
          upstream is intentionally not performed (backup files broke it); a
          missing res file makes ``np.genfromtxt`` fail for that shank.

    Args:
        path : string, folder containing the session files.
        index : unused, kept for backward compatibility with existing calls.
        fs : sampling frequency used to convert res samples into seconds
            (20 kHz if not given).
        session : string, prefix of the ``.npy`` cache files. If omitted, the
            notebook/module-level variable ``session`` is used when it exists
            (historical behaviour), otherwise the name of the ``path`` folder.

    Returns:
        neurons : 1-D object array with one ``nts.Tsd`` per unit
            (timestamps only, given in seconds).
        shanks : integer array of shape (n_units, 2); each row is
            [shank number, cluster id on that shank], which keeps the unit
            indexing traceable to the clu files / Matlab code.

    Raises:
        FileNotFoundError : ``path`` does not exist or holds no clu file.
        ValueError : no cluster with id > 1 was found on any shank.
    """
    if not os.path.exists(path):
        # Raise instead of sys.exit(): sys.exit() reports success (status 0) in
        # scripts and only produces a confusing SystemExit inside a notebook.
        raise FileNotFoundError("The path " + path + " doesn't exist")

    if session is None:
        # Backward compatibility: older calls relied on a global `session`.
        session = globals().get('session')
    if session is None:
        session = os.path.basename(os.path.normpath(path))

    neurons_file = os.path.join(path, session + '-neurons.npy')
    shanks_file = os.path.join(path, session + '-neuronsShanks.npy')

    # Use the cache only when BOTH files are present, otherwise rebuild it.
    if os.path.exists(neurons_file) and os.path.exists(shanks_file):
        print('Data already saved in Numpy format, loading them from here:')
        print(session + '-neurons.npy')
        # NOTE(security): allow_pickle=True unpickles arbitrary objects; only
        # load cache files that were produced by this function.
        neurons = np.load(neurons_file, allow_pickle=True)
        print(session + '-neuronsShanks.npy')
        shanks = np.load(shanks_file, allow_pickle=True)
        return neurons, shanks

    # '.clu.' (not 'clu') because some files contain the word clu without being
    # clu files; the numeric-suffix test discards the backup copies.
    clu_files = sorted(
        f for f in os.listdir(path)
        if not f.startswith('.')
        and '.clu.' in f
        and f.rpartition('.clu.')[2].isdecimal()
    )
    if not clu_files:
        raise FileNotFoundError("No .clu.<shank> file found in " + path)

    basename = clu_files[0].rpartition('.clu.')[0]
    shank_numbers = sorted(int(f.rpartition('.clu.')[2]) for f in clu_files)

    spike_trains = []
    shank_ids = []
    cluster_ids = []
    for shank in shank_numbers:
        print('Loading ' + basename + '.clu.' + str(shank))
        clu_path = os.path.join(path, basename + '.clu.' + str(shank))
        # atleast_1d: genfromtxt returns a 0-d array for a single-line file.
        clu = np.atleast_1d(np.genfromtxt(clu_path, dtype=np.int32))[1:]
        if clu.size == 0 or clu.max() <= 1:
            continue  # nothing sorted on this shank

        res_path = os.path.join(path, basename + '.res.' + str(shank))
        res = np.atleast_1d(np.genfromtxt(res_path))

        sorted_clusters = np.unique(clu)
        sorted_clusters = sorted_clusters[sorted_clusters > 1]
        for cluster in sorted_clusters:
            # Sorted, de-duplicated spike times of this cluster, in seconds.
            spike_times = np.unique(res[clu == cluster]) / fs
            # nts.Tsd instead of nts.Ts because Ts has a printing bug.
            spike_trains.append(nts.Tsd(t=spike_times, time_units='s'))
            shank_ids.append(shank)
            cluster_ids.append(int(cluster))

    if not spike_trains:
        raise ValueError("No sorted unit (cluster id > 1) found in " + path)

    # Fill element by element: np.array(list_of_series, dtype=object) would
    # unpack the series into a 2-D array when there is a single unit or when
    # all units have the same number of spikes.
    neurons = np.empty(len(spike_trains), dtype=object)
    for position, train in enumerate(spike_trains):
        neurons[position] = train
    shanks = np.array([shank_ids, cluster_ids]).T

    print()
    print('Saving data in Numpy format :')

    print('Saving ' + session + '-neurons.npy')
    np.save(neurons_file, neurons)

    print('Saving ' + session + '-neuronsShanks.npy')
    np.save(shanks_file, shanks)

    return neurons, shanks