In [2]:
import matplotlib.pyplot as plt
from os.path import join
import dask
import numpy as np  
import pandas as pd
from scipy.io import loadmat
from collections import namedtuple
import mrestimator as mre

import loren_frank_data_processing
from loren_frank_data_processing.core import logger, get_epochs, get_data_structure, reconstruct_time
from loren_frank_data_processing.tetrodes import get_trial_time, _get_tetrode_id
from loren_frank_data_processing.neurons import make_neuron_dataframe, convert_neuron_epoch_to_dataframe, _add_to_dict, _get_neuron_id
import loren_frank_data_processing.core

In [3]:
# Field names are passed as an ordered list. A set literal has no defined
# iteration order, so with a set the positional arguments below could end up
# in either field depending on the interpreter session.
Animal = namedtuple('Animal', ['directory', 'short_name'])

# Raw strings keep the Windows backslashes literal instead of relying on
# unrecognised escape sequences such as '\C' or '\D'.
conley = Animal(r'C:\Conley', 'con')
bond = Animal(r'C:\Bond.tar\Bond', 'bon')
chapati = Animal(r'C:\Chapati', 'cha')
dudley = Animal(r'C:\Dudley', 'dud')
frank = Animal(r'C:\Frank', 'fra')
dave = Animal(r'C:\Dave', 'dav')
corriander = Animal(r'C:\Corriander', 'Cor')

# Lookup table keyed by animal short name. Every entry is
# Animal(directory, short_name); the 'Cor' entry previously had its two
# arguments in the opposite order from all other entries.
animals = {
    'fra': Animal(r'C:\Frank\fra', 'fra'),
    'dud': Animal(r'C:\Dudley\dud', 'dud'),
    'dav': Animal(r'C:\Dave\dav', 'dav'),
    'con': Animal(r'C:\Conley\con', 'con'),
    'Cor': Animal(r'C:\Corriander\Cor', 'Cor'),
    'cha': Animal(r'C:\Chapati\cha', 'cha'),
    'bon': Animal(r'C:\Bond.tar\Bond\bon', 'bon'),
}

In [8]:
# The following functions from 'loren_frank_data_processing' were modified, with changes indicated.
def get_LFP_dataframe(tetrode_key, animals):
    '''Load the LFP trace of one tetrode for one epoch.
    Parameters
    ----------
    tetrode_key : tuple
        Unique key identifying the tetrode. Elements are
        (animal_short_name, day, epoch, tetrode_number).
    animals : dict of named-tuples
        Dictionary containing information about the directory for each
        animal. The key is the animal_short_name.
    Returns
    -------
    LFP : pandas.Series or None
        Sample values as floats, indexed by the time axis produced by
        `reconstruct_time`. None is returned (after a logged warning) when
        loading raises FileNotFoundError or TypeError.
    '''
    try:
        mat_contents = loadmat(get_LFP_filename_modified(tetrode_key, animals))
        # The record of interest is the last cell at each of the three
        # nesting levels of the 'eeg' variable.
        eeg_record = mat_contents['eeg'][0, -1][0, -1][0, -1]
        start_time = eeg_record['starttime'][0, 0].item()
        n_samples = eeg_record['data'][0, 0].size
        sampling_rate = float(eeg_record['samprate'][0, 0].squeeze())
        time_axis = reconstruct_time(start_time, n_samples, sampling_rate)
        samples = eeg_record['data'][0, 0].squeeze().astype(float)
        series_name = '{0}_{1:02d}_{2:02}_{3:03}'.format(*tetrode_key)
        return pd.Series(data=samples, index=time_axis, name=series_name)
    except (FileNotFoundError, TypeError):
        failed_path = get_LFP_filename_modified(tetrode_key, animals)
        logger.warning('Failed to load file: {0}'.format(failed_path))
        return None
        
def get_tetrode_info_path(animal):
    '''Build the path of the Matlab tetrode info file for an animal.
    Parameters
    ----------
    animal : namedtuple
        Must expose a `directory` attribute; only that attribute is read.
    Returns
    -------
    filename : str
        `animal.directory` joined with the fixed file name 'tetinfo.mat'.
    '''
    # The file name is fixed; the animal-specific variant
    # '{animal.short_name}tetinfo.mat' is intentionally not used here.
    tetinfo_name = 'tetinfo.mat'
    tetinfo_path = join(animal.directory, tetinfo_name)
    return tetinfo_path

def make_tetrode_dataframe(animals, epoch_key=None):
    """Information about all tetrodes such as recording location.
    Parameters
    ----------
    animals : dict of named-tuples
        Dictionary containing information about the directory for each
        animal. The key is the animal_short_name. Only consulted when
        `epoch_key` is None.
    epoch_key : tuple, optional
        (animal, day, epoch). When given, only that epoch is loaded and the
        function returns early.
    Returns
    -------
    tetrode_information : pandas.DataFrame
        Concatenation of the per-epoch frames built by
        `convert_tetrode_epoch_to_dataframe`.
    """
    tetrode_info = []
    if epoch_key is not None:
        animal, day, epoch = epoch_key
        # NOTE(review): this branch always reads Frank's tetinfo file,
        # regardless of the animal in `epoch_key` or the contents of
        # `animals` -- confirm this is intended before using other animals.
        file_name = 'C:\\Frank\\fratetinfo.mat'
        tet_info = loadmat(file_name, squeeze_me=True)["tetinfo"]
        # day and epoch are 1-based in the key, 0-based in the loaded array.
        tetrode_info.append(
            convert_tetrode_epoch_to_dataframe(
                tet_info[day - 1][epoch - 1], epoch_key))
        return pd.concat(tetrode_info, sort=True)

    # No epoch requested: walk every day/epoch of every animal.
    for animal in animals.values():
        file_name = get_tetrode_info_path(animal)
        tet_info = loadmat(file_name, squeeze_me=True)["tetinfo"]
        try:
            for day_ind, day in enumerate(tet_info):
                try:
                    for epoch_ind, epoch in enumerate(day):
                        epoch_key = (
                            animal.short_name,
                            day_ind + 1,
                            epoch_ind + 1,
                        )  # noqa
                        tetrode_info.append(
                            convert_tetrode_epoch_to_dataframe(
                                epoch, epoch_key)
                        )
                # An IndexError while processing a day is silently skipped.
                except IndexError:
                    pass
        # A TypeError anywhere in the day loop switches to treating
        # `tet_info` itself as the list of epochs of a single day.
        except TypeError:
            # Only one day of recording
            try:
                day_ind = 0
                for epoch_ind, epoch in enumerate(tet_info):
                    epoch_key = animal.short_name, day_ind + 1, epoch_ind + 1
                    tetrode_info.append(
                        convert_tetrode_epoch_to_dataframe(epoch, epoch_key))
            except IndexError:
                pass

    return pd.concat(tetrode_info, sort=True)


def get_LFP_filename_modified(tetrode_key, animals):
    '''Returns a file name for the tetrode file LFP for an epoch.

    The path is assembled from explicit parts (root directory, 'EEG'
    folder, file name) instead of slicing a fixed number of characters off
    a formatted string. The slicing version produced wrong names for
    two-digit epochs or three-digit tetrode numbers, and for 'dud' and
    'fra' it only contained a directory when the animal's `short_name`
    happened to hold a path.
    Parameters
    ----------
    tetrode_key : tuple
        Unique key identifying the tetrode. Elements are
        (animal_short_name, day, epoch, tetrode_number).
    animals : dict of named-tuples
        Dictionary containing information about the directory for each
        animal. The key is the animal_short_name.
    Returns
    -------
    filename : str
        For the seven known animals:
        '<root>\\EEG\\<animal>eeg<day:02d>-<epoch>-<tetrode:02d>.mat'.
        For any other animal present in `animals`, the bare file name
        prefixed with that animal's `short_name` (no directory).
    Raises
    ------
    KeyError
        If the animal in `tetrode_key` is not a key of `animals`.
    '''
    # Root folder that contains each animal's 'EEG' directory.
    eeg_roots = {
        'con': r'C:\Conley',
        'bon': r'C:\Bond.tar\Bond',
        'cha': r'C:\Chapati',
        'Cor': r'C:\Corriander',
        'dav': r'C:\Dave',
        'dud': r'C:\Dudley',
        'fra': r'C:\Frank',
    }

    animal, day, epoch, tetrode_number = tetrode_key
    animal_record = animals[animal]  # KeyError for unknown animals

    name_suffix = 'eeg{day:02d}-{epoch}-{tetrode_number:02d}.mat'.format(
        day=day, epoch=epoch, tetrode_number=tetrode_number)

    root = eeg_roots.get(animal)
    if root is None:
        # No known data root: return just the file name.
        return '{0}{1}'.format(animal_record.short_name, name_suffix)

    # The dictionary key (e.g. 'fra') is used as the file-name prefix so the
    # result does not depend on how the Animal tuple's fields are populated.
    return root + '\\EEG\\' + animal + name_suffix



def get_trial_time(epoch_key, animals):
    """Time in the recording session in terms of the LFP.
    This will return the LFP time of the first tetrode found (according to the
    tetrode info) whose LFP could be loaded. This is useful when there are
    slightly different timings for the recordings and you need a common time.
    Parameters
    ----------
    epoch_key : tuple
        Unique key identifying a recording epoch with elements
        (animal, day, epoch)
    animals : dict of named-tuples
        Dictionary containing information about the directory for each
        animal. The key is the animal_short_name.
    Returns
    -------
    time : pandas.Index
        Index of the first loadable LFP series, renamed to "time".
    Raises
    ------
    AttributeError
        If no tetrode of the epoch yields an LFP series. Previously this
        case failed with AttributeError on `None.index` (or with
        UnboundLocalError when the tetrode table was empty); the exception
        type is kept so that callers catching AttributeError still work.
    """
    tetrode_info = make_tetrode_dataframe(animals, epoch_key=epoch_key)
    for tetrode_key in tetrode_info.index:
        lfp_df = get_LFP_dataframe(tetrode_key, animals)
        # get_LFP_dataframe returns None when the file could not be loaded.
        if lfp_df is not None:
            return lfp_df.index.rename("time")

    raise AttributeError(
        'No LFP data could be loaded for epoch {0}'.format(epoch_key))


def get_spikes_dataframe_mod(neuron_key, animals):
    '''Spike times for a particular neuron.

    Parameters
    ----------
    neuron_key : tuple
        Unique key identifying that neuron. Elements of the tuple are
        (animal_short_name, day, epoch, tetrode_number, neuron_number).
        Key can be retrieved from `make_neuron_dataframe` function.
    animals : dict of named-tuples
        Dictionary containing information about the directory for each
        animal. The key is the animal_short_name.

    Returns
    -------
    spikes : pandas.Series
        A series of ones indexed by spike time (a TimedeltaIndex named
        'time'). When the spikes file cannot be loaded, or the requested
        epoch/tetrode/neuron is not present in it, the series is empty and
        its index is a plain empty index.
    '''
    animal, day, epoch, tetrode_number, neuron_number = neuron_key
    spikes_path = get_data_filename(animals[animal], day, 'spikes')

    try:
        neuron_file = loadmat(spikes_path)
    except (FileNotFoundError, TypeError):
        logger.warning('Failed to load file: {0}'.format(spikes_path))
        neuron_file = None

    spike_time = []
    if neuron_file is not None:
        try:
            # Last day cell, then 0-based epoch / tetrode / neuron cells;
            # the first column of 'data' is used as the spike times.
            spike_time = neuron_file['spikes'][0, -1][0, epoch - 1][
                0, tetrode_number - 1][0, neuron_number - 1][0]['data'][0][
                :, 0]
            spike_time = pd.TimedeltaIndex(spike_time, unit='s', name='time')
        except IndexError:
            # Requested cell does not exist in the file: no spikes.
            spike_time = []

    # The series is built once and returned; it is no longer printed, which
    # flooded the notebook output with one full series per neuron.
    return pd.Series(
        np.ones_like(spike_time, dtype=int), index=spike_time,
        name='{0}_{1:02d}_{2:02}_{3:03}_{4:03}'.format(*neuron_key))

def get_data_filename(animal, day, file_type):
    '''Build the path of a per-day Matlab data file.
    Parameters
    ----------
    animal : namedtuple
        Must expose a `short_name` attribute, used as the file-name prefix.
    day : int
        Day of recording (zero-padded to two digits in the file name).
    file_type : str
        Data structure name (e.g. linpos, dio)
    Returns
    -------
    filename : str
        'C:\\Frank' joined with '<short_name><file_type><day:02d>.mat'.
    '''
    base_name = f'{animal.short_name}{file_type}{day:02d}.mat'
    # The data directory is fixed to Frank's folder rather than taken from
    # `animal`; change it here when working with another animal.
    data_directory = 'C:\\Frank'
    return join(data_directory, base_name)

def convert_neuron_epoch_to_dataframe_modified(tetrodes_in_epoch, animal, day, epoch):
    '''Turn the neuron structure of one epoch into a tidy DataFrame.

    One record is built per neuron of every tetrode (including entries for
    which `_convert_to_dict` yields an empty dict). The tag columns listed
    below are dropped when present, the animal/day/epoch labels and a
    `neuron_id` column are added, and the frame is indexed by
    (animal, day, epoch, tetrode_number, neuron_number).

    Returns None (after a debug log entry) if building the frame raises
    AttributeError.
    '''
    index_levels = ['animal', 'day', 'epoch',
                    'tetrode_number', 'neuron_number']
    unwanted_columns = ['ripmodtag', 'thetamodtag', 'runripmodtag',
                        'postsleepripmodtag', 'presleepripmodtag',
                        'runthetamodtag', 'ripmodtag2', 'runripmodtag2',
                        'postsleepripmodtag2', 'presleepripmodtag2',
                        'ripmodtype', 'runripmodtype', 'postsleepripmodtype',
                        'presleepripmodtype', 'FStag', 'ripmodtag3',
                        'runripmodtag3', 'ripmodtype3', 'runripmodtype3',
                        'tag', 'typetag', 'runripmodtype2',
                        'tag2', 'ripmodtype2', 'descrip']

    records = []
    for tetrode_ind, tetrode in enumerate(tetrodes_in_epoch[0][0]):
        for neuron_ind, neuron in enumerate(tetrode[0]):
            # No size filter: empty neuron entries are kept as well.
            records.append(
                _add_to_dict(_convert_to_dict(neuron), tetrode_ind, neuron_ind))

    try:
        frame = pd.DataFrame(records)
        frame = frame.drop(unwanted_columns, axis=1, errors='ignore')
        frame = frame.assign(animal=animal)
        frame = frame.assign(day=day)
        frame = frame.assign(epoch=epoch)
        # neuron_id is computed from the frame once the labels exist.
        frame = frame.assign(neuron_id=_get_neuron_id)
        # set index to identify rows
        return frame.set_index(index_levels).sort_index()
    except AttributeError:
        logger.debug(f'Neuron info {animal}, {day}, {epoch} not processed')
        return None

def _convert_to_dict_modified(struct_array):
    try:
        return {name: struct_array[name].item().item()
                for name in struct_array.dtype.names
                if struct_array[name].item().size == 1}
    except TypeError:
        return {}
    #added in the AttributeError
    except AttributeError:
        return {}
    
def get_spike_indicator_dataframe_modified(neuron_key, animals, time_function=get_trial_time):
    '''Count the spikes of one neuron in every bin of the epoch's time axis.

    Parameters
    ----------
    neuron_key : tuple
        (animal_short_name, day, epoch, tetrode_number, neuron_number).
    animals : dict of named-tuples
        Passed through to `time_function` and `get_spikes_dataframe_mod`.
    time_function : callable, optional
        Called as `time_function(neuron_key[:3], animals)` to obtain the
        common time axis.

    Returns
    -------
    pandas.Series or None
        Spike counts re-indexed onto the time axis (0 where no spike), or
        None when binning raises AttributeError (empty spike data).
    '''
    time = time_function(neuron_key[:3], animals)
    spikes_df = get_spikes_dataframe_mod(neuron_key, animals)

    bin_of_spike = None
    try:
        spike_seconds = spikes_df.index.total_seconds()
        bin_of_spike = np.digitize(spike_seconds, time.total_seconds())
        # Spikes at or past the end of the axis are clamped to the last bin.
        bin_of_spike[bin_of_spike >= len(time)] = len(time) - 1
    # the exception is for empty data
    except AttributeError:
        print('No spikes here; data is emtpy')

    if bin_of_spike is None:
        return None
    counts_per_bin = spikes_df.groupby(time[bin_of_spike]).sum()
    return counts_per_bin.reindex(index=time, fill_value=0)
        
def make_neuron_dataframe_modified(animals):
    '''Information about all recorded neurons such as brain area.
    The index of the dataframe corresponds to the unique key for that neuron
    and can be used to load spiking information.
    Parameters
    ----------
    animals : dict of named-tuples
        Dictionary containing information about the directory for each
        animal. The key is the animal_short_name. Only the keys are used.
    Returns
    -------
    neuron_information : pandas.DataFrame
    '''
    # Only Frank's cellinfo file is read; adjust this list for other animals.
    # NOTE(review): each file is labelled with the key at the same position
    # in `animals`, so the first key is expected to be Frank's -- confirm.
    cellinfo_paths = ['C:\\Frank\\fracellinfo.mat']

    loaded_files = []
    for animal_name, path in zip(animals.keys(), cellinfo_paths):
        loaded_files.append((loadmat(path), animal_name))

    epoch_frames = []
    for cellfile, animal in loaded_files:
        for day_ind, day in enumerate(cellfile['cellinfo'].T):
            for epoch_ind, epoch in enumerate(day[0].T):
                # Day and epoch numbers are 1-based in the resulting index.
                epoch_frames.append(
                    convert_neuron_epoch_to_dataframe(
                        epoch, animal, day_ind + 1, epoch_ind + 1))

    return pd.concat(epoch_frames).sort_index()

#modified for access with correct name
def get_tetrode_info_path_modified(animal):
    '''Return the fixed name 'tetinfo'.
    Parameters
    ----------
    animal : namedtuple
        Accepted for signature compatibility; it is not read.
    Returns
    -------
    filename : str
        Always the string 'tetinfo' (no directory, no extension).
    '''
    return 'tetinfo'


def convert_tetrode_epoch_to_dataframe(tetrodes_in_epoch, epoch_key):
    '''Convert tetrode information data structure to dataframe.
    Parameters
    ----------
    tetrodes_in_epoch : matlab data structure
        Iterable with one entry per tetrode; each entry is converted with
        `_convert_to_dict`.
    epoch_key : tuple
        Unique key identifying a recording epoch. Elements are
        (animal, day, epoch)
    Returns
    -------
    tetrode_info : dataframe
        One row per tetrode, indexed by
        (animal, day, epoch, tetrode_number) and sorted by that index.
    '''
    animal, day, epoch = epoch_key
    # The raw structure is no longer printed here: that was a debugging
    # leftover that dumped the whole MATLAB struct for every epoch.
    tetrode_dict_list = [_convert_to_dict(
        tetrode) for tetrode in tetrodes_in_epoch]
    return (pd.DataFrame(tetrode_dict_list)
              .assign(animal=lambda x: animal)
              .assign(day=lambda x: day)
              .assign(epoch=lambda x: epoch)
              # Tetrodes are numbered from 1 in their order of appearance.
              .assign(tetrode_number=lambda x: x.index + 1)
              .assign(tetrode_id=_get_tetrode_id)
            # set index to identify rows
              .set_index(['animal', 'day', 'epoch', 'tetrode_number'])
              .sort_index()
            )

In [9]:
#new functions 


def sum_time_series_dict(time_series_dict):
    '''Sum all time series of a dict element-wise into one cumulative series.

    Parameters
    ----------
    time_series_dict : dict
        Values are 1-D sequences (possibly of different lengths) or None.
        None values are ignored.

    Returns
    -------
    res : numpy.ndarray
        Float array as long as the longest series; shorter series
        contribute only to the leading bins. An empty array is returned
        when the dict is empty or contains only None values (previously
        this raised ValueError from max() on an empty sequence).
    '''
    usable_series = [arr for arr in time_series_dict.values()
                     if arr is not None]
    num_bins = max((len(arr) for arr in usable_series), default=0)
    res = np.zeros(num_bins)
    for arr in usable_series:
        res[:len(arr)] += arr
    return res

def generate_spike_indicator_dict(neuron_key_list, animals):
    '''Build a dict of spike-indicator arrays for a list of neuron ids.

    Parameters
    ----------
    neuron_key_list : iterable of str
        Neuron ids made of five underscore-separated parts:
        animal short name, day, epoch, tetrode number, neuron number.
    animals : dict of named-tuples
        Passed through to `get_spike_indicator_dataframe_modified`.

    Returns
    -------
    spike_indicator_dict : dict
        Maps (animal, day, epoch, tetrode, neuron) tuples -- numeric parts
        converted to int -- to the `.values` of the neuron's spike
        indicator, or to None when that lookup raises AttributeError.
    '''
    spike_indicator_dict = {}
    for neuron_id in neuron_key_list:
        short_name, day, epoch, tetrode, neuron = neuron_id.split("_")
        neuron_key = (short_name,) + tuple(
            int(part) for part in (day, epoch, tetrode, neuron))
        try:
            indicator = get_spike_indicator_dataframe_modified(neuron_key, animals).values
        except AttributeError:
            # Raised e.g. when the indicator function returned None.
            indicator = None
            print(f"No spike indicator data for neuron: {neuron_key}")
        spike_indicator_dict[neuron_key] = indicator
    return spike_indicator_dict


def split_neuron_dataframe_informationally(df, split_cols):
    '''Partition a DataFrame by the values of the given columns.
    Parameters
    ----------
    df : pandas.DataFrame
        The DataFrame to be split.
    split_cols : list of str
        Column names passed to `DataFrame.groupby`.
    Returns
    -------
    dfs : dict
        Maps each group key produced by `groupby` to an independent copy
        of the rows belonging to that group.
    '''
    return {group_key: rows.copy()
            for group_key, rows in df.groupby(split_cols)}


def dict_to_numpy(my_dict):
    '''Return the dict's values, in insertion order, as a numpy array.'''
    return np.array(list(my_dict.values()))

def dict_to_list(my_dict):
    '''Return the dict's values as a list, leaving out values that are None.'''
    kept_values = []
    for value in my_dict.values():
        if value is None:
            continue
        kept_values.append(value)
    return kept_values

def pad_arrays(arrays):
    """
    Pads a list of arrays with zeros so that they are all the same length.
    
    Parameters:
    -----------
    arrays : list of numpy arrays
        The arrays to pad. Entries that are None become all-zero rows.
    
    Returns:
    --------
    padded_arrays : numpy array
        Float array of shape (len(arrays), longest length), one row per
        input entry. If the list is empty or holds only None entries the
        result has zero columns (previously this raised ValueError from
        max() on an empty sequence).
    """
    # Length of the longest real array; 0 when there is none.
    max_len = max((len(arr) for arr in arrays if arr is not None), default=0)

    padded_arrays = np.zeros((len(arrays), max_len))

    # Copy each array into the start of its row; the rest stays zero.
    for i, arr in enumerate(arrays):
        if arr is not None:
            padded_arrays[i, :len(arr)] = arr

    return padded_arrays

def _convert_to_dict(struct_array):
    try:
        return {name: struct_array[name].item().item()
                for name in struct_array.dtype.names
                if struct_array[name].item().size == 1}
    except TypeError:
        return {}
    #added in the AttributeError
    except AttributeError:
        return {}
    
def chunk_array(arr, n):
    '''Distribute the elements of `arr`, in order, over `n` lists.

    Each list receives up to ceil(len(arr) / n) consecutive elements, so
    trailing lists may be shorter or empty. Always returns `n` lists.
    '''
    # Ceiling division: capacity of every chunk.
    per_chunk = (len(arr) + n - 1) // n

    buckets = []
    for _ in range(n):
        buckets.append([])

    # Element at position p belongs to chunk p // per_chunk.
    for position, item in enumerate(arr):
        buckets[position // per_chunk].append(item)

    return buckets

In [10]:
# Neuron metadata, then one sub-frame per (area, animal) combination.
df = make_neuron_dataframe_modified(animals)
splitted_df = split_neuron_dataframe_informationally(df, ['area', 'animal'])

# Ids of every neuron in the table.
neuron_ids = df['neuron_id'].tolist()

# Per-area frames for animal 'fra'.
CA1 = splitted_df[('CA1', 'fra')]
CA3 = splitted_df[('CA3', 'fra')]
DG = splitted_df[('DG', 'fra')]

# Spike indicators are generated for the CA1 neurons only.
list_id = CA1['neuron_id'].tolist()
spike_dict_what = generate_spike_indicator_dict(list_id, animals)

# The keys printed below are the .keys() of the dict returned by split_neuron_dataframe_informationally above.


dict_keys([('CA1', 'fra'), ('CA2', 'fra'), ('CA3', 'fra'), ('DG', 'fra')])

In [11]:
# Sum the per-neuron spike indicator arrays element-wise into one series.
summed_time_series = sum_time_series_dict(spike_dict_what)

NameError: name 'spike_dict_what' is not defined

In [39]:

# Cut the summed series into 100 consecutive chunks and keep the first 99.
# NOTE(review): the last chunk is presumably dropped because it can be
# shorter than the others -- confirm.
chunks = chunk_array(summed_time_series, 100)[:99]
print(len(chunks))

99


In [41]:
# Let mrestimator parse the list of chunks into its trial-structured array.
# NOTE(review): the name says CA3, but the chunks visible in this notebook
# are built from the CA1 neuron ids -- confirm which area is analysed.
#del spike_dict_CA3
spike_dict_CA3 = mre.input_handler(chunks)
#spike_dict_CA3 = mre.input_handler(padded_array)

# Full analysis with the trial-separated ('ts') method, 5 bootstrap
# replicas and the 'complex' fit function; one time step is 4 ms.
# tmin/tmax are presumably expressed in `dtunit` (ms) -- confirm.
auto = mre.full_analysis(
    data=spike_dict_CA3,
    method='ts',
    numboot=5,
    targetdir='./output',
    title='Full Analysis',
    dt=4,
    dtunit='ms',
    tmin=0,
    tmax=8000,
    fitfuncs=['complex'],
)

INFO     input_handler() detected list, parsing to numpy ndarray as float
INFO     input_handler() returning ndarray with 99 trial(s) and 22331 datapoints
INFO     input_handler() detected ndarray of numbers
INFO     input_handler() returning ndarray with 99 trial(s) and 22331 datapoints
INFO     coefficients() with 'trialseparated' method for 99 trials of length 22331.
INFO     Bootstrapping 5 replicas
INFO     5 bootstrap replicas done
INFO     Bounded fit to $|A| e^{-k/\tau} + B e^{-(k/\tau_{osc})^\gamma} \cos(2 \pi \nu k) + C e^{-(k/\tau_{gs})^2} + O$
INFO     Fitting with 22 different start values
INFO     Bootstrapping 5 replicas (22 fits each)
INFO     Finished fitting the data to f_complex,
         mre = 0.99590(31), tau = 973.73(90.85)ms, ssres = 0.00140
INFO     Saving plot to C:\Users\janbe\output/Full Analysis.pdf
INFO     Saving meta to C:\Users\janbe\output/Full Analysis.tsv
INFO     full_analysis() done
