In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import time
import datetime as dt
import os
import seaborn as sns
import scipy.stats as sts

import random
import string

In [None]:
import spikeinterface.full as si  # import core only
import spikeinterface.extractors as se
import spikeinterface.preprocessing as spre
import spikeinterface.sorters as ss
import spikeinterface.postprocessing as spost
import spikeinterface.qualitymetrics as sqm
import spikeinterface.comparison as sc
import spikeinterface.exporters as sexp
import spikeinterface.curation as scur
import spikeinterface.widgets as sw
from spikeinterface.sortingcomponents.peak_detection import detect_peaks
from spikeinterface.sortingcomponents.peak_localization import localize_peaks

from typing import Tuple, List
from probeinterface import Probe

def load_recording_from_raw(root: str, sample_base: str, well: Tuple[int, int], time_samplings_to_mask: List[Tuple[float, float]]):
    """Build an in-memory recording for one well from per-electrode raw voltage text files.

    Files are looked up as
    ``{root}/{sample_base}/{well[0]}-{well[1]}-{Erow}-{Ecol}_voltageRaw.txt`` (or ``.txt.gz``),
    and the acquisition metadata is read from ``{root}/{sample_base}/{sample_base}.info``
    (tab-separated ``key<TAB>value`` rows; ``SamplingFrequency`` and ``VoltageScale`` are used).

    Parameters
    ----------
    root : str
        Folder that contains the ``sample_base`` sub-folder.
    sample_base : str
        Name of the sub-folder and of the ``.info`` file inside it.
    well : tuple of (int, int)
        Well coordinates used as the first two fields of the file names.
    time_samplings_to_mask : list of (float, float)
        ``(start, end)`` intervals, multiplied by the sampling frequency to obtain
        sample indices; the traces of every channel are zeroed in ``[start, end)``.

    Returns
    -------
    si.NumpyRecording
        Single-segment recording whose channel ids are ``'{Erow}-{Ecol}'`` strings,
        all assigned to group 0. Traces are the raw values divided by ``|VoltageScale|``.

    Raises
    ------
    FileNotFoundError
        If no electrode file exists for the requested well.
    ValueError
        If the electrode files do not all contain the same number of samples.
    """
    info = pd.read_csv(f'{root}/{sample_base}/{sample_base}.info', index_col=0, names=['index', 'value'], sep='\t')
    # Coerce explicitly: if the .info file also holds non-numeric entries the whole
    # 'value' column may be parsed as text (NOTE(review): depends on the file contents).
    sampling_frequency = float(info.loc['SamplingFrequency', 'value'])
    voltage_scale = abs(float(info.loc['VoltageScale', 'value']))

    traces_list = []
    channel_ids = []

    # Rows/columns 1..9 are scanned (the upper bound 10 is exclusive), which is enough for
    # MEAs of up to 64 electrodes. Probing non-existing electrodes only costs a cheap
    # existence check, so we don't mind scanning a larger grid than needed.
    for Erow in range(1, 10):
        for Ecol in range(1, 10):
            filename = f'{root}/{sample_base}/{well[0]}-{well[1]}-{Erow}-{Ecol}_voltageRaw'

            # Prefer the plain-text file and fall back to the gzip one;
            # np.loadtxt transparently decompresses '.gz' files.
            candidates = (f'{filename}.txt', f'{filename}.txt.gz')
            path = next((candidate for candidate in candidates if os.path.exists(candidate)), None)
            if path is None:
                continue

            channel_ids.append(f'{Erow}-{Ecol}')
            traces_list.append(np.loadtxt(path))

    if not traces_list:
        raise FileNotFoundError(
            f"No raw voltage files found for well {well[0]}-{well[1]} in '{root}/{sample_base}'"
        )

    # A ragged list cannot be stacked into a (samples, channels) array; fail with a clear message.
    trace_shapes = {np.shape(trace) for trace in traces_list}
    if len(trace_shapes) > 1:
        raise ValueError(
            f"Electrode files of well {well[0]}-{well[1]} have different lengths: {sorted(trace_shapes)}"
        )

    # (channels, samples) -> (samples, channels), rescaled by the absolute voltage scale
    trace_array = np.asarray(traces_list).transpose() / voltage_scale

    for t_start, t_end in time_samplings_to_mask:
        t0 = int(t_start * sampling_frequency)
        tf = int(t_end * sampling_frequency)
        trace_array[t0:tf, :] = 0

    sample_recording = si.NumpyRecording(
        traces_list=[trace_array],
        sampling_frequency=sampling_frequency,
        channel_ids=np.asarray(channel_ids)
    )

    sample_recording.set_property('group', [0] * len(channel_ids))
    sample_recording.is_dumpable = True  # This is necessary for some options later, like spike sorting

    return sample_recording


def load_probe_recording(recording: si.NumpyRecording, type_MEAS: int, ):
    """Attach electrode geometry to ``recording`` (modified in place; nothing is returned).

    Channel ids are expected in the ``'row-col'`` format (1-based), as produced by
    ``load_recording_from_raw``. Each id is converted to coordinates on a regular grid
    and stored three ways: as channel locations, as a probeinterface ``Probe`` and as a
    hand-built ``'contact_vector'`` property.

    Parameters
    ----------
    recording : si.NumpyRecording
        Recording whose channel ids encode the electrode grid position.
    type_MEAS : int
        MEA type; 16 selects a grid spacing of 350, any other value 300
        (micrometres, going by the 'um' units used for the probe below).
    """
    dist_multiplier = 350 if type_MEAS == 16 else 300
    circle_radius = 50

    channel_ids = recording.get_channel_ids()

    positions = np.zeros((len(channel_ids), 2), dtype=float)
    contact_vector = []
    for channel_idx, channel in enumerate(channel_ids):
        # x_coord is derived from the first id field and y_coord from the second one
        x_coord, y_coord = (int(channel.split('-')[0]) - 1) * dist_multiplier, (int(channel.split('-')[1]) - 1) * dist_multiplier
        # NOTE(review): `positions` stores (y_coord, x_coord), i.e. transposed with respect to
        # the (x_coord, y_coord) order written into `contact_vector` below. Harmless for
        # distance-based processing on a square grid, but confirm which orientation is intended.
        positions[channel_idx, 1], positions[channel_idx, 0] = x_coord, y_coord
        
        # Field order must match `dtypes` defined further down
        contact_vector.append((0, x_coord,   y_coord, 'circle', circle_radius, '', '', channel_idx, 'um', 1., 0., 0., 1.))

    # later if we are using peak detection, we may need it
    recording.set_channel_locations(locations=positions)

    probe = Probe(ndim=2, si_units='um')
    probe.set_contacts(positions=positions, shapes='circle', shape_params={'radius': circle_radius})
    probe.device_channel_indices = np.arange(len(channel_ids))
    probe.create_auto_shape('rect')

    # NOTE(review): the return value is discarded. In spikeinterface `set_probe` presumably
    # returns a new recording unless `in_place=True` is passed, in which case this call has no
    # effect on `recording` and the manual 'contact_vector' below is what actually attaches
    # the geometry — confirm against the installed version.
    recording.set_probe(probe)


    # Create contact_vector
    dtypes=[('probe_index', '<i8'), ('x', '<f8'), ('y', '<f8'), ('contact_shapes', '<U64'), 
            ('radius', '<f8'), ('shank_ids', '<U64'), ('contact_ids', '<U64'), ('device_channel_indices', '<i8'), 
            ('si_units', '<U64'), ('plane_axis_x_0', '<f8'), ('plane_axis_x_1', '<f8'), ('plane_axis_y_0', '<f8'), 
            ('plane_axis_y_1', '<f8')]

    recording.set_property('contact_vector', np.asarray(contact_vector, dtype=dtypes))

In [None]:
def _peaks_to_times_and_electrodes(peaks, channel_ids, sampling_frequency):
    """Turn a structured peaks array into parallel lists of peak times and electrode labels."""
    if len(peaks) == 0:
        return [], []

    # The first two fields hold the sample index and the channel index. They are accessed
    # by position because their names differ between spikeinterface versions.
    sample_field, channel_field = peaks.dtype.names[:2]
    sample_index = np.asarray(peaks[sample_field])
    channel_index = np.asarray(peaks[channel_field])

    # Group peaks channel by channel, keeping the detection order inside each channel.
    order = np.argsort(channel_index, kind='stable')

    # Channel ids look like 'row-col'; the electrode label is 'rowcol' (e.g. '1-3' -> '13').
    labels = np.asarray([str(channel_id).replace('-', '') for channel_id in channel_ids], dtype=object)

    list_peaks = (sample_index[order] / sampling_frequency).tolist()
    list_electrodes = labels[channel_index[order]].tolist()
    return list_peaks, list_electrodes


def retrieve_peaks(root, sample_base, well):
    """Detect threshold-crossing peaks on every electrode of one well.

    The raw recording is loaded, cached to a binary folder under ``tmp/``, band-pass
    filtered (300-5000), re-referenced to the global median and passed to
    ``detect_peaks`` (``locally_exclusive`` method, threshold 5 x noise level).

    Parameters
    ----------
    root, sample_base : str
        Forwarded to ``load_recording_from_raw``.
    well : tuple of (int, int)
        Well coordinates, forwarded to ``load_recording_from_raw``.

    Returns
    -------
    list_peaks : list of float
        Peak times (sample index / sampling frequency), grouped by channel.
    list_electrodes : list of str
        Electrode label (``'rowcol'``) of each entry of ``list_peaks``.

    Notes
    -----
    Labels are derived from the recording's channel ids, so they stay correct when an
    electrode file is missing; for a complete 4x4 grid they are identical to the
    previous ``i // 4, i % 4`` mapping. The binary copy written to
    ``tmp/bin_<token>`` is not removed by this function.
    """
    # Random token so that concurrent/repeated runs never collide on the cache folder
    session_token = dt.datetime.now().strftime("%y-%m-%d") + '_' + \
                ''.join(random.choice(string.ascii_letters) for i in range(8)) + str(well[0]) + '-' + str(well[1])

    recording = load_recording_from_raw(root=root, sample_base=sample_base, well=well, time_samplings_to_mask=[])
    load_probe_recording(recording=recording, type_MEAS=16)

    recording_bin = recording.save(n_jobs=16, chunk_duration="1s", folder=f'tmp/bin_{session_token}')

    recording_f = spre.bandpass_filter(recording_bin, freq_min=300, freq_max=5000)

    recording_cmr = spre.common_reference(recording_f, reference='global', operator='median')

    noise_levels = si.get_noise_levels(recording_cmr, return_scaled=False)

    peaks = detect_peaks(recording_cmr,
                        method='locally_exclusive',
                        local_radius_um=450,
                        detect_threshold=5,
                        noise_levels=noise_levels,
                        )

    return _peaks_to_times_and_electrodes(
        peaks,
        channel_ids=recording.get_channel_ids(),
        sampling_frequency=recording.sampling_frequency,
    )

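**Run only ONE of the next three configuration cells (MV = 100, 200 or 500): each one overwrites `MV`, the input/output folders and `list_conditions`.**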

In [None]:
# Configuration for the MV = 100 data set
DATE = '2024_05_07'
MV = 100

folder_base = f'/data/Proyectos/Nanoneuro_exps_ane/raw_files/{DATE}/{MV}/'
folder_df_save = f'/data/Proyectos/Nanoneuro_exps_ane/results/{DATE}/{MV}/'

os.makedirs(folder_df_save, exist_ok=True)

# 'Well_num' is the two-character plate coordinate '<row><column>' (A1 -> '11', C5 -> '35').
# It must be a string: the extraction loop reads it character by character
# (well_num[0], well_num[1]), which raises TypeError on an int.
# NOTE(review): 'Folder' is empty in every row, so PRE and POST resolve to the same input
# path — confirm the sub-folder names before running the extraction.
list_conditions = [#('Condition', 'Treatment', 'Wells', 'Well_num', 'Replicate', 'Folder') 
                    ('BP',          'PRE',     ['C5', 'C6'],               ['35', '36'],               [1, 2],         ''), 
                    ('BP',          'POST',    ['C5', 'C6'],               ['35', '36'],               [1, 2],         ''), 
                    ('LINK1',       'PRE',     ['D2'],                     ['42'],                     [1],            ''), 
                    ('LINK1',       'POST',    ['D2'],                     ['42'],                     [1],            ''), 
                    ('LINK2',       'PRE',     ['D3', 'D4'],               ['43', '44'],               [1, 2],         ''), 
                    ('LINK2',       'POST',    ['D3', 'D4'],               ['43', '44'],               [1, 2],         ''),
                    ('LINK3',       'PRE',     ['D5', 'D6'],               ['45', '46'],               [1, 2],         ''),
                    ('LINK3',       'POST',    ['D5', 'D6'],               ['45', '46'],               [1, 2],         ''),
                    ('BP+LINK1',    'PRE',     ['A1', 'A2', 'A3', 'A4'],   ['11', '12', '13', '14'],   [1, 2, 3, 4],   ''), 
                    ('BP+LINK1',    'POST',    ['A1', 'A2', 'A3', 'A4'],   ['11', '12', '13', '14'],   [1, 2, 3, 4],   ''), 
                    ('BP+LINK2',    'PRE',     ['B2', 'B3', 'B4'],         ['22', '23', '24'],         [1, 2, 3],      ''), 
                    ('BP+LINK2',    'POST',    ['B2', 'B3', 'B4'],         ['22', '23', '24'],         [1, 2, 3],      ''), 
                    ('BP+LINK3',    'PRE',     ['C2', 'C3', 'C4'],         ['32', '33', '34'],         [1, 2, 3],      ''), 
                    ('BP+LINK3',    'POST',    ['C2', 'C3', 'C4'],         ['32', '33', '34'],         [1, 2, 3],      ''), 
                   ]

In [None]:
# Configuration for the MV = 200 data set
DATE = '2024_05_07'
MV = 200

folder_base = f'/data/Proyectos/Nanoneuro_exps_ane/raw_files/{DATE}/{MV}/'
folder_df_save = f'/data/Proyectos/Nanoneuro_exps_ane/results/{DATE}/{MV}/'

os.makedirs(folder_df_save, exist_ok=True)

# 'Well_num' is the two-character plate coordinate '<row><column>' (A1 -> '11', C5 -> '35').
# It must be a string: the extraction loop reads it character by character
# (well_num[0], well_num[1]), which raises TypeError on an int.
# Fixed with respect to the previous table: B4 is '24' (was 25, duplicating B5),
# C1 is '31' (was 21, which is B1) and 'C4' no longer carries a leading space.
# NOTE(review): 'Folder' is empty in every row, so PRE and POST resolve to the same input
# path — confirm the sub-folder names before running the extraction.
list_conditions = [#('Condition', 'Treatment', 'Wells', 'Well_num', 'Replicate', 'Folder') 
                    ('CTRL',        'PRE',     ['A1', 'A2', 'A3'],     ['11', '12', '13'], [1, 2, 3], ''), 
                    ('CTRL',        'POST',    ['A1', 'A2', 'A3'],     ['11', '12', '13'], [1, 2, 3], ''), 
                    ('BP',          'PRE',     ['A4', 'A5', 'A6'],     ['14', '15', '16'], [1, 2, 3], ''), 
                    ('BP',          'POST',    ['A4', 'A5', 'A6'],     ['14', '15', '16'], [1, 2, 3], ''),
                    ('LINK1',       'PRE',     ['B4', 'B5', 'B6'],     ['24', '25', '26'], [1, 2, 3], ''), 
                    ('LINK1',       'POST',    ['B4', 'B5', 'B6'],     ['24', '25', '26'], [1, 2, 3], ''), 
                    ('LINK2',       'PRE',     ['C3', 'C4', 'C6'],     ['33', '34', '36'], [1, 2, 3], ''), 
                    ('LINK2',       'POST',    ['C3', 'C4', 'C6'],     ['33', '34', '36'], [1, 2, 3], ''), 
                    ('LINK3',       'PRE',     ['D4', 'D5', 'D6'],     ['44', '45', '46'], [1, 2, 3], ''), 
                    ('LINK3',       'POST',    ['D4', 'D5', 'D6'],     ['44', '45', '46'], [1, 2, 3], ''), 
                    ('BP+LINK1',    'PRE',     ['B1', 'B2', 'B3'],     ['21', '22', '23'], [1, 2, 3], ''), 
                    ('BP+LINK1',    'POST',    ['B1', 'B2', 'B3'],     ['21', '22', '23'], [1, 2, 3], ''), 
                    ('BP+LINK2',    'PRE',     ['C1', 'C2', 'C5'],     ['31', '32', '35'], [1, 2, 3], ''), 
                    ('BP+LINK2',    'POST',    ['C1', 'C2', 'C5'],     ['31', '32', '35'], [1, 2, 3], ''), 
                    ('BP+LINK3',    'PRE',     ['D1', 'D2', 'D3'],     ['41', '42', '43'], [1, 2, 3], ''), 
                    ('BP+LINK3',    'POST',    ['D1', 'D2', 'D3'],     ['41', '42', '43'], [1, 2, 3], ''), 
                   ]

In [None]:
# Configuration for the MV = 500 data set
DATE = '2024_05_07'
MV = 500

folder_base = f'/data/Proyectos/Nanoneuro_exps_ane/raw_files/{DATE}/{MV}/'
folder_df_save = f'/data/Proyectos/Nanoneuro_exps_ane/results/{DATE}/{MV}/'

os.makedirs(folder_df_save, exist_ok=True)

# condition -> (well names, two-character plate coordinates of those wells)
wells_by_condition = {
    'CTRL':     (['A1', 'A2', 'A3'], ['11', '12', '13']),
    'BP':       (['A4', 'A5', 'A6'], ['14', '15', '16']),
    'LINK1':    (['B1', 'B2', 'B3'], ['21', '22', '23']),
    'LINK2':    (['C1', 'C2', 'C3'], ['31', '32', '33']),
    'LINK3':    (['D1', 'D2', 'D3'], ['41', '42', '43']),
    'BP+LINK1': (['B4', 'B5', 'B6'], ['24', '25', '26']),
    'BP+LINK2': (['C4', 'C5', 'C6'], ['34', '35', '36']),
    'BP+LINK3': (['D4', 'D5', 'D6'], ['44', '45', '46']),
}

# One row per (condition, treatment), PRE before POST:
# ('Condition', 'Treatment', 'Wells', 'Well_num', 'Replicate', 'Folder')
# For this data set the folder name is the treatment name itself.
list_conditions = [
    (condition, treatment, list(wells), list(wells_num), [1, 2, 3], treatment)
    for condition, (wells, wells_num) in wells_by_condition.items()
    for treatment in ('PRE', 'POST')
]

**||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||**

In [None]:
# Detect peaks for every (condition, treatment, well) listed in `list_conditions` and
# gather them in one long table with one row per detected peak.
peak_columns = ['condition', 'treatment', 'well', 'well_num', 'replicate', 'electrode', 'time']

list_df_peaks = []   # one frame per well; concatenated once at the end (no quadratic re-copying)
n_peaks_total = 0

for condition, treatment, list_wells, list_wells_num, replicates, sample_base in list_conditions:
    for well, well_num, replicate in zip(list_wells, list_wells_num, replicates):
        print(condition, treatment, well, well_num, replicate, sample_base)

        # `well_num` encodes the plate coordinate as two digits; str() lets both 35 and '35' work.
        well_code = str(well_num)
        list_peak_times, list_electrodes = retrieve_peaks(root=folder_base, sample_base=sample_base,
                                                          well=(int(well_code[0]), int(well_code[1])))

        # Scalars are broadcast to the length of the list-valued columns
        df_peaks_i = pd.DataFrame({'condition': condition,
                                   'treatment': treatment,
                                   'well': well,
                                   'well_num': well_num,
                                   'replicate': replicate,
                                   'electrode': list_electrodes,
                                   'time': list_peak_times
                                   })
        list_df_peaks.append(df_peaks_i)

        # Running total of detected peaks, as a progress indicator
        n_peaks_total += len(df_peaks_i)
        print(n_peaks_total)

if list_df_peaks:
    df_peaks = pd.concat(list_df_peaks, ignore_index=True)
else:
    df_peaks = pd.DataFrame(columns=peak_columns)

In [None]:
# Persist the full (unfiltered) peaks table so the slow detection step need not be repeated
peaks_csv_path = f'{folder_df_save}/df_peaks_full_{MV}.csv'
df_peaks.to_csv(peaks_csv_path, index=False)

## Load df peaks and remove non-compliant electrodes

In [None]:
# Reload the peaks table written by the extraction step. Column types are re-inferred
# by pandas on reading (presumably turning labels such as '11' into integers).
peaks_csv_path = f'{folder_df_save}/df_peaks_full_{MV}.csv'
df_peaks = pd.read_csv(peaks_csv_path)
df_peaks

In [None]:
# Overwrite the replicate id with a 0-based index derived from the well name:
# second character of the name (the plate column digit), minus one, modulo 3.
well_column_digit = df_peaks['well'].str[1].astype(int)
df_peaks['replicate'] = (well_column_digit - 1) % 3

In [None]:
# Display the peaks table to check the recomputed 'replicate' column
df_peaks

In [None]:
# Raster plot of one well: every detected peak is a tick at (time, electrode row)
df_peaks_sub = df_peaks[(df_peaks['well'] == 'B5') &
                        (df_peaks['treatment'] == 'POST')]

fig, ax = plt.subplots(figsize=(30, 6))

# Electrode label 'rc' (row r, column c) -> row index (r - 1) * 4 + (c - 1) on the y axis
y = []
for electrode_label in df_peaks_sub['electrode'].astype(str).values:
    grid_row, grid_col = int(electrode_label[0]) - 1, int(electrode_label[1]) - 1
    y.append(grid_row * 4 + grid_col)
x = df_peaks_sub['time'].values

# Label the 16 rows with their electrode names ('11' ... '44')
ax.set_yticks(np.arange(16))
ax.set_yticklabels([f'{i//4 + 1}{i%4 + 1}' for i in np.arange(16)])

ax.scatter(x, y, marker='|', alpha=0.15)

# plt.xlim([10, 70])

In [None]:
# BUILD THE LIST OF ELECTRODES TO REJECT
# Since each condition has a different set of wells, we only need to encode the well info.
#
# An electrode is rejected when, in ANY treatment of its well, it fires fewer than
# `min_spikes` spikes per minute. A rejected electrode is removed from every treatment.

min_spikes = 60

# Duration of each treatment's recording, estimated from the time span of all its peaks
# (all wells pooled). This replaces the span of `df_peaks_sub`, which was whatever
# well/treatment the raster-plot cell happened to select.
treatment_span = df_peaks.groupby('treatment')['time'].agg(['min', 'max'])
duration_by_treatment = treatment_span['max'] - treatment_span['min']

dict_electrode_refuse = {}

# groupby iterates over the keys in sorted order
for well, df_well in df_peaks.groupby('well'):
    electrodes_refuse = set()

    for treatment, df_well_treatment in df_well.groupby('treatment'):
        spikes_per_minute = (df_well_treatment.groupby('electrode')['time'].count()
                             * 60 / duration_by_treatment[treatment])
        electrodes_refuse.update(spikes_per_minute[spikes_per_minute < min_spikes].index.tolist())

    dict_electrode_refuse[well] = sorted(electrodes_refuse)


print(dict_electrode_refuse)


# Drop every peak that belongs to a rejected (well, electrode) pair. Electrode labels are
# compared as they are stored in df_peaks, so this works for both integer and string labels.
refused_pairs = [(well, electrode)
                 for well, list_electrodes in dict_electrode_refuse.items()
                 for electrode in list_electrodes]

if refused_pairs:
    is_refused = pd.MultiIndex.from_frame(df_peaks[['well', 'electrode']]).isin(refused_pairs)
else:
    is_refused = np.zeros(len(df_peaks), dtype=bool)

df_peaks_sub_filter = df_peaks.loc[~is_refused]

In [None]:
# Full peaks table (before removing rejected electrodes), for comparison with the filtered one
df_peaks

In [None]:
# Peaks that remain after removing the rejected electrodes
df_peaks_sub_filter

# Metrics

## Relative change in MFR

In [None]:
# Mean firing rate per electrode: number of peaks divided by the time span
# covered by all retained peaks (one common span for every group).
mfr_group_keys = ['condition', 'treatment', 'well', 'replicate', 'electrode']
mfr_time_span = df_peaks_sub_filter['time'].max() - df_peaks_sub_filter['time'].min()

df_MFR = (
    df_peaks_sub_filter
    .groupby(mfr_group_keys)['time']
    .count()
    .div(mfr_time_span)
    .reset_index()
    .sort_values(by=['well', 'electrode'])
    .rename(columns={'time': 'MFR'})
)
df_MFR

In [None]:
# Show only the POST-treatment rows of the MFR table
df_MFR[df_MFR['treatment'] == 'POST']

In [None]:
# Relative change in MFR per electrode: POST / PRE, expressed in percent (100 = no change).
# PRE and POST are paired on their identifying columns instead of by row position, so an
# electrode that is present in only one treatment is dropped rather than shifting every
# following pair (or raising a length-mismatch error).
mfr_key_columns = ['condition', 'well', 'replicate', 'electrode']

mfr_pre = (df_MFR.loc[df_MFR['treatment'] == 'PRE', mfr_key_columns + ['MFR']]
           .rename(columns={'MFR': 'PRE'}))
mfr_post = (df_MFR.loc[df_MFR['treatment'] == 'POST', mfr_key_columns + ['MFR']]
            .rename(columns={'MFR': 'POST'}))

# Inner join keeps the row order of the POST table; validate guards against duplicated keys
df_MFR_ratio = mfr_post.merge(mfr_pre, on=mfr_key_columns, how='inner', validate='one_to_one')

ratios = (df_MFR_ratio['POST'] / df_MFR_ratio['PRE']).values

df_MFR_ratio['mean_PRE_POST'] = (df_MFR_ratio['PRE'] + df_MFR_ratio['POST']) / 2
df_MFR_ratio['MFR_ratio'] = ratios * 100

df_MFR_ratio = df_MFR_ratio[mfr_key_columns + ['PRE', 'POST', 'mean_PRE_POST', 'MFR_ratio']]

df_MFR_ratio

In [None]:
# MFR ratio of every electrode, per condition, coloured by replicate
fig, ax = plt.subplots(figsize=(19, 6))
sns.swarmplot(data=df_MFR_ratio, x='condition', y='MFR_ratio', hue='replicate', ax=ax)
# Reference at 100 % (POST equals PRE). axhline spans the whole axis whatever the number
# of conditions, unlike a segment hard-coded for exactly eight of them.
ax.axhline(100, color='#bc0000')

In [None]:
# Distribution of the MFR ratio per condition
fig, ax = plt.subplots(figsize=(19, 6))
sns.boxplot(data=df_MFR_ratio, x='condition', y='MFR_ratio', ax=ax)
# Reference at 100 % (POST equals PRE). axhline spans the whole axis whatever the number
# of conditions, unlike a segment hard-coded for exactly eight of them.
ax.axhline(100, color='#bc0000')

In [None]:
# MFR ratio against the PRE rate and against the POST rate, overlaid on the same axes
ax = sns.scatterplot(data=df_MFR_ratio, x='PRE', y='MFR_ratio', label='PRE')
sns.scatterplot(data=df_MFR_ratio, x='POST', y='MFR_ratio', label='POST', ax=ax)

In [None]:
# Scatter plot with the fitted regression line
ax = sns.regplot(data=df_MFR_ratio, x='mean_PRE_POST', y='MFR_ratio')

# Fit the regression on the DATA. Fitting the points of the drawn line (as done before)
# yields the same slope and intercept but a trivial r = 1 and a meaningless p-value,
# because those points lie exactly on a straight line.
slope, intercept, r, p, sterr = sts.linregress(x=df_MFR_ratio['mean_PRE_POST'].to_numpy(dtype=float),
                                               y=df_MFR_ratio['MFR_ratio'].to_numpy(dtype=float))

# Intercept, slope and p-value (null hypothesis: slope is zero)
print(intercept, slope, p)


## Analysis of ISIs

In [None]:
# Inter-spike intervals (ISIs): for every peak, the time to the NEXT peak of the same
# electrode in the same recording. The interval is stored in the 'time' column of the
# earlier peak; the last peak of each group has no successor and is dropped.
#
# Groups include the treatment: PRE and POST are separate recordings, so an interval must
# never span from the last PRE peak to the first POST peak of an electrode.
# Assumes peaks appear in chronological order within each electrode — TODO confirm.
isi_group_keys = ['well', 'treatment', 'electrode']
next_peak_time = df_peaks_sub_filter.groupby(isi_group_keys, sort=False)['time'].shift(-1)

df_ISIs = (
    df_peaks_sub_filter
    .assign(**{
        'time': next_peak_time - df_peaks_sub_filter['time'],
        'cond-treat': df_peaks_sub_filter['condition'] + '-' + df_peaks_sub_filter['treatment'],
    })
    .dropna(subset=['time'])
)

In [None]:
def kde_plot(df_ISIs, x_col, g_col, log=True):
    """Draw a ridgeline plot: one overlapping KDE row of ``x_col`` per value of ``g_col``.

    Parameters
    ----------
    df_ISIs : pandas.DataFrame
        Table holding the values to plot and the grouping column.
    x_col : str
        Column whose distribution is drawn; only strictly positive values are kept.
    g_col : str
        Column that defines the rows (and colours) of the plot.
    log : bool, default True
        Passed to seaborn as ``log_scale``.

    Notes
    -----
    Calls ``sns.set_theme``, which changes the plotting theme globally. Nothing is returned.
    """
    sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

    # One colour per group present in the input table
    n_groups = len(df_ISIs[g_col].drop_duplicates().values)
    palette = sns.cubehelix_palette(n_groups, rot=-.25, light=.7)

    # Long-form table with fixed column names, restricted to positive values
    data = pd.DataFrame({'x': df_ISIs[x_col].values, 'g': df_ISIs[g_col].values})
    data = data[data['x'] > 0]

    grid = sns.FacetGrid(data, row="g", hue="g", aspect=15, height=.5, palette=palette)

    # Filled density first, then a white outline on top of it
    shared_kde_options = dict(bw_adjust=.5, clip_on=False, log_scale=log)
    grid.map(sns.kdeplot, "x", fill=True, alpha=1, linewidth=1.5, **shared_kde_options)
    grid.map(sns.kdeplot, "x", color="w", lw=2, **shared_kde_options)

    # With color=None the baseline of each row takes the colour of its hue level
    grid.refline(y=0, linewidth=2, linestyle="-", color=None, clip_on=False)

    def write_group_name(values, color, label):
        # Called once per row by FacetGrid.map, which supplies `color` and `label`
        current_axes = plt.gca()
        current_axes.text(0, .2, label, fontweight="bold", color=color,
                          ha="left", va="center", transform=current_axes.transAxes)

    grid.map(write_group_name, "x")

    # Negative spacing makes consecutive rows overlap
    grid.figure.subplots_adjust(hspace=-.25)

    # Strip the decorations that would clash with the overlap
    grid.set_titles("")
    grid.set(yticks=[], ylabel="")
    grid.despine(bottom=True, left=True)

In [None]:
# ISI distribution of every well, log-scaled x axis
kde_plot(df_ISIs, x_col='time', g_col='well', log=True)

In [None]:
# ISI distribution of every condition-treatment pair, log-scaled x axis
kde_plot(df_ISIs, x_col='time', g_col='cond-treat', log=True)

In [None]:
# Linear-scale view of every well, restricted to ISIs strictly between 0.3 and 5
kde_plot(df_ISIs[(df_ISIs['time'] > 0.3) & (df_ISIs['time'] < 5)], x_col='time', g_col='well', log=False)

In [None]:
# Linear-scale view of every condition-treatment pair, restricted to ISIs strictly between 0.5 and 5
# NOTE(review): the per-well version of this plot uses 0.3 as lower bound — confirm the difference is intended.
kde_plot(df_ISIs[(df_ISIs['time'] > 0.5) & (df_ISIs['time'] < 5)], 
         x_col='time', g_col='cond-treat', log=False)

## Burst frequency

In [None]:
# Number of spikes in consecutive time windows of width `tw`.
# NOTE(review): `df_laser` and `df_spikes` are not defined in the visible part of this
# notebook — confirm where they come from before running this cell on a fresh kernel.
tw = 0.1
cutoff_counts = 60

first_start = np.min(df_laser['start_times'])
last_end = np.max(df_laser['end_times'])

# Window edges and window centres
time_ranges = np.arange(first_start, last_end, tw)
time_ranges_mean = (time_ranges[1:] + time_ranges[:-1]) * 0.5 

# Half-open windows [start, end): a spike on an edge belongs to the window it opens
spike_times = df_spikes['Time (s)']
counts = np.array([
    len(df_spikes[(spike_times >= window_start) & (spike_times < window_end)])
    for window_start, window_end in zip(time_ranges[:-1], time_ranges[1:])
])
len(counts)