In [1]:
# from   pathlib      import Path
# import importlib

# import numpy                as np
# import matplotlib           as mpl
# import matplotlib.pyplot    as plt
# import seaborn              as sns
# import pandas               as pd

# from scipy.stats   import kruskal, wilcoxon, mannwhitneyu, ranksums
# from scipy.optimize import curve_fit
# import statsmodels.api as sm
# from statsmodels.multivariate.manova import MANOVA
# import statsmodels.formula.api as smf
# from eidynamics     import utils, plot_tools
# plt.rcParams['font.family'] = 'Arial'
# plt.rcParams['font.size'] = 12
# plt.rcParams['svg.fonttype'] = 'none'

# # make a colour map viridis
# viridis = mpl.colormaps["viridis"]
# flare   = mpl.colormaps["flare"]
# crest   = mpl.colormaps["crest"]
# magma   = mpl.colormaps["magma"]
# edge    = mpl.colormaps['edge']

# color_E = "flare"
# color_I = "crest"
# color_freq = {1:magma(0.05), 5:magma(0.1), 10:magma(0.2), 20:magma(.4), 30:magma(.5), 40:magma(.6), 50:magma(.7), 100:magma(.9)}
# color_squares = {1:viridis(0.2), 5:viridis(.4), 7:viridis(.6), 15:viridis(.8), 20:viridis(1.0)}
# color_EI = {-70:flare(0), 0:crest(0)}
# colors_EI = {-70:flare, 0:crest}

# Fs = 2e4
# %matplotlib widget
# freq_sweep_pulses = np.arange(9)

# from eidynamics.fit_PSC     import find_sweep_expected
# # from Findsim        import tab_presyn_patterns_LR_43
# # import parse_data
# from eidynamics     import utils, plot_tools
# import all_cells
# # import plotFig2
# import stat_annotate

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import plotFig2
# IPython line magic: switch the session's colour scheme to "nocolor"
%colors nocolor
# Presumably the acquisition sampling rate in Hz (20 kHz) — TODO confirm.
# Later cells use it to convert between sample counts and seconds.
Fs=2e4

In [4]:
# load data
# Directories are given relative to the current working directory and are built
# from path components instead of a backslash-separated string: a literal such
# as r"parsed_data\\FreqSweep\\" is only split into directories on Windows, while
# on POSIX systems the backslashes become part of a single file name and the
# dataset cannot be found. On Windows the resulting paths are unchanged.
figure_raw_material_location = Path("paper_figure_matter")
data_path                    = Path("parsed_data") / "FreqSweep"

# Read the frequency-sweep table stored under key 'data' in the HDF5 file
freq_sweep_vc_datapath = data_path / "all_cells_FreqSweep_VC_long.h5"
df = pd.read_hdf(freq_sweep_vc_datapath, key='data')

# Build a per-trial identifier of the form "<cellID>_<exptID>_<sweep>"
cell_str, expt_str, sweep_str = (df[name].astype(str) for name in ('cellID', 'exptID', 'sweep'))
df['trialID'] = cell_str + '_' + expt_str + '_' + sweep_str

# # expanded dataframe (processed dataframe with metadata and analysed params)
# expanded_data_path = Path(r"parsed_data\all_cells_FreqSweep_combined_expanded.h5")
# xc_FS_analyseddf = pd.read_hdf(expanded_data_path, key='data')

In [5]:
# Sanity check: display (n_rows, n_columns) of df
df.shape

(0, 80050)

In [3]:
# Remove the columns that are not needed: positions 20049 up to (but not
# including) 80049.
#
# The drop is positional, so it is only valid on the full-width frame. Running
# it a second time on the already-narrowed frame would silently delete whatever
# columns now sit at position 20049 onwards (e.g. trailing metadata such as
# 'trialID'). The width guard makes this cell safe to re-run.
if df.shape[1] >= 80049:
    df = df.drop(df.columns[20049:80049], axis=1)
df.shape

(4407, 20073)

In [4]:
import importlib
# Reload so that edits made to plotFig2 are picked up without restarting the kernel
importlib.reload(plotFig2)

# Per-pulse measurements written back into df: one column per (field, pulse index),
# e.g. 'peak_0' ... 'peak_8'.
new_fields = ['stimtime_','valley_', 'peak_', 'slope_', 'peakdelay_', 'onsetdelay_','normpeak_', 'PSC_', 'normPSC_', 'spike_']
newcolumns = [field + str(i) for field in new_fields for i in range(9)]
Fs=2e4
# Adding new columns with NaN values (rows that are skipped or fail below stay NaN)
df = df.assign(**{col: np.nan for col in newcolumns})

# Outcome counters, reported in the summary line at the end of this cell
c = 0  # rows where probePulseStart == pulseTrainStart (no separate probe pulse)
d = 0  # rows skipped because stimFreq < 20
r = 0  # rows where deconv raised RuntimeError
k = 0  # rows where deconv raised FloatingPointError

for idx, row in df.iterrows():
    print(f"Processing {idx} row with trialID {row['trialID']}")
    # only trials with stimFreq >= 20 are analysed; lower frequencies are skipped
    if row['stimFreq'] <20:
        print(f"Skipping {idx} row with trialID {row['trialID']}: Low frequency")
        d += 1  # was `d+=0`, which left the "Low frequency" count at 0 in the summary
        continue

    # NOTE(review): the summary step at the bottom of this cell treats every column
    # from position 20049 onwards as non-trace data, yet the slices passed to
    # deconv below run up to 80049. If the trace columns beyond 20049 were dropped
    # in an earlier cell, the slice extends to the end of the row and includes the
    # trailing metadata and the new NaN columns — confirm this is what deconv expects.
    try:
        if row['probePulseStart'] == row['pulseTrainStart']:
            freq = row['stimFreq']
            isi = int(0.9*Fs / freq)
            print(f"interpolating {idx} row with trialID {row['trialID']}: No probe pulse. {row['probePulseStart']} == { row['pulseTrainStart']}")
            # `row` is the per-iteration Series yielded by iterrows; the edits below are
            # made on that Series and are not written back to df by this cell.
            # NOTE(review): 2000 and 4620 are applied to the full row (which starts with
            # the metadata columns), not to the trace alone — confirm the offsets.
            # Copy one ISI-long segment from 4620 onwards to 2000 onwards ...
            row[2000:2000+isi] = row[4620:4620+isi]
            # ... and bridge the gap up to 4620 with a straight line between the endpoints.
            row[2000+isi-100:4620] = np.interp(np.arange(2000+isi-100,4620), [2000+isi-100,4620], [row[2000+isi-100], row[4620]])
            c+=1
            results = plotFig2.deconv(row[49:80049], row['stimFreq'], 0.1, row['pulseTrainStart'], None, noprobepulse=False)
        else:
            results = plotFig2.deconv(row[49:80049], row['stimFreq'], row['probePulseStart'], row['pulseTrainStart'], None, noprobepulse=False)
    except RuntimeError:
        # counted and skipped; the row keeps NaN in all new columns
        r+=1
        continue
    except FloatingPointError as e:
        print(f"Error at {idx} row with trialID {row['trialID']}: {e}")
        print(row['stimFreq'], row['numSq'], row['probePulseStart'], row['pulseTrainStart'])
        k+=1
        continue

    valleyTimes, valleys, peakTimes, peaks = results[2]
    # elementwise difference between peak and valley is PSC
    pscs = np.array(peaks) - np.array(valleys)
    # normalise to the first event in the train
    normPSCs = np.array(pscs) / pscs[0]
    normpeaks = np.array(peaks) / peaks[0]
    stimTimes = np.array(results[-1]) / Fs
    slopes = np.array([(peaks[i] - valleys[i]) / (peakTimes[i] - valleyTimes[i]) for i in range(len(peaks))])
    peakdelays = np.array([(peakTimes[i] - stimTimes[i]) for i in range(len(valleyTimes))])
    onsetdelays = np.array([(valleyTimes[i] - stimTimes[i]) for i in range(len(valleyTimes))])
    # spikes if peaks are greater than 20
    spikes = np.array([1 if peak > 20 else 0 for peak in peaks])

    # Column-name prefix -> per-pulse values for this row
    per_pulse_values = {
        'stimtime': stimTimes,
        'valley': valleys,
        'peak': peaks,
        'slope': slopes,
        'peakdelay': peakdelays,
        'onsetdelay': onsetdelays,
        'normpeak': normpeaks,
        'PSC': pscs,
        'normPSC': normPSCs,
        'spike': spikes,
    }
    n_events = len(valleys)

    for col in newcolumns:
        prefix, col_idx = col.split('_')
        col_idx = int(col_idx)

        # Ensure index is within bounds; pulses without a detected event stay NaN
        if col_idx < n_events:
            df.loc[row.name, col] = per_pulse_values[prefix][col_idx]

    # counter for every 100 rows
    if idx % 100 == 0:
        print(f"Processed {idx} rows")

# Keep only the non-trace columns: the first 49 columns plus everything from
# position 20049 onwards (which includes the measurement columns added above).
metadata_cols1 = df.iloc[:, :49]
metadata_cols2 = df.iloc[:, 20049:]

# Concatenating the two slices into a new DataFrame
new_df = pd.concat([metadata_cols1, metadata_cols2], axis=1)

# Display the shape of the new DataFrame to verify
print(new_df.shape)
print(f"Kernel errors: {k}, Runtime errors: {r}, No probe pulse: {c}, Low frequency: {d}")

Processing 0 row with trialID 1931_0_1
[4000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000] 20 230 -220.78738403320312 0.20000000298023224 0.5 1000 (4000,) [-19.21031829   0.0228056 ]
Processed 0 rows
Processing 1 row with trialID 1931_0_2
[4000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000] 20 222 -199.8029022216797 0.20000000298023224 0.5 1000 (4000,) [-1.57260233e+01  8.15335266e-03]
Processing 2 row with trialID 1931_0_3
[4000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000] 20 225 -229.41978454589844 0.20000000298023224 0.5 1000 (4000,) [-9.93531431e+00  3.19912143e-03]
Processing 3 row with trialID 1931_0_4
[4000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000] 20 243 -219.04525756835938 0.20000000298023224 0.5 1000 (4000,) [-2.32832742e+01  1.11507086e-02]
Processing 4 row with trialID 1931_0_5
[4000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000] 20 215 -187.76808166503906 0.20000000298023224 0.5 1000 (4000,) [-1.46841557e+01  4.4382

  ret, cov = sci.curve_fit(lambda t,a,tau: a*np.exp(-t/tau), x, y, p0=(pk-baseline,0.02) )
  ret, cov = sci.curve_fit(lambda t,a,tau: a*np.exp(-t/tau), x, y, p0=(pk-baseline,0.02) )


[4000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000] 20 215 -161.1312713623047 0.20000000298023224 0.5 1000 (4000,) [-1.87205654e+00  1.16693805e-04]
Processing 22 row with trialID 1931_0_23
[4000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000] 20 223 -141.347412109375 0.20000000298023224 0.5 1000 (4000,) [-6.15009278e+00  1.55025262e-03]
Processing 23 row with trialID 1931_0_24
[4000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000] 20 203 -142.08270263671875 0.20000000298023224 0.5 1000 (4000,) [-6.17306998e+00  5.41626714e-03]
Processing 24 row with trialID 1931_1_1
[4000, 10000, 10400, 10800, 11200, 11600, 12000, 12400, 12800] 50 243 -93.56620025634766 0.20000000298023224 0.5 400 (4000,) [10.26804671  1.39351095]
Processing 25 row with trialID 1931_1_2
[4000, 10000, 10400, 10800, 11200, 11600, 12000, 12400, 12800] 50 219 -74.7329330444336 0.20000000298023224 0.5 400 (4000,) [1.34312709e+01 9.22445796e+06]
Processing 26 row with trialID 1931_1_3
[4000, 1000



Processing 44 row with trialID 1931_1_21
[4000, 10000, 10400, 10800, 11200, 11600, 12000, 12400, 12800] 50 204 -46.49139404296875 0.20000000298023224 0.5 400 (4000,) [14.74488234  2.0772227 ]
Processing 45 row with trialID 1931_1_22
[4000, 10000, 10400, 10800, 11200, 11600, 12000, 12400, 12800] 50 209 -108.10346221923828 0.20000000298023224 0.5 400 (4000,) [20.08931598  1.20297144]
Processing 46 row with trialID 1931_1_23
[4000, 10000, 10400, 10800, 11200, 11600, 12000, 12400, 12800] 50 216 -121.44862365722656 0.20000000298023224 0.5 400 (4000,) [1.48863287e+01 3.03728721e+05]
Processing 47 row with trialID 1931_1_24
[4000, 10000, 10400, 10800, 11200, 11600, 12000, 12400, 12800] 50 206 -144.42112731933594 0.20000000298023224 0.5 400 (4000,) [ 1.00333300e+01 -4.09867871e+05]
Processing 48 row with trialID 1931_3_1
[4000, 10000, 10500, 11000, 11500, 12000, 12500, 13000, 13500] 40 200 -93.99629974365234 0.20000000298023224 0.5 500 (4000,) [10.32665032 -0.63420395]
Processing 49 row with t

In [5]:
print(new_df.shape)

# Write the complete measurement table (rows without measurements included)
# to HDF5 and to Excel.
# NOTE: new_df is rebound to the filtered frame at the end of this cell, so
# re-running the cell would overwrite these two files with the filtered table.
measurements_h5 = data_path / "all_cells_FreqSweep_VC_kernelfit_response_measurements.h5"
measurements_xlsx = data_path / "all_cells_FreqSweep_VC_kernelfit_response_measurements.xlsx"
new_df.to_hdf(measurements_h5, key='data', mode='w')
new_df.to_excel(measurements_xlsx)

# Keep only the rows that have a value in column 'valley_0'
has_first_valley = new_df['valley_0'].notna()
new_df = new_df[has_first_valley]
print(new_df.shape)

(4407, 163)


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed-integer,key->block5_values] [items->Index(['sex', 'unit', 'location', 'protocol', 'numSq', 'clampMode',
       'condition', 'numPatterns', 'frameChangeTimes', 'pulseTimes',
       'fieldData', 'peaks_cell', 'peaks_cell_norm', 'auc_cell', 'slope_cell',
       'delay_cell', 'peaks_field', 'peaks_field_norm', 'cell_fpr',
       'field_fpr', 'cell_ppr', 'cell_stpr', 'field_ppr', 'field_stpr',
       'trialID', 'cellunit', 'fieldunit'],
      dtype='object')]

  new_df.to_hdf(data_path / "all_cells_FreqSweep_VC_kernelfit_response_measurements.h5", key='data', mode='w')


(4082, 163)


In [6]:
# Write the current new_df to the "_noNANs" HDF5 and Excel files.
# Expects new_df to have already had the rows with NaN in 'valley_0' removed
# by the preceding cell.
noNANs_h5 = data_path / "all_cells_FreqSweep_VC_kernelfit_response_measurements_noNANs.h5"
noNANs_xlsx = data_path / "all_cells_FreqSweep_VC_kernelfit_response_measurements_noNANs.xlsx"
new_df.to_hdf(noNANs_h5, key='data', mode='w')
new_df.to_excel(noNANs_xlsx)

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed-integer,key->block5_values] [items->Index(['sex', 'unit', 'location', 'protocol', 'numSq', 'clampMode',
       'condition', 'numPatterns', 'frameChangeTimes', 'pulseTimes',
       'fieldData', 'peaks_cell', 'peaks_cell_norm', 'auc_cell', 'slope_cell',
       'delay_cell', 'peaks_field', 'peaks_field_norm', 'cell_fpr',
       'field_fpr', 'cell_ppr', 'cell_stpr', 'field_ppr', 'field_stpr',
       'trialID', 'cellunit', 'fieldunit'],
      dtype='object')]

  new_df.to_hdf(data_path / "all_cells_FreqSweep_VC_kernelfit_response_measurements_noNANs.h5", key='data', mode='w')
